polyrun 1.5.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +34 -0
  3. data/README.md +2 -2
  4. data/docs/SETUP_PROFILE.md +2 -0
  5. data/lib/polyrun/cli/coverage_commands.rb +1 -1
  6. data/lib/polyrun/cli/failure_commands.rb +1 -1
  7. data/lib/polyrun/cli/help.rb +20 -17
  8. data/lib/polyrun/cli/helpers.rb +16 -0
  9. data/lib/polyrun/cli/init_command.rb +8 -1
  10. data/lib/polyrun/cli/partition_diagnostics.rb +22 -0
  11. data/lib/polyrun/cli/plan_command.rb +47 -18
  12. data/lib/polyrun/cli/queue_command.rb +25 -2
  13. data/lib/polyrun/cli/run_queue_command.rb +145 -0
  14. data/lib/polyrun/cli/run_shards_command.rb +6 -1
  15. data/lib/polyrun/cli/run_shards_parallel_children.rb +2 -1
  16. data/lib/polyrun/cli/run_shards_parallel_wait.rb +5 -1
  17. data/lib/polyrun/cli/run_shards_plan_boot_phases.rb +47 -2
  18. data/lib/polyrun/cli/run_shards_plan_options.rb +14 -4
  19. data/lib/polyrun/cli/run_shards_planning.rb +20 -12
  20. data/lib/polyrun/cli/run_shards_run.rb +22 -5
  21. data/lib/polyrun/cli/spec_quality_commands.rb +140 -0
  22. data/lib/polyrun/cli.rb +16 -2
  23. data/lib/polyrun/coverage/example_diff.rb +122 -0
  24. data/lib/polyrun/coverage/merge/formatters_html.rb +5 -5
  25. data/lib/polyrun/data/factory_counts.rb +14 -1
  26. data/lib/polyrun/database/clone_shards.rb +2 -0
  27. data/lib/polyrun/database/shard.rb +2 -1
  28. data/lib/polyrun/minitest.rb +9 -0
  29. data/lib/polyrun/partition/hrw.rb +40 -3
  30. data/lib/polyrun/partition/paths_build.rb +8 -3
  31. data/lib/polyrun/partition/plan.rb +88 -19
  32. data/lib/polyrun/partition/plan_lpt.rb +49 -7
  33. data/lib/polyrun/partition/plan_sharding.rb +8 -0
  34. data/lib/polyrun/partition/reports.rb +139 -0
  35. data/lib/polyrun/partition/timing_diagnostics.rb +139 -0
  36. data/lib/polyrun/partition/timing_keys.rb +2 -1
  37. data/lib/polyrun/queue/duration.rb +30 -0
  38. data/lib/polyrun/queue/file_store.rb +114 -3
  39. data/lib/polyrun/quick/example_runner.rb +2 -0
  40. data/lib/polyrun/quick/runner.rb +21 -0
  41. data/lib/polyrun/rspec.rb +10 -0
  42. data/lib/polyrun/spec_quality/config.rb +134 -0
  43. data/lib/polyrun/spec_quality/fragment.rb +39 -0
  44. data/lib/polyrun/spec_quality/merge.rb +78 -0
  45. data/lib/polyrun/spec_quality/minitest_hook.rb +42 -0
  46. data/lib/polyrun/spec_quality/plan_loader.rb +47 -0
  47. data/lib/polyrun/spec_quality/profile.rb +91 -0
  48. data/lib/polyrun/spec_quality/report.rb +261 -0
  49. data/lib/polyrun/spec_quality/rspec_hook.rb +55 -0
  50. data/lib/polyrun/spec_quality/sql_counter.rb +34 -0
  51. data/lib/polyrun/spec_quality.rb +205 -0
  52. data/lib/polyrun/templates/POLYRUN.md +6 -0
  53. data/lib/polyrun/templates/ci_matrix.polyrun.yml +4 -0
  54. data/lib/polyrun/templates/polyrun_hooks_spec_quality.rb +12 -0
  55. data/lib/polyrun/templates/polyrun_spec_quality.yml +20 -0
  56. data/lib/polyrun/templates/rails_prepare.polyrun.yml +5 -0
  57. data/lib/polyrun/timing/merge.rb +5 -5
  58. data/lib/polyrun/timing/rspec_example_formatter.rb +14 -7
  59. data/lib/polyrun/timing/stats.rb +76 -0
  60. data/lib/polyrun/timing/summary.rb +5 -2
  61. data/lib/polyrun/timing/variance_report.rb +51 -0
  62. data/lib/polyrun/version.rb +1 -1
  63. metadata +22 -1
@@ -23,14 +23,14 @@ module Polyrun
23
23
  )
24
24
  file_list_html = render_html_partial("file_list", file_rows_html: files.map { |file| html_file_list_row(file) }.join("\n"))
25
25
  file_sections_html = files.map { |file| render_html_partial("file_section", file: file) }.join("\n")
26
- ERB.new(File.read(html_template_path), trim_mode: "-").result_with_hash(
26
+ ERB.new(File.read(html_template_path, encoding: Encoding::UTF_8), trim_mode: "-").result_with_hash(
27
27
  title: CGI.escapeHTML(title.to_s),
28
28
  generated_label: html_generated_label(generated_at),
29
29
  overview_html: overview_html,
30
30
  file_list_html: file_list_html,
31
31
  file_sections_html: file_sections_html,
32
- stylesheet: File.read(html_stylesheet_path),
33
- javascript: File.read(html_javascript_path)
32
+ stylesheet: File.read(html_stylesheet_path, encoding: Encoding::UTF_8),
33
+ javascript: File.read(html_javascript_path, encoding: Encoding::UTF_8)
34
34
  )
35
35
  end
36
36
  # rubocop:enable Metrics/AbcSize
@@ -56,7 +56,7 @@ module Polyrun
56
56
  end
57
57
 
58
58
  def render_html_partial(name, locals = {})
59
- ERB.new(File.read(html_partial_path(name)), trim_mode: "-").result_with_hash(locals)
59
+ ERB.new(File.read(html_partial_path(name), encoding: Encoding::UTF_8), trim_mode: "-").result_with_hash(locals)
60
60
  end
61
61
 
62
62
  def html_file_payload(path, file, root)
@@ -152,7 +152,7 @@ module Polyrun
152
152
  def html_source_lines(path, fallback_length)
153
153
  return Array.new(fallback_length, "") unless File.file?(path.to_s)
154
154
 
155
- File.readlines(path.to_s, chomp: true)
155
+ File.readlines(path.to_s, chomp: true, encoding: Encoding::UTF_8)
156
156
  rescue Errno::ENOENT, Errno::EACCES, ArgumentError
157
157
  Array.new(fallback_length, "")
158
158
  end
@@ -6,11 +6,19 @@ module Polyrun
6
6
  class << self
7
7
  def reset!
8
8
  @counts = Hash.new(0)
9
+ @example_counts = Hash.new(0)
10
+ end
11
+
12
+ def reset_example!
13
+ @example_counts = Hash.new(0)
9
14
  end
10
15
 
11
16
  def record(factory_name)
12
17
  @counts ||= Hash.new(0)
13
- @counts[factory_name.to_s] += 1
18
+ @example_counts ||= Hash.new(0)
19
+ name = factory_name.to_s
20
+ @counts[name] += 1
21
+ @example_counts[name] += 1
14
22
  end
15
23
 
16
24
  def counts
@@ -18,6 +26,11 @@ module Polyrun
18
26
  @counts.dup
19
27
  end
20
28
 
29
+ def example_counts
30
+ @example_counts ||= Hash.new(0)
31
+ @example_counts.dup
32
+ end
33
+
21
34
  def summary_lines(top: 20)
22
35
  @counts ||= Hash.new(0)
23
36
  sorted = @counts.sort_by { |_, n| -n }
@@ -57,6 +57,8 @@ module Polyrun
57
57
  end
58
58
 
59
59
  plan.each { |row| create_one_shard!(row, replace, force_drop, dry_run) }
60
+ rescue => e
61
+ raise Polyrun::Error, "CloneShards shard_index=#{shard_index}: #{e.message}"
60
62
  end
61
63
  end
62
64
  threads.each(&:join)
@@ -44,11 +44,12 @@ module Polyrun
44
44
 
45
45
  return u unless u.match?(%r{\A[a-z][a-z0-9+.-]*://}i)
46
46
 
47
- if (m = u.match(%r{/([^/?]+)(\?|$)}))
47
+ if (m = u.match(%r{\A[a-z][a-z0-9+.-]*://[^/?#]+/([^/?]+)(\?|$)}i))
48
48
  base = m[1]
49
49
  suffixed = "#{base}_#{Integer(shard_index)}"
50
50
  u.sub(%r{/#{Regexp.escape(base)}(\?|$)}, "/#{suffixed}\\1")
51
51
  else
52
+ Polyrun::Log.warn "polyrun database: URL has no database segment; shard suffix skipped: #{u}"
52
53
  u
53
54
  end
54
55
  end
@@ -47,5 +47,14 @@ module Polyrun
47
47
  ::Minitest::Test.send(:prepend, WorkerPingTestHook)
48
48
  Polyrun::WorkerPing.ensure_interval_ping_thread!
49
49
  end
50
+
51
+ # Per-test spec quality when +POLYRUN_SPEC_QUALITY=1+ (requires stdlib +Coverage+ for line deltas).
52
+ def install_spec_quality!(only_if: nil, root: nil, output_path: nil)
53
+ pred = only_if || -> { Polyrun::SpecQuality.enabled? }
54
+ return unless pred.call
55
+
56
+ require_relative "spec_quality/minitest_hook"
57
+ Polyrun::SpecQuality::MinitestHook.install!(only_if: pred, root: root, output_path: output_path)
58
+ end
50
59
  end
51
60
  end
@@ -8,15 +8,42 @@ module Polyrun
8
8
 
9
9
  # @return [Integer] shard index in 0...m
10
10
  def shard_for(path:, total_shards:, seed: "")
11
+ pick_shard(path: path, total_shards: total_shards, seed: seed) { |p, j, salt| score(p, j, salt) }
12
+ end
13
+
14
+ # Per-shard weights (heterogeneous nodes). Uniform weights match +shard_for+.
15
+ def weighted_shard_for(path:, total_shards:, seed: "", shard_weights: nil)
16
+ weights = normalize_shard_weights(shard_weights, total_shards)
17
+ pick_shard(path: path, total_shards: total_shards, seed: seed) do |p, j, salt|
18
+ base = score(p, j, salt).to_f
19
+ w = weights[j]
20
+ w.positive? ? base / w : base
21
+ end
22
+ end
23
+
24
+ def normalize_shard_weights(shard_weights, total_shards)
25
+ m = Integer(total_shards)
26
+ return Array.new(m, 1.0) if shard_weights.nil? || shard_weights.empty?
27
+
28
+ weights = shard_weights.map { |w| w.to_f }
29
+ if weights.size < m
30
+ weights += Array.new(m - weights.size, 1.0)
31
+ elsif weights.size > m
32
+ weights = weights[0, m]
33
+ end
34
+ weights
35
+ end
36
+
37
+ def pick_shard(path:, total_shards:, seed:)
11
38
  m = Integer(total_shards)
12
39
  raise Polyrun::Error, "total_shards must be >= 1" if m < 1
13
40
 
14
41
  best_j = 0
15
- best = -1
42
+ best = -1.0
16
43
  salt = seed.to_s
17
44
  p = path.to_s
18
45
  m.times do |j|
19
- h = score(p, j, salt)
46
+ h = yield(p, j, salt)
20
47
  if h > best
21
48
  best = h
22
49
  best_j = j
@@ -26,8 +53,18 @@ module Polyrun
26
53
  end
27
54
 
28
55
  def score(path, shard_index, salt)
29
- Digest::SHA256.digest("#{salt}\n#{path}\n#{shard_index}").unpack1("H*").hex
56
+ digest = Digest::SHA256.digest("#{salt}\n#{path}\n#{shard_index}")
57
+ if fast_score?
58
+ digest.unpack1("Q>")
59
+ else
60
+ digest.unpack1("H*").hex
61
+ end
62
+ end
63
+
64
+ def fast_score?
65
+ %w[1 true yes].include?(ENV["POLYRUN_HRW_FAST_SCORE"]&.to_s&.downcase)
30
66
  end
67
+ private_class_method :fast_score?
31
68
  end
32
69
  end
33
70
  end
@@ -53,7 +53,7 @@ module Polyrun
53
53
  st = stringify_keys(raw)
54
54
  taken =
55
55
  if st["glob"]
56
- take_glob_paths(st, remaining, cwd)
56
+ take_glob_paths(st, remaining)
57
57
  elsif st["regex"]
58
58
  take_regex_paths(st, remaining)
59
59
  else
@@ -66,8 +66,9 @@ module Polyrun
66
66
  out
67
67
  end
68
68
 
69
- def take_glob_paths(st, remaining, cwd)
70
- taken = glob_under_cwd(st["glob"].to_s, cwd).select { |p| remaining.include?(p) }
69
+ def take_glob_paths(st, remaining)
70
+ pattern = st["glob"].to_s
71
+ taken = remaining.to_a.select { |p| path_matches_glob?(p, pattern) }
71
72
  if st["sort_by_substring_order"]
72
73
  subs = Array(st["sort_by_substring_order"]).map(&:to_s)
73
74
  def_prio = int_or(st["default_priority"], int_or(st["default_sort_key"], 99))
@@ -92,6 +93,10 @@ module Polyrun
92
93
  Dir.glob(File.join(root, pattern)).map { |p| normalize_rel(p, cwd) }
93
94
  end
94
95
 
96
+ def path_matches_glob?(rel_path, pattern)
97
+ File.fnmatch?(pattern, rel_path, File::FNM_PATHNAME | File::FNM_EXTGLOB)
98
+ end
99
+
95
100
  def normalize_rel(path, cwd)
96
101
  abs = File.expand_path(path, cwd)
97
102
  Pathname.new(abs).relative_path_from(Pathname.new(File.expand_path(cwd))).to_s.tr("\\", "/")
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Polyrun/FileLength, Metrics/ClassLength -- partition strategies + constraints
1
2
  require_relative "timing_keys"
2
3
  require_relative "constraints"
3
4
  require_relative "hrw"
@@ -15,15 +16,22 @@ module Polyrun
15
16
  # Default +timing_granularity+ is +file+ (one weight per spec file). Experimental +:example+
16
17
  # uses +path:line+ locators and per-example weights in the timing JSON.
17
18
  # - +hrw+ (+rendezvous+) — rendezvous hashing for minimal remapping when m changes; optional constraints.
19
+ # - +weighted_hrw+ — rendezvous with per-shard weights (+shard_weights+); use +stable_cost_binpack+ for path costs.
20
+ # - +lazy_robin+ — sorted round-robin assignment with timing loaded for diagnostics and +shard_seconds+.
21
+ # - +preserve_order_round_robin+ — round-robin in paths-file order (no sort); membership from +paths_build+ only.
18
22
  class Plan
19
- COST_STRATEGIES = %w[cost cost_binpack binpack timing].freeze
20
- HRW_STRATEGIES = %w[hrw rendezvous].freeze
23
+ COST_STRATEGIES = %w[cost cost_binpack binpack timing stable_cost_binpack].freeze
24
+ HRW_STRATEGIES = %w[hrw rendezvous weighted_hrw].freeze
25
+ LAZY_ROBIN_STRATEGIES = %w[lazy_robin].freeze
26
+ MOD_STRATEGIES = %w[round_robin random_round_robin lazy_robin preserve_order_round_robin].freeze
21
27
 
22
- attr_reader :items, :total_shards, :strategy, :seed, :constraints, :timing_granularity
28
+ attr_reader :items, :total_shards, :strategy, :seed, :constraints, :timing_granularity, :root
23
29
 
24
- def initialize(items:, total_shards:, strategy: "round_robin", seed: nil, costs: nil, constraints: nil, root: nil, timing_granularity: :file)
30
+ def initialize(items:, total_shards:, strategy: "round_robin", seed: nil, costs: nil, constraints: nil, root: nil, timing_granularity: :file, stable_assignment: nil, stable_imbalance_threshold: 1.30, shard_weights: nil)
25
31
  @timing_granularity = TimingKeys.normalize_granularity(timing_granularity)
26
32
  @root = root ? File.expand_path(root) : Dir.pwd
33
+ @stable_assignment = normalize_stable_assignment(stable_assignment)
34
+ @stable_imbalance_threshold = stable_imbalance_threshold.to_f
27
35
  @items = items.map do |x|
28
36
  if @timing_granularity == :example
29
37
  TimingKeys.normalize_locator(x, @root, :example)
@@ -38,23 +46,30 @@ module Polyrun
38
46
  @seed = seed
39
47
  @constraints = normalize_constraints(constraints)
40
48
  @costs = normalize_costs(costs)
49
+ @shard_weights = shard_weights
41
50
 
42
51
  validate_constraints_strategy_combo!
43
52
  if cost_strategy? && (@costs.nil? || @costs.empty?)
44
53
  raise Polyrun::Error,
45
54
  "strategy #{@strategy} requires a timing map (path => seconds or path:line => seconds), e.g. merged polyrun_timing.json"
46
55
  end
56
+ if lazy_robin_strategy? && (@costs.nil? || @costs.empty?)
57
+ raise Polyrun::Error,
58
+ "strategy lazy_robin requires a timing map (path => seconds), e.g. merged polyrun_timing.json"
59
+ end
47
60
  end
48
61
 
49
62
  def ordered_items
50
63
  @ordered_items ||= case strategy
51
- when "round_robin"
64
+ when "round_robin", "lazy_robin"
52
65
  items.sort
66
+ when "preserve_order_round_robin"
67
+ items.dup
53
68
  when "random_round_robin"
54
69
  StableShuffle.call(items.sort, random_seed)
55
70
  when "cost", "cost_binpack", "binpack", "timing"
56
71
  items.sort
57
- when "hrw", "rendezvous"
72
+ when "hrw", "rendezvous", "weighted_hrw"
58
73
  items.sort
59
74
  else
60
75
  raise Polyrun::Error, "unknown partition strategy: #{strategy}"
@@ -79,11 +94,40 @@ module Polyrun
79
94
  cost_shards.map { |paths| paths.sum { |p| weight_for(p) } }
80
95
  elsif hrw_strategy?
81
96
  hrw_shards.map { |paths| paths.sum { |p| weight_for_optional(p) } }
97
+ elsif lazy_robin_strategy? && @costs&.any?
98
+ mod_shards.map { |paths| paths.sum { |p| weight_for(p) } }
82
99
  else
83
100
  []
84
101
  end
85
102
  end
86
103
 
104
+ def file_weight(path)
105
+ (lazy_robin_strategy? || cost_strategy?) ? weight_for(path) : weight_for_optional(path)
106
+ end
107
+
108
+ def shard_file_weights(shard_index)
109
+ shard(shard_index).map { |p| [p, file_weight(p)] }.sort_by { |(_, w)| [-w, p] }
110
+ end
111
+
112
+ def default_weight
113
+ vals = @costs&.values || []
114
+ if vals.empty?
115
+ 1.0
116
+ else
117
+ vals.sum / vals.size
118
+ end
119
+ end
120
+
121
+ def stable_strategy?
122
+ strategy == "stable_cost_binpack"
123
+ end
124
+
125
+ attr_reader :stable_imbalance_threshold
126
+
127
+ def stable_assignment_map
128
+ @stable_assignment
129
+ end
130
+
87
131
  def manifest(shard_index)
88
132
  m = {
89
133
  "shard_index" => Integer(shard_index),
@@ -94,7 +138,7 @@ module Polyrun
94
138
  }
95
139
  m["timing_granularity"] = timing_granularity.to_s if timing_granularity == :example
96
140
  secs = shard_weight_totals
97
- m["shard_seconds"] = secs if cost_strategy? || (hrw_strategy? && secs.any? { |x| x > 0 })
141
+ m["shard_seconds"] = secs if emit_shard_seconds?(secs)
98
142
  m
99
143
  end
100
144
 
@@ -110,6 +154,14 @@ module Polyrun
110
154
  HRW_STRATEGIES.include?(name.to_s)
111
155
  end
112
156
 
157
+ def self.lazy_robin_strategy?(name)
158
+ LAZY_ROBIN_STRATEGIES.include?(name.to_s)
159
+ end
160
+
161
+ def self.timing_load_strategy?(name)
162
+ cost_strategy?(name) || hrw_strategy?(name) || lazy_robin_strategy?(name)
163
+ end
164
+
113
165
  private
114
166
 
115
167
  def cost_strategy?
@@ -120,12 +172,38 @@ module Polyrun
120
172
  self.class.hrw_strategy?(strategy)
121
173
  end
122
174
 
175
+ def lazy_robin_strategy?
176
+ self.class.lazy_robin_strategy?(strategy)
177
+ end
178
+
179
+ def emit_shard_seconds?(secs)
180
+ return false if secs.empty?
181
+
182
+ cost_strategy? || lazy_robin_strategy? || (hrw_strategy? && secs.any? { |x| x > 0 })
183
+ end
184
+
123
185
  def normalize_constraints(c)
124
186
  return nil if c.nil?
125
187
 
126
188
  c.is_a?(Constraints) ? c : Constraints.from_hash(c, root: @root)
127
189
  end
128
190
 
191
+ def normalize_stable_assignment(map)
192
+ return nil if map.nil? || map.empty?
193
+
194
+ out = {}
195
+ map.each do |k, v|
196
+ key =
197
+ if @timing_granularity == :example
198
+ TimingKeys.normalize_locator(k.to_s, @root, :example)
199
+ else
200
+ File.expand_path(k.to_s, @root)
201
+ end
202
+ out[key] = Integer(v)
203
+ end
204
+ out
205
+ end
206
+
129
207
  def normalize_costs(costs)
130
208
  return nil if costs.nil?
131
209
 
@@ -150,18 +228,6 @@ module Polyrun
150
228
  "partition constraints require strategy cost_binpack (with --timing) or hrw/rendezvous"
151
229
  end
152
230
 
153
- def default_weight
154
- return @default_weight if defined?(@default_weight)
155
-
156
- vals = @costs&.values || []
157
- @default_weight =
158
- if vals.empty?
159
- 1.0
160
- else
161
- vals.sum / vals.size
162
- end
163
- end
164
-
165
231
  def weight_for(path)
166
232
  key = cost_lookup_key(path.to_s)
167
233
  return @costs[key] if @costs&.key?(key)
@@ -197,3 +263,6 @@ end
197
263
 
198
264
  require_relative "plan_sharding"
199
265
  require_relative "plan_lpt"
266
+ require_relative "timing_diagnostics"
267
+ require_relative "reports"
268
+ # rubocop:enable Polyrun/FileLength, Metrics/ClassLength
@@ -9,20 +9,63 @@ module Polyrun
9
9
  end
10
10
 
11
11
  def build
12
+ if @plan.stable_strategy? && @plan.stable_assignment_map&.any?
13
+ stable = build_from_stable_map
14
+ return stable if imbalance_ratio(stable) <= @plan.stable_imbalance_threshold
15
+ end
16
+
12
17
  buckets = Array.new(@plan.total_shards) { [] }
13
18
  totals = Array.new(@plan.total_shards, 0.0)
14
- lpt_fill_forced!(buckets, totals)
15
- lpt_balance_free!(buckets, totals)
19
+ forced_pairs, free = partition_forced_and_free
20
+ lpt_apply_forced!(buckets, totals, forced_pairs)
21
+ lpt_balance_free!(buckets, totals, free)
22
+ buckets
23
+ end
24
+
25
+ def build_from_stable_map
26
+ buckets = Array.new(@plan.total_shards) { [] }
27
+ map = @plan.stable_assignment_map
28
+ @plan.items.each do |item|
29
+ key = @plan.send(:cost_lookup_key, item)
30
+ j = map[key]
31
+ j = Integer(j) if j
32
+ j = fallback_shard_for(item) unless j && j >= 0 && j < @plan.total_shards
33
+ buckets[j] << item
34
+ end
16
35
  buckets
17
36
  end
18
37
 
38
+ def fallback_shard_for(item)
39
+ Hrw.shard_for(path: item, total_shards: @plan.total_shards, seed: @plan.send(:hrw_salt))
40
+ end
41
+
42
+ def imbalance_ratio(buckets)
43
+ totals = buckets.map { |paths| paths.sum { |p| @plan.send(:weight_for, p) } }
44
+ return 1.0 if totals.empty?
45
+
46
+ avg = totals.sum / totals.size.to_f
47
+ return 1.0 unless avg.positive?
48
+
49
+ totals.max / avg
50
+ end
51
+
19
52
  private
20
53
 
21
- def lpt_fill_forced!(buckets, totals)
54
+ def partition_forced_and_free
55
+ forced_pairs = []
56
+ free = []
22
57
  @plan.items.each do |item|
23
- next unless @plan.constraints && (j = @plan.constraints.forced_shard_for(item))
58
+ if @plan.constraints && (j = @plan.constraints.forced_shard_for(item))
59
+ forced_pairs << [item, Integer(j)]
60
+ else
61
+ free << item
62
+ end
63
+ end
64
+ [forced_pairs, free]
65
+ end
24
66
 
25
- j = Integer(j)
67
+ def lpt_apply_forced!(buckets, totals, forced_pairs)
68
+ forced_pairs.each do |item, j|
26
69
  raise Polyrun::Error, "constraint shard #{j} out of range" if j < 0 || j >= @plan.total_shards
27
70
 
28
71
  buckets[j] << item
@@ -30,8 +73,7 @@ module Polyrun
30
73
  end
31
74
  end
32
75
 
33
- def lpt_balance_free!(buckets, totals)
34
- free = @plan.items.reject { |item| @plan.constraints&.forced_shard_for(item) }
76
+ def lpt_balance_free!(buckets, totals, free)
35
77
  pairs = free.map { |p| [p, @plan.send(:weight_for, p)] }
36
78
  pairs.sort_by! { |(p, w)| [-w, p] }
37
79
 
@@ -7,10 +7,18 @@ module Polyrun
7
7
  @hrw_shards ||= begin
8
8
  buckets = Array.new(total_shards) { [] }
9
9
  salt = hrw_salt
10
+ weighted = strategy == "weighted_hrw"
10
11
  items.each do |path|
11
12
  j =
12
13
  if @constraints && (fj = @constraints.forced_shard_for(path))
13
14
  Integer(fj)
15
+ elsif weighted
16
+ Hrw.weighted_shard_for(
17
+ path: path,
18
+ total_shards: total_shards,
19
+ seed: salt,
20
+ shard_weights: @shard_weights
21
+ )
14
22
  else
15
23
  Hrw.shard_for(path: path, total_shards: total_shards, seed: salt)
16
24
  end
@@ -0,0 +1,139 @@
1
+ module Polyrun
2
+ module Partition
3
+ # Imbalance and dominant-file reports from {Plan} shard weights.
4
+ module Reports
5
+ IMBALANCE_WARN = 1.20
6
+ IMBALANCE_ATTENTION = 1.50
7
+ DOMINANT_SHARD_FRACTION = 0.40
8
+
9
+ module_function
10
+
11
+ def emit_all!(plan)
12
+ totals = plan.shard_weight_totals
13
+ return if totals.empty? || totals.all?(&:zero?)
14
+
15
+ emit_imbalance!(plan, totals)
16
+ emit_dominant_files!(plan, totals)
17
+ end
18
+
19
+ def imbalance_metrics(totals)
20
+ return nil if totals.empty?
21
+
22
+ max = totals.max
23
+ min = totals.min
24
+ avg = totals.sum / totals.size.to_f
25
+ ratio = avg.positive? ? max / avg : 1.0
26
+ slowest = totals.each_with_index.max_by { |v, _| v }&.last
27
+ {
28
+ max_shard_seconds: max,
29
+ min_shard_seconds: min,
30
+ avg_shard_seconds: avg,
31
+ imbalance_ratio: ratio,
32
+ slowest_shard: slowest
33
+ }
34
+ end
35
+
36
+ # rubocop:disable Metrics/AbcSize -- imbalance summary lines
37
+ def emit_imbalance!(plan, totals = nil)
38
+ totals ||= plan.shard_weight_totals
39
+ m = imbalance_metrics(totals)
40
+ return unless m
41
+
42
+ lines = []
43
+ lines << "polyrun partition imbalance:"
44
+ lines << format(
45
+ " max=%.2fs min=%.2fs avg=%.2fs imbalance_ratio=%.2f slowest_shard=%d",
46
+ m[:max_shard_seconds],
47
+ m[:min_shard_seconds],
48
+ m[:avg_shard_seconds],
49
+ m[:imbalance_ratio],
50
+ m[:slowest_shard]
51
+ )
52
+
53
+ slow_idx = m[:slowest_shard]
54
+ slow_paths = plan.shard(slow_idx)
55
+ slow_total = totals[slow_idx]
56
+ if slow_total.positive? && slow_paths.any?
57
+ top = plan.shard_file_weights(slow_idx).first
58
+ if top
59
+ _path, w = top
60
+ pct = (w / slow_total) * 100.0
61
+ lines << format(" largest_file_percent_of_shard=%.1f%%", pct)
62
+ if pct > DOMINANT_SHARD_FRACTION * 100.0
63
+ lines << " hint: single file dominates slowest shard; try --timing-granularity example or split the file"
64
+ end
65
+ end
66
+ end
67
+
68
+ plan.total_shards.times do |i|
69
+ top5 = plan.shard_file_weights(i).first(5)
70
+ next if top5.empty?
71
+
72
+ lines << " shard #{i} top files:"
73
+ top5.each do |path, w|
74
+ lines << format(" %.2fs %s", w, path)
75
+ end
76
+ end
77
+
78
+ if m[:imbalance_ratio] > IMBALANCE_ATTENTION
79
+ lines << " Attention required: slowest shard is #{format("%.2f", m[:imbalance_ratio])}x average"
80
+ elsif m[:imbalance_ratio] > IMBALANCE_WARN
81
+ lines << " Warning: imbalance_ratio > #{IMBALANCE_WARN}"
82
+ end
83
+
84
+ lines.each { |ln| Polyrun::Log.warn ln }
85
+ end
86
+ # rubocop:enable Metrics/AbcSize
87
+
88
+ def dominant_candidates(plan, totals = nil)
89
+ totals ||= plan.shard_weight_totals
90
+ return [] if totals.empty?
91
+
92
+ suite_total = totals.sum
93
+ return [] if suite_total <= 0
94
+
95
+ target = suite_total / plan.total_shards.to_f
96
+ slow_idx = totals.each_with_index.max_by { |v, _| v }&.last
97
+ slow_total = slow_idx ? totals[slow_idx] : 0.0
98
+
99
+ weights = file_weights_aggregated(plan)
100
+ weights.filter_map do |path, w|
101
+ next if w <= target
102
+
103
+ mult = w / target
104
+ reasons = []
105
+ reasons << "#{format("%.1f", mult)}x target shard time" if mult > 1.0
106
+ reasons << "split candidate" if slow_total.positive? && w > DOMINANT_SHARD_FRACTION * slow_total
107
+ {path: path, seconds: w, target: target, multiple: mult, reasons: reasons}
108
+ end.sort_by { |h| -h[:seconds] }
109
+ end
110
+
111
+ def emit_dominant_files!(plan, totals = nil)
112
+ candidates = dominant_candidates(plan, totals)
113
+ return if candidates.empty?
114
+
115
+ Polyrun::Log.warn "Attention:"
116
+ candidates.first(10).each do |c|
117
+ Polyrun::Log.warn format(" %s: %.1fs", c[:path], c[:seconds])
118
+ Polyrun::Log.warn format(" This single file is %.1fx the target shard time.", c[:multiple])
119
+ Polyrun::Log.warn " Try --timing-granularity example or split this file."
120
+ end
121
+ end
122
+
123
+ def file_weights_aggregated(plan)
124
+ by_file = Hash.new(0.0)
125
+ plan.items.each do |p|
126
+ w = plan.file_weight(p)
127
+ key =
128
+ if plan.timing_granularity == :example
129
+ TimingDiagnostics.file_from_locator(p.to_s)
130
+ else
131
+ p.to_s
132
+ end
133
+ by_file[key] += w
134
+ end
135
+ by_file
136
+ end
137
+ end
138
+ end
139
+ end