polyrun 1.4.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +38 -0
  3. data/README.md +2 -2
  4. data/docs/SETUP_PROFILE.md +2 -0
  5. data/lib/polyrun/cli/ci_shard_hooks.rb +12 -4
  6. data/lib/polyrun/cli/ci_shard_run_command.rb +3 -1
  7. data/lib/polyrun/cli/help.rb +10 -2
  8. data/lib/polyrun/cli/helpers.rb +38 -0
  9. data/lib/polyrun/cli/init_command.rb +8 -1
  10. data/lib/polyrun/cli/partition_diagnostics.rb +22 -0
  11. data/lib/polyrun/cli/plan_command.rb +47 -18
  12. data/lib/polyrun/cli/queue_command.rb +25 -2
  13. data/lib/polyrun/cli/run_queue_command.rb +145 -0
  14. data/lib/polyrun/cli/run_shards_command.rb +6 -1
  15. data/lib/polyrun/cli/run_shards_parallel_children.rb +28 -35
  16. data/lib/polyrun/cli/run_shards_parallel_wait.rb +267 -0
  17. data/lib/polyrun/cli/run_shards_plan_boot_phases.rb +81 -3
  18. data/lib/polyrun/cli/run_shards_plan_options.rb +17 -3
  19. data/lib/polyrun/cli/run_shards_planning.rb +20 -12
  20. data/lib/polyrun/cli/run_shards_run.rb +28 -37
  21. data/lib/polyrun/cli/run_shards_worker_interrupt.rb +75 -0
  22. data/lib/polyrun/cli/spec_quality_commands.rb +140 -0
  23. data/lib/polyrun/cli.rb +16 -2
  24. data/lib/polyrun/coverage/example_diff.rb +122 -0
  25. data/lib/polyrun/coverage/merge/formatters_html.rb +4 -0
  26. data/lib/polyrun/data/factory_counts.rb +14 -1
  27. data/lib/polyrun/database/clone_shards.rb +2 -0
  28. data/lib/polyrun/database/shard.rb +2 -1
  29. data/lib/polyrun/hooks.rb +9 -1
  30. data/lib/polyrun/log.rb +16 -0
  31. data/lib/polyrun/minitest.rb +43 -0
  32. data/lib/polyrun/partition/hrw.rb +40 -3
  33. data/lib/polyrun/partition/paths_build.rb +8 -3
  34. data/lib/polyrun/partition/plan.rb +88 -19
  35. data/lib/polyrun/partition/plan_lpt.rb +49 -7
  36. data/lib/polyrun/partition/plan_sharding.rb +8 -0
  37. data/lib/polyrun/partition/reports.rb +139 -0
  38. data/lib/polyrun/partition/timing_diagnostics.rb +139 -0
  39. data/lib/polyrun/partition/timing_keys.rb +2 -1
  40. data/lib/polyrun/queue/duration.rb +30 -0
  41. data/lib/polyrun/queue/file_store.rb +107 -3
  42. data/lib/polyrun/quick/example_runner.rb +13 -0
  43. data/lib/polyrun/quick/runner.rb +21 -0
  44. data/lib/polyrun/rspec.rb +26 -0
  45. data/lib/polyrun/spec_quality/config.rb +134 -0
  46. data/lib/polyrun/spec_quality/fragment.rb +39 -0
  47. data/lib/polyrun/spec_quality/merge.rb +78 -0
  48. data/lib/polyrun/spec_quality/minitest_hook.rb +42 -0
  49. data/lib/polyrun/spec_quality/plan_loader.rb +47 -0
  50. data/lib/polyrun/spec_quality/profile.rb +91 -0
  51. data/lib/polyrun/spec_quality/report.rb +261 -0
  52. data/lib/polyrun/spec_quality/rspec_hook.rb +55 -0
  53. data/lib/polyrun/spec_quality/sql_counter.rb +34 -0
  54. data/lib/polyrun/spec_quality.rb +205 -0
  55. data/lib/polyrun/templates/POLYRUN.md +6 -0
  56. data/lib/polyrun/templates/ci_matrix.polyrun.yml +4 -0
  57. data/lib/polyrun/templates/polyrun_hooks_spec_quality.rb +12 -0
  58. data/lib/polyrun/templates/polyrun_spec_quality.yml +20 -0
  59. data/lib/polyrun/templates/rails_prepare.polyrun.yml +5 -0
  60. data/lib/polyrun/timing/merge.rb +5 -5
  61. data/lib/polyrun/timing/stats.rb +76 -0
  62. data/lib/polyrun/timing/summary.rb +5 -2
  63. data/lib/polyrun/timing/variance_report.rb +51 -0
  64. data/lib/polyrun/version.rb +1 -1
  65. data/lib/polyrun/worker_ping.rb +74 -0
  66. data/sig/polyrun/minitest.rbs +2 -0
  67. data/sig/polyrun/rspec.rbs +4 -0
  68. data/sig/polyrun/worker_ping.rbs +10 -0
  69. metadata +26 -1
@@ -0,0 +1,139 @@
1
+ require "set"
2
+
3
module Polyrun
  module Partition
    # Stale / missing timing coverage before cost-based partition.
    #
    # Compares the suite's items with the keys of a cost table and reports
    # items without a cost, cost entries matching no item, and the coverage ratio.
    module TimingDiagnostics
      # Basenames matching this pattern are listed separately when they lack timing data.
      SUSPICIOUS_BASENAME = /system|feature|integration|playwright|capybara/i

      module_function

      # Analyze how well +costs+ covers +items+.
      #
      # @param items [Enumerable] suite entries; each is normalized via TimingKeys.normalize_locator
      # @param costs [Hash, nil] cost per key; +nil+ is treated as an empty table
      # @param timing_path [String, nil] timing file whose mtime is reported as an age
      # @param root [String, nil] project root; defaults to Dir.pwd
      # @param granularity [Symbol, String] normalized via TimingKeys.normalize_granularity
      # @return [Hash] analysis result with :missing_files, :stale_entries, :coverage,
      #   :known_files, :total_files, :timing_file_age, :default_weight, :suspicious_missing
      # rubocop:disable Metrics/AbcSize -- timing coverage scan
      def analyze(items:, costs:, timing_path:, root:, granularity: :file)
        root = File.expand_path(root || Dir.pwd)
        g = TimingKeys.normalize_granularity(granularity)
        item_keys = items.map { |p| lookup_key(p, root, g) }
        cost_keys = costs&.keys || []

        covered = coverage_predicate(cost_keys, g)
        known_keys, missing = item_keys.partition { |k| covered.call(k) }
        # Set lookup keeps stale detection linear for both granularities.
        item_keys_set = item_keys.to_set
        stale = cost_keys.reject { |k| item_keys_set.include?(k) }
        known = known_keys.size
        total = item_keys.size

        # An empty suite counts as fully covered so no low-coverage warning fires.
        coverage = total.zero? ? 1.0 : known.to_f / total
        default_weight = default_weight_for(costs)
        suspicious = missing.select { |k| suspicious_path?(k) }

        {
          missing_files: missing,
          stale_entries: stale,
          coverage: coverage,
          known_files: known,
          total_files: total,
          timing_file_age: timing_file_age(timing_path),
          default_weight: default_weight,
          suspicious_missing: suspicious
        }
      end
      # rubocop:enable Metrics/AbcSize

      # Build the "does this item key have timing data?" test.
      #
      # At +:example+ granularity an item also counts as covered when any cost
      # entry belongs to the same file; otherwise only an exact key match counts.
      #
      # @param cost_keys [Array] keys of the cost table
      # @param granularity [Symbol] normalized granularity
      # @return [Proc] lambda taking an item key and returning true/false
      def coverage_predicate(cost_keys, granularity)
        exact = cost_keys.to_set
        return ->(key) { exact.include?(key) } unless granularity == :example

        cost_files = cost_keys.map { |k| file_from_locator(k) }.to_set
        ->(key) { exact.include?(key) || cost_files.include?(file_from_locator(key)) }
      end

      # Log the findings of {analyze} through Polyrun::Log.warn.
      #
      # @param analysis [Hash] result of {analyze}
      # @return [void]
      # rubocop:disable Metrics/AbcSize -- stale/missing timing warnings
      def emit_warnings!(analysis)
        cov = analysis[:coverage]
        if cov < 0.50
          Polyrun::Log.warn "polyrun: timing coverage #{format_percent(cov)} — binpack quality low; run full timing capture first"
        elsif cov < 0.80
          Polyrun::Log.warn "polyrun: timing coverage #{format_percent(cov)} (< 80%)"
        end

        dw = analysis[:default_weight]
        Polyrun::Log.warn "polyrun: default weight for missing files: #{format("%.4f", dw)}s (mean of known costs)"

        if analysis[:timing_file_age]
          Polyrun::Log.warn "polyrun: timing file age: #{analysis[:timing_file_age]}"
        end

        missing = analysis[:missing_files]
        unless missing.empty?
          Polyrun::Log.warn "polyrun: #{missing.size} file(s) without timing data"
          missing.first(10).each { |p| Polyrun::Log.warn " missing: #{p}" }
          Polyrun::Log.warn " ..." if missing.size > 10
        end

        stale = analysis[:stale_entries]
        unless stale.empty?
          Polyrun::Log.warn "polyrun: #{stale.size} timing entry(ies) for files not in suite"
          stale.first(5).each { |p| Polyrun::Log.warn " stale: #{p}" }
          Polyrun::Log.warn " ..." if stale.size > 5
        end

        suspicious = analysis[:suspicious_missing]
        return if suspicious.empty?

        Polyrun::Log.warn "polyrun: suspicious missing timing (#{suspicious.size} slow-path pattern(s)):"
        suspicious.first(5).each { |p| Polyrun::Log.warn " suspicious: #{p}" }
        # Mark truncation the same way the missing/stale lists do.
        Polyrun::Log.warn " ..." if suspicious.size > 5
      end
      # rubocop:enable Metrics/AbcSize

      # Normalize a suite entry to the key form used by the cost table.
      def lookup_key(path, root, granularity)
        TimingKeys.normalize_locator(path.to_s, root, granularity)
      end

      # Strip a trailing +:<digits>+ from a locator; other keys are returned as strings unchanged.
      def file_from_locator(key)
        s = key.to_s
        m = s.match(/\A(.+):(\d+)\z/)
        m ? m[1] : s
      end

      # True when the basename of the key's file part matches SUSPICIOUS_BASENAME.
      def suspicious_path?(key)
        base = File.basename(file_from_locator(key))
        base.match?(SUSPICIOUS_BASENAME)
      end

      # Mean of the known costs, or 1.0 when there are none.
      def default_weight_for(costs)
        vals = costs&.values || []
        return 1.0 if vals.empty?

        vals.sum / vals.size.to_f
      end

      # Human-readable age of the timing file, or +nil+ when no path is given
      # or the path is not a regular file. Relative paths resolve against Dir.pwd.
      def timing_file_age(timing_path)
        return nil unless timing_path

        abs = File.expand_path(timing_path.to_s, Dir.pwd)
        return nil unless File.file?(abs)

        age_sec = Time.now - File.mtime(abs)
        format_age(age_sec)
      end

      # Format seconds as minutes (< 1h), hours (< 1d) or days.
      # Negative input (mtime in the future) is reported as zero.
      def format_age(sec)
        sec = 0.0 if sec.negative?
        if sec < 3600
          format("%.0fm ago", sec / 60.0)
        elsif sec < 86_400
          format("%.1fh ago", sec / 3600.0)
        else
          format("%.1fd ago", sec / 86_400.0)
        end
      end

      # Format a 0..1 ratio as a percentage with one decimal.
      def format_percent(ratio)
        format("%.1f%%", ratio * 100.0)
      end
    end
  end
end
@@ -1,6 +1,7 @@
1
1
  require "json"
2
2
 
3
3
  require_relative "../log"
4
+ require_relative "../timing/stats"
4
5
 
5
6
  module Polyrun
6
7
  module Partition
@@ -70,7 +71,7 @@ module Polyrun
70
71
  out = {}
71
72
  data.each do |k, v|
72
73
  key = normalize_locator(k.to_s, root, g)
73
- fv = v.to_f
74
+ fv = Polyrun::Timing::Stats.binpack_weight(v)
74
75
  if out.key?(key) && out[key] != fv
75
76
  Polyrun::Log.warn(
76
77
  "polyrun: timing JSON duplicate key #{key.inspect} after normalize (#{out[key]} vs #{fv}); using #{fv}"
@@ -0,0 +1,30 @@
1
+ require "json"
2
+ require "optparse"
3
+ require "shellwords"
4
+
5
module Polyrun
  module Queue
    # Turns duration text such as +10m+, +1h+, +600s+, +2d+ or a bare number
    # of seconds into a Float number of seconds.
    module Duration
      # Seconds per accepted unit suffix (matched case-insensitively).
      UNIT_SECONDS = {"s" => 1, "m" => 60, "h" => 3600, "d" => 86_400}.freeze

      module_function

      # @param text [#to_s] duration text; surrounding whitespace is ignored
      # @return [Float] seconds
      # @raise [Polyrun::Error] when the text is neither a plain number nor number + unit
      def parse_seconds(text)
        raw = text.to_s.strip
        # A bare number is already in seconds.
        return Float(raw) if raw.match?(/\A\d+(\.\d+)?\z/)

        parts = raw.match(/\A(\d+(?:\.\d+)?)(s|m|h|d)\z/i)
        factor = parts && UNIT_SECONDS[parts[2].downcase]
        raise Polyrun::Error, "invalid duration: #{text.inspect}" unless factor

        Float(parts[1]) * factor
      end
    end
  end
end
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Polyrun/FileLength, Metrics/ClassLength -- file-backed queue chunks + leases
1
2
  require "fileutils"
2
3
  require "json"
3
4
  require "securerandom"
@@ -77,19 +78,121 @@ module Polyrun
77
78
  true
78
79
  end
79
80
 
80
# Counts of pending paths, done paths and active leases, read inside +with_lock+.
#
# @param detailed [Boolean] when true, adds a "lease_details" entry built by +lease_details+
# @return [Hash] "pending", "done", "leases" (and optionally "lease_details")
def status(detailed: false)
  with_lock do
    meta = load_meta!
    active = read_leases
    summary = {
      "pending" => Integer(meta["pending_count"]),
      "done" => Integer(meta["done_count"]),
      "leases" => active.keys.size
    }
    summary["lease_details"] = lease_details(active) if detailed
    summary
  end
end
95
+
96
# Reclaim leases older than +older_than+ seconds and/or matching +worker_id+ when set.
# Each reclaimed lease's paths go back to pending and a "RECLAIM" ledger entry is appended;
# the remaining leases and the updated meta are then written.
# @return [Integer] number of paths returned to pending
def reclaim!(older_than: nil, worker_id: nil)
  returned = 0
  with_lock do
    meta = load_meta!
    surviving = read_leases.reject do |lease_id, lease|
      next false unless reclaim_lease?(lease, older_than: older_than, worker_id: worker_id)

      lease_paths = lease["paths"] || []
      return_paths_to_pending!(meta, lease_paths)
      returned += lease_paths.size
      append_ledger(
        "RECLAIM" => lease_id,
        "worker_id" => lease["worker_id"],
        "paths" => lease_paths
      )
      true
    end
    write_leases!(surviving)
    write_meta!(meta)
  end
  returned
end
123
+
124
# Reclaim a single lease by id: drop it from the lease table, return its paths
# to pending, and append a "RECLAIM" ledger entry.
#
# @param lease_id [String] id of the lease to reclaim
# @return [Integer] number of paths returned to pending (0 when the lease is unknown)
def reclaim_lease!(lease_id)
  count = 0
  with_lock do
    leases = read_leases
    lease = leases[lease_id]
    return 0 unless lease

    leases.delete(lease_id)
    write_leases!(leases)

    meta = load_meta!
    lease_paths = lease["paths"] || []
    return_paths_to_pending!(meta, lease_paths)
    write_meta!(meta)

    count = lease_paths.size
    append_ledger("RECLAIM" => lease_id, "worker_id" => lease["worker_id"], "paths" => lease_paths)
  end
  count
end
90
142
 
91
143
  private
92
144
 
145
# Decide whether +lease+ should be reclaimed.
#
# * A lease owned by a different worker than +worker_id+ (when given) is never reclaimed.
# * With +older_than+, the lease is reclaimed once its "claimed_at" age reaches that many
#   seconds; a lease whose "claimed_at" cannot be parsed is treated as reclaimable.
# * Without +older_than+, the lease is reclaimed only when +worker_id+ was given (and matched).
#
# @param lease [Hash] lease record with "worker_id" and "claimed_at"
# @param older_than [Numeric, nil] minimum age in seconds
# @param worker_id [#to_s, nil] restrict to this worker
# @return [Boolean]
# @raise [ArgumentError] when +older_than+ cannot be compared with a Float age
def reclaim_lease?(lease, older_than:, worker_id:)
  return false if worker_id && lease["worker_id"].to_s != worker_id.to_s
  return !!worker_id unless older_than

  # Only the timestamp parse is rescued: a non-numeric +older_than+ must surface as an
  # error instead of being mistaken for a corrupt lease and reclaiming everything.
  claimed =
    begin
      Time.parse(lease["claimed_at"].to_s)
    rescue ArgumentError
      return true
    end
  (Time.now - claimed) >= older_than
end
157
+
158
# Describe each lease for detailed status output.
#
# @param leases [Hash] lease id => lease record
# @return [Array<Hash>] one entry per lease with "lease_id", "worker_id", "paths_count",
#   "claimed_at" and "age_seconds" (nil when "claimed_at" cannot be parsed)
def lease_details(leases)
  reference = Time.now
  leases.map do |lease_id, lease|
    age =
      begin
        (reference - Time.parse(lease["claimed_at"].to_s)).round(1)
      rescue ArgumentError
        nil
      end

    {
      "lease_id" => lease_id,
      "worker_id" => lease["worker_id"],
      "paths_count" => (lease["paths"] || []).size,
      "claimed_at" => lease["claimed_at"],
      "age_seconds" => age
    }
  end
end
179
+
180
# Put +paths+ back at the front of the pending work and bump meta["pending_count"].
#
# When no chunk file exists a new "000001.json" chunk is written under +pending_dir+;
# otherwise the paths are prepended to the first file from +sorted_chunk_files+.
# +meta+ is mutated in place; persisting it is left to the caller.
def return_paths_to_pending!(meta, paths)
  return if paths.empty?

  meta["pending_count"] = Integer(meta["pending_count"]) + paths.size
  chunk_files = sorted_chunk_files
  if chunk_files.empty?
    FileUtils.mkdir_p(pending_dir)
    fresh_chunk = File.join(pending_dir, "000001.json")
    atomic_write(fresh_chunk, JSON.generate(paths.map(&:to_s)))
  else
    head_file = chunk_files.first
    existing = JSON.parse(File.read(head_file))
    atomic_write(head_file, JSON.generate(paths.map(&:to_s) + existing))
  end
end
195
+
93
196
  def queue_path
94
197
  File.join(@root, "queue.json")
95
198
  end
@@ -197,3 +300,4 @@ module Polyrun
197
300
  end
198
301
 
199
302
  require_relative "file_store_pending"
303
+ # rubocop:enable Polyrun/FileLength, Metrics/ClassLength
@@ -1,7 +1,10 @@
1
+ require_relative "../worker_ping"
1
2
  require_relative "assertions"
2
3
  require_relative "errors"
3
4
  require_relative "matchers"
4
5
 
6
+ Polyrun::WorkerPing.ensure_interval_ping_thread!
7
+
5
8
  module Polyrun
6
9
  module Quick
7
10
  # Per-example execution: merged lets, hooks, assertions, optional Capybara::DSL.
@@ -20,6 +23,9 @@ module Polyrun
20
23
  define_let_methods!
21
24
  run_let_bangs_from_chain
22
25
  extend_capybara_if_enabled!
26
+ qloc = quick_example_location(block)
27
+ Polyrun::WorkerPing.ping!(location: qloc)
28
+ Polyrun::SpecQuality.start_example!(location: qloc) if Polyrun::SpecQuality.started?
23
29
  begin
24
30
  run_before_hooks_from_chain(ancestor_chain)
25
31
  instance_eval(&block)
@@ -29,14 +35,21 @@ module Polyrun
29
35
  rescue => e
30
36
  @reporter.error(group_name, description, e)
31
37
  ensure
38
+ Polyrun::SpecQuality.finish_example!(location: qloc) if Polyrun::SpecQuality.started?
32
39
  run_after_hooks_from_chain(ancestor_chain)
33
40
  reset_capybara_if_enabled!
34
41
  @_let_cache = {}
42
+ Polyrun::WorkerPing.ping!(location: qloc)
35
43
  end
36
44
  end
37
45
 
38
46
  private
39
47
 
48
# "file:line" of the example block, or nil when the block is nil or has no source location.
def quick_example_location(block)
  source = block&.source_location
  return nil unless source

  file, line = source
  "#{file}:#{line}"
end
52
+
40
53
  def merge_lets_from_chain(ancestor_chain)
41
54
  @merged_lets = {}
42
55
  ancestor_chain.each do |g|
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Polyrun/FileLength -- quick runner + spec quality wiring
1
2
  require "pathname"
2
3
 
3
4
  require_relative "assertions"
@@ -49,6 +50,14 @@ module Polyrun
49
50
  @capybara_enabled = false
50
51
  end
51
52
 
53
# Load spec-quality support and start it when Polyrun::SpecQuality.enabled? is true.
#
# @param root [String, nil] project root; defaults to the expanded current directory
# @param output_path [String, nil] forwarded to Polyrun::SpecQuality.start!
def install_spec_quality!(root: nil, output_path: nil)
  require_relative "../spec_quality"
  if Polyrun::SpecQuality.enabled?
    base_dir = root || File.expand_path(Dir.pwd)
    Polyrun::SpecQuality.start!(root: base_dir, output_path: output_path)
  end
end
60
+
52
61
  def describe(name, &block)
53
62
  group = ExampleGroup.new(name)
54
63
  group.instance_eval(&block) if block
@@ -102,6 +111,7 @@ module Polyrun
102
111
  end
103
112
 
104
113
  quick_start_coverage_if_configured!
114
+ quick_start_spec_quality_if_configured!
105
115
 
106
116
  collector = load_quick_files!(files)
107
117
  return 1 unless collector
@@ -148,6 +158,16 @@ module Polyrun
148
158
  end
149
159
  end
150
160
 
161
# Start spec-quality collection for a quick run when it is requested for the
# current directory and has not been started yet.
def quick_start_spec_quality_if_configured!
  # Load the module before its first use; the checks below reference Polyrun::SpecQuality.
  require_relative "../spec_quality"
  return unless Polyrun::SpecQuality.spec_quality_requested_for_quick?(Dir.pwd)
  return if Polyrun::SpecQuality.started?

  Polyrun::SpecQuality.start!(
    root: File.expand_path(Dir.pwd)
  )
end
170
+
151
171
  def quick_start_coverage_if_configured!
152
172
  return unless Polyrun::Coverage::Collector.coverage_requested_for_quick?(Dir.pwd)
153
173
  return if Polyrun::Coverage::Collector.started?
@@ -187,3 +207,4 @@ module Polyrun
187
207
  end
188
208
  end
189
209
  end
210
+ # rubocop:enable Polyrun/FileLength
data/lib/polyrun/rspec.rb CHANGED
@@ -44,5 +44,31 @@ module Polyrun
44
44
  config.add_formatter Polyrun::Reporting::RspecFailureFragmentFormatter
45
45
  end
46
46
  end
47
+
48
# Installs the spec-quality RSpec hook when the predicate passes.
#
# +only_if+ is a callable predicate; when omitted, Polyrun::SpecQuality.enabled? decides.
# +root+ and +output_path+ are forwarded to SpecQuality::RspecHook.install!.
def install_spec_quality!(only_if: nil, root: nil, output_path: nil)
  pred = only_if
  pred ||= lambda do
    # Make sure the module is loaded before the default predicate asks it.
    require_relative "spec_quality"
    Polyrun::SpecQuality.enabled?
  end
  return unless pred.call

  require_relative "spec_quality/rspec_hook"
  Polyrun::SpecQuality::RspecHook.install!(only_if: pred, root: root, output_path: output_path)
end

# Writes {WorkerPing} after suite start, before/after each example (+location+ is file:line from metadata).
# Keeps +--worker-idle-timeout+ sensitive to example progress (not only a background thread).
def install_worker_ping!
  require "rspec/core"
  require_relative "worker_ping"
  ::RSpec.configure do |config|
    config.before(:suite) { Polyrun::WorkerPing.ping! }
    config.before(:each) do |example|
      Polyrun::WorkerPing.ping!(location: example.metadata[:location] || example.location)
    end
    config.after(:each) do |example|
      Polyrun::WorkerPing.ping!(location: example.metadata[:location] || example.location)
    end
  end

  Polyrun::WorkerPing.ensure_interval_ping_thread!
end
47
73
  end
48
74
  end
@@ -0,0 +1,134 @@
1
+ require "yaml"
2
+
3
module Polyrun
  module SpecQuality
    # Loads +config/polyrun_spec_quality.yml+ and +ENV+ overrides.
    module Config
      DEFAULT_CONFIG_RELATIVE = File.join("config", "polyrun_spec_quality.yml").freeze

      DEFAULTS = {
        "track_under" => %w[lib app],
        "min_line_churn" => 50,
        "min_query_count" => 20,
        "hot_line_example_overlap" => 10,
        "strict" => false,
        "sample" => 1.0,
        "ignore_examples" => [],
        "ignore_paths" => [],
        "ignore_query_patterns" => [],
        "profile" => %w[cpu mem],
        "sql_counter" => false,
        "minimum_unique_lines_per_example" => nil,
        "max_zero_hit_examples" => nil,
        "max_hot_line_overlap" => nil
      }.freeze

      module_function

      # True when POLYRUN_SPEC_QUALITY or POLYRUN_SPEC_QUALITY_FRAGMENTS is truthy
      # and POLYRUN_SPEC_QUALITY_DISABLE is not.
      def enabled?(env = ENV)
        return false if disabled?(env)

        truthy?(env["POLYRUN_SPEC_QUALITY"]) || truthy?(env["POLYRUN_SPEC_QUALITY_FRAGMENTS"])
      end

      # True when POLYRUN_SPEC_QUALITY_DISABLE is truthy.
      def disabled?(env = ENV)
        truthy?(env["POLYRUN_SPEC_QUALITY_DISABLE"])
      end

      # Build the effective configuration.
      #
      # Precedence, lowest to highest: DEFAULTS, YAML file, environment, +overrides+.
      # POLYRUN_SPEC_QUALITY_STRICT still forces "strict" on after overrides.
      #
      # @param root [String] project root (expanded and stored under "root")
      # @param config_path [String, nil] explicit YAML path; defaults to DEFAULT_CONFIG_RELATIVE under +root+
      # @param env [#[], #key?] environment source
      # @param overrides [Hash] extra settings; keys are stringified
      # @return [Hash] string-keyed configuration
      def load(root:, config_path: nil, env: ENV, **overrides)
        root = File.expand_path(root)
        file_cfg = load_yaml(root, config_path)
        merged = DEFAULTS.merge(stringify_keys(file_cfg))
        apply_env!(merged, env)
        merged.merge!(stringify_keys(overrides))
        merged["root"] = root
        merged["strict"] = resolve_strict(merged, env)
        merged["sample"] = resolve_sample(merged, env)
        normalize_config!(merged)
        merged
      end

      # Read the YAML config; returns {} when the file is absent or its top level is not a Hash.
      def load_yaml(root, config_path)
        path = config_path || File.join(root, DEFAULT_CONFIG_RELATIVE)
        path = File.expand_path(path)
        return {} unless File.file?(path)

        data = YAML.load_file(path)
        data.is_a?(Hash) ? data : {}
      end

      # Apply POLYRUN_SPEC_QUALITY_{STRICT,SAMPLE,SQL_COUNTER,PROFILE} onto +cfg+ in place.
      # A non-numeric POLYRUN_SPEC_QUALITY_SAMPLE raises ArgumentError.
      def apply_env!(cfg, env)
        cfg["strict"] = true if truthy?(env["POLYRUN_SPEC_QUALITY_STRICT"])
        if env.key?("POLYRUN_SPEC_QUALITY_SAMPLE")
          cfg["sample"] = Float(env["POLYRUN_SPEC_QUALITY_SAMPLE"])
        end
        if env.key?("POLYRUN_SPEC_QUALITY_SQL_COUNTER")
          cfg["sql_counter"] = truthy?(env["POLYRUN_SPEC_QUALITY_SQL_COUNTER"])
        end
        prof = env["POLYRUN_SPEC_QUALITY_PROFILE"]
        cfg["profile"] = prof.split(",").map(&:strip).reject(&:empty?) if prof && !prof.strip.empty?
      end

      # Final "strict" value: the env flag wins, otherwise any truthy config value.
      def resolve_strict(cfg, env)
        return true if truthy?(env["POLYRUN_SPEC_QUALITY_STRICT"])

        cfg["strict"] == true || truthy?(cfg["strict"])
      end

      # Final "sample" value as a Float clamped to 0.0..1.0; unparseable values fall back to 1.0.
      def resolve_sample(cfg, env)
        v = cfg["sample"]
        f = v.is_a?(Numeric) ? v.to_f : Float(v)
        f.clamp(0.0, 1.0)
      rescue ArgumentError, TypeError
        1.0
      end

      # Coerce list settings to arrays of non-empty strings and thresholds to Integers.
      # rubocop:disable Metrics/AbcSize -- config key normalization
      def normalize_config!(cfg)
        cfg["track_under"] = Array(cfg["track_under"]).map(&:to_s).reject(&:empty?)
        cfg["track_under"] = %w[lib] if cfg["track_under"].empty?
        # Blank entries (e.g. an empty YAML list item) are dropped: an empty
        # substring pattern would match every location.
        %w[ignore_examples ignore_paths ignore_query_patterns].each do |k|
          cfg[k] = Array(cfg[k]).map(&:to_s).reject(&:empty?)
        end
        cfg["profile"] = Array(cfg["profile"]).map(&:to_s).reject(&:empty?)
        %w[min_line_churn min_query_count hot_line_example_overlap].each do |k|
          cfg[k] = Integer(cfg[k]) if cfg[k]
        end
      end
      # rubocop:enable Metrics/AbcSize

      # Whether +location+ matches any ignore pattern.
      #
      # A pattern written as +/.../+ is treated as a regular expression (falling back
      # to a substring test when it does not compile); anything else is a substring.
      # Empty locations and empty patterns never match.
      def ignored_example?(location, ignore_examples)
        loc = location.to_s
        return false if loc.empty?

        Array(ignore_examples).any? do |entry|
          pat = entry.to_s
          next false if pat.empty?

          if pat.start_with?("/") && pat.end_with?("/") && pat.size > 2
            loc.match?(Regexp.new(pat[1..-2]))
          else
            loc.include?(pat)
          end
        rescue RegexpError
          loc.include?(pat)
        end
      end

      # Recursively convert Hash keys to strings (descending into Arrays).
      def stringify_keys(obj)
        case obj
        when Hash
          obj.each_with_object({}) { |(k, v), out| out[k.to_s] = stringify_keys(v) }
        when Array
          obj.map { |e| stringify_keys(e) }
        else
          obj
        end
      end

      # True for 1/true/yes/on (case-insensitive, surrounding whitespace ignored).
      def truthy?(value)
        return false if value.nil?

        %w[1 true yes on].include?(value.to_s.strip.downcase)
      end
      private_class_method :truthy?
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require "fileutils"
2
+
3
+ require "json"
4
+
5
module Polyrun
  module SpecQuality
    # Paths and append-only writes for spec-quality JSONL fragment files.
    module Fragment
      module_function

      # Absolute path of the fragment file for this process.
      #
      # The directory comes from POLYRUN_SPEC_QUALITY_FRAGMENT_DIR (blank counts as
      # unset) or {default_fragment_dir}; the basename suffix comes from
      # Polyrun::Coverage::CollectorFragmentMeta.fragment_default_basename_from_env.
      def default_fragment_path(env = ENV)
        dir = fragment_dir_override(env) || default_fragment_dir
        base = Polyrun::Coverage::CollectorFragmentMeta.fragment_default_basename_from_env(env)
        File.expand_path(File.join(dir, "polyrun-spec-quality-fragment-#{base}.jsonl"))
      end

      # +coverage+ under the current working directory.
      def default_fragment_dir
        File.join(Dir.pwd, "coverage")
      end

      # Glob matching all fragment files.
      #
      # Looks in the same directory {default_fragment_path} writes to: the
      # POLYRUN_SPEC_QUALITY_FRAGMENT_DIR override when set (relative values resolve
      # against +cwd+), otherwise +cwd+/coverage.
      #
      # @param cwd [String] base directory
      # @param env [#[]] environment source
      # @return [String] glob pattern
      def glob_pattern(cwd = Dir.pwd, env = ENV)
        override = fragment_dir_override(env)
        dir = override ? File.expand_path(override, cwd) : File.join(cwd, "coverage")
        File.join(dir, "polyrun-spec-quality-fragment-*.jsonl")
      end

      # Non-blank POLYRUN_SPEC_QUALITY_FRAGMENT_DIR value, or nil.
      def fragment_dir_override(env)
        dir = env["POLYRUN_SPEC_QUALITY_FRAGMENT_DIR"].to_s.strip
        dir.empty? ? nil : dir
      end

      # Create the directory that will contain +path+.
      def ensure_fragment_dir!(path)
        FileUtils.mkdir_p(File.dirname(path))
      end

      # Create or empty the fragment file at +path+.
      def truncate_fragment!(path)
        ensure_fragment_dir!(path)
        File.write(path, "")
      end

      # Append +row+ to +path+ as one JSON line.
      def append_row!(path, row)
        ensure_fragment_dir!(path)
        File.open(path, "a") { |f| f.puts(JSON.generate(row)) }
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require "json"
2
+
3
module Polyrun
  module SpecQuality
    # Combines spec-quality JSONL fragment files into a single report payload.
    module Merge
      module_function

      # Merge every fragment in +paths+ and return the combined payload.
      def merge_files(paths)
        examples = paths.each_with_object({}) do |path, acc|
          merge_file_into!(acc, path)
        end
        build_merged_payload(examples, paths.size)
      end

      # Read one fragment into +examples+, keyed by each row's "example" value.
      # Blank lines and rows without an "example" are skipped; a later row for
      # the same example replaces the earlier one.
      def merge_file_into!(examples, path)
        File.foreach(path) do |raw|
          text = raw.strip
          next if text.empty?

          row = JSON.parse(text)
          name = row["example"].to_s
          examples[name] = row unless name.empty?
        end
      end

      # Assemble the payload: examples, hot lines, per-shard summary and meta.
      def build_merged_payload(examples, fragment_count)
        meta = {
          "polyrun_version" => Polyrun::VERSION,
          "fragment_count" => fragment_count,
          "example_count" => examples.size
        }
        {
          "examples" => examples,
          "hot_lines" => aggregate_hot_lines(examples),
          "shard_summary" => shard_summary(examples),
          "meta" => meta
        }
      end

      # Per-shard counts of examples, zero-unique-line examples and total line churn.
      # Rows without "polyrun_shard_index" are grouped under "?".
      def shard_summary(examples)
        totals = Hash.new { |h, k| h[k] = {"examples" => 0, "zero_hit" => 0, "line_churn" => 0} }
        examples.each_value do |row|
          index = row["polyrun_shard_index"]
          bucket = totals[index.nil? ? "?" : index.to_s]
          bucket["examples"] += 1
          bucket["zero_hit"] += 1 if row["unique_lines"].to_i.zero?
          bucket["line_churn"] += row["line_churn"].to_i
        end
        totals
      end

      # For every "path:line" in any row's "lines": the examples touching it,
      # the summed hit delta, and the number of distinct examples.
      def aggregate_hot_lines(examples)
        tally = Hash.new { |h, k| h[k] = {"examples" => [], "total_hits" => 0} }
        examples.each do |example_loc, row|
          Array(row["lines"]).each do |entry|
            path, line_no, delta = entry
            slot = tally["#{path}:#{line_no}"]
            slot["examples"] << example_loc
            slot["total_hits"] += delta.to_i
          end
        end
        tally.transform_values do |slot|
          slot["example_count"] = slot["examples"].uniq.size
          slot
        end
      end

      # Merge +paths+, write the payload as pretty JSON to +output_path+, and return it.
      def merge_and_write(paths, output_path)
        merge_files(paths).tap do |payload|
          File.write(output_path, JSON.pretty_generate(payload))
        end
      end
    end
  end
end