polyrun 1.5.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +34 -0
  3. data/README.md +2 -2
  4. data/docs/SETUP_PROFILE.md +2 -0
  5. data/lib/polyrun/cli/coverage_commands.rb +1 -1
  6. data/lib/polyrun/cli/failure_commands.rb +1 -1
  7. data/lib/polyrun/cli/help.rb +20 -17
  8. data/lib/polyrun/cli/helpers.rb +16 -0
  9. data/lib/polyrun/cli/init_command.rb +8 -1
  10. data/lib/polyrun/cli/partition_diagnostics.rb +22 -0
  11. data/lib/polyrun/cli/plan_command.rb +47 -18
  12. data/lib/polyrun/cli/queue_command.rb +25 -2
  13. data/lib/polyrun/cli/run_queue_command.rb +145 -0
  14. data/lib/polyrun/cli/run_shards_command.rb +6 -1
  15. data/lib/polyrun/cli/run_shards_parallel_children.rb +2 -1
  16. data/lib/polyrun/cli/run_shards_parallel_wait.rb +5 -1
  17. data/lib/polyrun/cli/run_shards_plan_boot_phases.rb +47 -2
  18. data/lib/polyrun/cli/run_shards_plan_options.rb +14 -4
  19. data/lib/polyrun/cli/run_shards_planning.rb +20 -12
  20. data/lib/polyrun/cli/run_shards_run.rb +22 -5
  21. data/lib/polyrun/cli/spec_quality_commands.rb +140 -0
  22. data/lib/polyrun/cli.rb +16 -2
  23. data/lib/polyrun/coverage/example_diff.rb +122 -0
  24. data/lib/polyrun/coverage/merge/formatters_html.rb +5 -5
  25. data/lib/polyrun/data/factory_counts.rb +14 -1
  26. data/lib/polyrun/database/clone_shards.rb +2 -0
  27. data/lib/polyrun/database/shard.rb +2 -1
  28. data/lib/polyrun/minitest.rb +9 -0
  29. data/lib/polyrun/partition/hrw.rb +40 -3
  30. data/lib/polyrun/partition/paths_build.rb +8 -3
  31. data/lib/polyrun/partition/plan.rb +88 -19
  32. data/lib/polyrun/partition/plan_lpt.rb +49 -7
  33. data/lib/polyrun/partition/plan_sharding.rb +8 -0
  34. data/lib/polyrun/partition/reports.rb +139 -0
  35. data/lib/polyrun/partition/timing_diagnostics.rb +139 -0
  36. data/lib/polyrun/partition/timing_keys.rb +2 -1
  37. data/lib/polyrun/queue/duration.rb +30 -0
  38. data/lib/polyrun/queue/file_store.rb +114 -3
  39. data/lib/polyrun/quick/example_runner.rb +2 -0
  40. data/lib/polyrun/quick/runner.rb +21 -0
  41. data/lib/polyrun/rspec.rb +10 -0
  42. data/lib/polyrun/spec_quality/config.rb +134 -0
  43. data/lib/polyrun/spec_quality/fragment.rb +39 -0
  44. data/lib/polyrun/spec_quality/merge.rb +78 -0
  45. data/lib/polyrun/spec_quality/minitest_hook.rb +42 -0
  46. data/lib/polyrun/spec_quality/plan_loader.rb +47 -0
  47. data/lib/polyrun/spec_quality/profile.rb +91 -0
  48. data/lib/polyrun/spec_quality/report.rb +261 -0
  49. data/lib/polyrun/spec_quality/rspec_hook.rb +55 -0
  50. data/lib/polyrun/spec_quality/sql_counter.rb +34 -0
  51. data/lib/polyrun/spec_quality.rb +205 -0
  52. data/lib/polyrun/templates/POLYRUN.md +6 -0
  53. data/lib/polyrun/templates/ci_matrix.polyrun.yml +4 -0
  54. data/lib/polyrun/templates/polyrun_hooks_spec_quality.rb +12 -0
  55. data/lib/polyrun/templates/polyrun_spec_quality.yml +20 -0
  56. data/lib/polyrun/templates/rails_prepare.polyrun.yml +5 -0
  57. data/lib/polyrun/timing/merge.rb +5 -5
  58. data/lib/polyrun/timing/rspec_example_formatter.rb +14 -7
  59. data/lib/polyrun/timing/stats.rb +76 -0
  60. data/lib/polyrun/timing/summary.rb +5 -2
  61. data/lib/polyrun/timing/variance_report.rb +51 -0
  62. data/lib/polyrun/version.rb +1 -1
  63. metadata +22 -1
@@ -0,0 +1,139 @@
1
+ require "set"
2
+
3
module Polyrun
  module Partition
    # Stale / missing timing coverage before cost-based partition.
    module TimingDiagnostics
      # Basenames matching this are reported separately when they lack timing data.
      SUSPICIOUS_BASENAME = /system|feature|integration|playwright|capybara/i

      module_function

      # Compares the suite's items against the timing cost table.
      #
      # With +:example+ granularity an item counts as known when either its exact
      # locator or its file (locator without the trailing +:line+) has a cost entry.
      # Otherwise an item is known only when its exact key has a cost entry.
      #
      # @param items [Enumerable] suite paths / locators
      # @param costs [Hash, nil] normalized timing key => cost
      # @param timing_path [String, nil] timing file, used only for its mtime
      # @param root [String, nil] project root; defaults to +Dir.pwd+
      # @param granularity [Symbol, String] passed through +TimingKeys.normalize_granularity+
      # @return [Hash] analysis result with :missing_files, :stale_entries, :coverage, etc.
      # rubocop:disable Metrics/AbcSize -- timing coverage scan
      def analyze(items:, costs:, timing_path:, root:, granularity: :file)
        root = File.expand_path(root || Dir.pwd)
        g = TimingKeys.normalize_granularity(granularity)
        item_keys = items.map { |p| lookup_key(p, root, g) }
        cost_keys = costs&.keys || []

        # Sets keep every membership test O(1) for both granularities.
        item_keys_set = item_keys.to_set
        cost_keys_set = cost_keys.to_set
        timed =
          if g == :example
            cost_file_keys_set = cost_keys.map { |k| file_from_locator(k) }.to_set
            ->(k) { cost_keys_set.include?(k) || cost_file_keys_set.include?(file_from_locator(k)) }
          else
            ->(k) { cost_keys_set.include?(k) }
          end

        known_keys, missing = item_keys.partition(&timed)
        stale = cost_keys.reject { |k| item_keys_set.include?(k) }
        known = known_keys.size
        total = item_keys.size

        # An empty suite is reported as fully covered rather than dividing by zero.
        coverage = total.zero? ? 1.0 : known.to_f / total

        {
          missing_files: missing,
          stale_entries: stale,
          coverage: coverage,
          known_files: known,
          total_files: total,
          timing_file_age: timing_file_age(timing_path),
          default_weight: default_weight_for(costs),
          suspicious_missing: missing.select { |k| suspicious_path?(k) }
        }
      end
      # rubocop:enable Metrics/AbcSize

      # Logs warnings for an {analyze} result through +Polyrun::Log.warn+.
      #
      # @param analysis [Hash] result of {analyze}
      # @return [void]
      # rubocop:disable Metrics/AbcSize -- stale/missing timing warnings
      def emit_warnings!(analysis)
        cov = analysis[:coverage]
        if cov < 0.50
          Polyrun::Log.warn "polyrun: timing coverage #{format_percent(cov)} — binpack quality low; run full timing capture first"
        elsif cov < 0.80
          Polyrun::Log.warn "polyrun: timing coverage #{format_percent(cov)} (< 80%)"
        end

        dw = analysis[:default_weight]
        Polyrun::Log.warn "polyrun: default weight for missing files: #{format("%.4f", dw)}s (mean of known costs)"

        if analysis[:timing_file_age]
          Polyrun::Log.warn "polyrun: timing file age: #{analysis[:timing_file_age]}"
        end

        # Only the first few entries of each list are printed, followed by "..." when truncated.
        missing = analysis[:missing_files]
        unless missing.empty?
          Polyrun::Log.warn "polyrun: #{missing.size} file(s) without timing data"
          missing.first(10).each { |p| Polyrun::Log.warn "  missing: #{p}" }
          Polyrun::Log.warn "  ..." if missing.size > 10
        end

        stale = analysis[:stale_entries]
        unless stale.empty?
          Polyrun::Log.warn "polyrun: #{stale.size} timing entry(ies) for files not in suite"
          stale.first(5).each { |p| Polyrun::Log.warn "  stale: #{p}" }
          Polyrun::Log.warn "  ..." if stale.size > 5
        end

        suspicious = analysis[:suspicious_missing]
        return if suspicious.empty?

        Polyrun::Log.warn "polyrun: suspicious missing timing (#{suspicious.size} slow-path pattern(s)):"
        suspicious.first(5).each { |p| Polyrun::Log.warn "  suspicious: #{p}" }
      end
      # rubocop:enable Metrics/AbcSize

      # @return [Object] +path+ normalized by +TimingKeys.normalize_locator+
      def lookup_key(path, root, granularity)
        TimingKeys.normalize_locator(path.to_s, root, granularity)
      end

      # Strips a trailing +:<digits>+ from a locator; other keys are returned as strings unchanged.
      #
      # @return [String]
      def file_from_locator(key)
        s = key.to_s
        m = s.match(/\A(.+):(\d+)\z/)
        m ? m[1] : s
      end

      # @return [Boolean] true when the key's file basename matches {SUSPICIOUS_BASENAME}
      def suspicious_path?(key)
        File.basename(file_from_locator(key)).match?(SUSPICIOUS_BASENAME)
      end

      # Mean of the known costs, or 1.0 when there are none.
      #
      # @return [Float]
      def default_weight_for(costs)
        vals = costs&.values || []
        return 1.0 if vals.empty?

        vals.sum / vals.size.to_f
      end

      # @return [String, nil] human-readable age of the timing file, nil when the path is unset or not a file
      def timing_file_age(timing_path)
        return nil unless timing_path

        abs = File.expand_path(timing_path.to_s, Dir.pwd)
        return nil unless File.file?(abs)

        format_age(Time.now - File.mtime(abs))
      end

      # Formats seconds as minutes (< 1h), hours (< 1d) or days.
      #
      # @return [String]
      def format_age(sec)
        if sec < 3600
          format("%.0fm ago", sec / 60.0)
        elsif sec < 86_400
          format("%.1fh ago", sec / 3600.0)
        else
          format("%.1fd ago", sec / 86_400.0)
        end
      end

      # @return [String] ratio as a percentage with one decimal, e.g. "50.0%"
      def format_percent(ratio)
        format("%.1f%%", ratio * 100.0)
      end
    end
  end
end
@@ -1,6 +1,7 @@
1
1
  require "json"
2
2
 
3
3
  require_relative "../log"
4
+ require_relative "../timing/stats"
4
5
 
5
6
  module Polyrun
6
7
  module Partition
@@ -70,7 +71,7 @@ module Polyrun
70
71
  out = {}
71
72
  data.each do |k, v|
72
73
  key = normalize_locator(k.to_s, root, g)
73
- fv = v.to_f
74
+ fv = Polyrun::Timing::Stats.binpack_weight(v)
74
75
  if out.key?(key) && out[key] != fv
75
76
  Polyrun::Log.warn(
76
77
  "polyrun: timing JSON duplicate key #{key.inspect} after normalize (#{out[key]} vs #{fv}); using #{fv}"
@@ -0,0 +1,30 @@
1
+ require "json"
2
+ require "optparse"
3
+ require "shellwords"
4
+
5
module Polyrun
  module Queue
    # Parse duration strings like 10m, 1h, 600s into seconds.
    module Duration
      # Multiplier in seconds for each accepted unit suffix (matched case-insensitively).
      UNIT_SECONDS = {"s" => 1, "m" => 60, "h" => 3600, "d" => 86_400}.freeze

      # A non-negative decimal number with an optional single-letter unit.
      PATTERN = /\A(\d+(?:\.\d+)?)(s|m|h|d)?\z/i

      module_function

      # Converts a duration string to seconds. A bare number is taken as seconds.
      #
      # @param text [#to_s] e.g. "600", "600s", "10m", "1.5h", "2d"; surrounding whitespace is ignored
      # @return [Float] duration in seconds
      # @raise [Polyrun::Error] when the text is not a number with an optional s/m/h/d suffix
      def parse_seconds(text)
        m = PATTERN.match(text.to_s.strip)
        raise Polyrun::Error, "invalid duration: #{text.inspect}" unless m

        # The pattern only admits units present in the table, so fetch cannot miss.
        Float(m[1]) * UNIT_SECONDS.fetch((m[2] || "s").downcase)
      end
    end
  end
end
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Polyrun/FileLength, Metrics/ClassLength -- file-backed queue chunks + leases
1
2
  require "fileutils"
2
3
  require "json"
3
4
  require "securerandom"
@@ -5,6 +6,13 @@ require "time"
5
6
  module Polyrun
6
7
  module Queue
7
8
  # File-backed queue (spec_queue.md): +queue.json+, +pending/*.json+ chunks, +done.jsonl+, +leases.json+ (OS flock).
9
+ #
10
+ # Path lifecycle (lease transitions):
11
+ # init! → paths in pending chunks only
12
+ # claim! → pending −batch → active lease in leases.json
13
+ # ack! → lease removed; paths appended to done.jsonl
14
+ # reclaim! → stale or matching lease → paths returned to pending
15
+ # reclaim_lease! → one lease by id → paths returned to pending
8
16
  class FileStore
9
17
  CHUNK_SIZE = 500
10
18
 
@@ -77,19 +85,121 @@ module Polyrun
77
85
  true
78
86
  end
79
87
 
80
- def status
88
# Queue counters, read while holding the store lock.
#
# @param detailed [Boolean] when true, the result also carries "lease_details"
# @return [Hash] "pending", "done" and "leases" counts (plus "lease_details" when requested)
def status(detailed: false)
  with_lock do
    meta = load_meta!
    active = read_leases
    counts = {
      "pending" => Integer(meta["pending_count"]),
      "done" => Integer(meta["done_count"]),
      "leases" => active.keys.size
    }
    return counts unless detailed

    counts.merge("lease_details" => lease_details(active))
  end
end
102
+
103
# Reclaim leases older than +older_than+ seconds and/or matching +worker_id+ when set.
# Each reclaimed lease has its paths returned to pending and a RECLAIM row appended
# to the ledger; the surviving leases and the metadata are then written back.
#
# @param older_than [Numeric, nil] minimum lease age in seconds
# @param worker_id [#to_s, nil] restrict to leases held by this worker
# @return [Integer] number of paths returned to pending
def reclaim!(older_than: nil, worker_id: nil)
  total = 0
  with_lock do
    meta = load_meta!
    expired, kept = read_leases.partition do |_lease_id, lease|
      reclaim_lease?(lease, older_than: older_than, worker_id: worker_id)
    end

    expired.each do |lease_id, lease|
      paths = lease["paths"] || []
      return_paths_to_pending!(meta, paths)
      total += paths.size
      append_ledger(
        "RECLAIM" => lease_id,
        "worker_id" => lease["worker_id"],
        "paths" => paths
      )
    end

    write_leases!(kept.to_h)
    write_meta!(meta)
  end
  total
end
130
+
131
# Reclaims a single lease by id, returning its paths to pending.
#
# The paths are written back to pending BEFORE the lease is removed from
# leases.json, the same order {#reclaim!} uses. If the process dies between the
# two writes the paths are still recorded (in pending and in the lease) instead
# of being dropped from both.
#
# @param lease_id [String] key of the lease in leases.json
# @return [Integer] number of paths returned to pending; 0 when the lease is unknown
def reclaim_lease!(lease_id)
  reclaimed = 0
  with_lock do
    leases = read_leases
    lease = leases[lease_id]
    return 0 unless lease

    meta = load_meta!
    paths = lease["paths"] || []
    return_paths_to_pending!(meta, paths)

    leases.delete(lease_id)
    write_leases!(leases)
    write_meta!(meta)
    reclaimed = paths.size
    append_ledger("RECLAIM" => lease_id, "worker_id" => lease["worker_id"], "paths" => paths)
  end
  reclaimed
end
90
149
 
91
150
  private
92
151
 
152
# Decides whether one lease qualifies for {#reclaim!}.
#
# A lease held by a different worker never qualifies when +worker_id+ is given.
# With +older_than+, the lease qualifies once its age reaches that many seconds.
# Without +older_than+, it qualifies only if a +worker_id+ filter was given.
# Any ArgumentError raised while evaluating (e.g. an unparseable "claimed_at")
# makes the lease qualify.
#
# @return [Boolean]
def reclaim_lease?(lease, older_than:, worker_id:)
  return false if worker_id && lease["worker_id"].to_s != worker_id.to_s

  if older_than.nil? || older_than == false
    worker_id ? true : false
  else
    age = Time.now - Time.parse(lease["claimed_at"].to_s)
    age >= older_than
  end
rescue ArgumentError
  true
end
164
+
165
# Per-lease summary rows for the detailed status output.
#
# @param leases [Hash] lease id => lease hash as stored in leases.json
# @return [Array<Hash>] one row per lease; "age_seconds" is nil when "claimed_at" cannot be parsed
def lease_details(leases)
  now = Time.now
  leases.map do |lease_id, lease|
    age =
      begin
        (now - Time.parse(lease["claimed_at"].to_s)).round(1)
      rescue ArgumentError
        nil
      end

    {
      "lease_id" => lease_id,
      "worker_id" => lease["worker_id"],
      "paths_count" => (lease["paths"] || []).size,
      "claimed_at" => lease["claimed_at"],
      "age_seconds" => age
    }
  end
end
186
+
187
# Puts +paths+ back at the front of the pending queue and bumps meta["pending_count"].
#
# The paths are prepended to the first chunk file; when no chunk file exists a
# new "000001.json" chunk is created in the pending directory. +meta+ is mutated
# in place and is not written here.
#
# @param meta [Hash] queue metadata holding "pending_count"
# @param paths [Array] paths to return; an empty list is a no-op
# @return [void]
def return_paths_to_pending!(meta, paths)
  return if paths.empty?

  meta["pending_count"] = Integer(meta["pending_count"]) + paths.size
  returned = paths.map(&:to_s)
  head = sorted_chunk_files.first

  if head
    remaining = JSON.parse(File.read(head))
    atomic_write(head, JSON.generate(returned + remaining))
  else
    FileUtils.mkdir_p(pending_dir)
    atomic_write(File.join(pending_dir, "000001.json"), JSON.generate(returned))
  end
end
202
+
93
203
  def queue_path
94
204
  File.join(@root, "queue.json")
95
205
  end
@@ -197,3 +307,4 @@ module Polyrun
197
307
  end
198
308
 
199
309
  require_relative "file_store_pending"
310
+ # rubocop:enable Polyrun/FileLength, Metrics/ClassLength
@@ -25,6 +25,7 @@ module Polyrun
25
25
  extend_capybara_if_enabled!
26
26
  qloc = quick_example_location(block)
27
27
  Polyrun::WorkerPing.ping!(location: qloc)
28
+ Polyrun::SpecQuality.start_example!(location: qloc) if Polyrun::SpecQuality.started?
28
29
  begin
29
30
  run_before_hooks_from_chain(ancestor_chain)
30
31
  instance_eval(&block)
@@ -34,6 +35,7 @@ module Polyrun
34
35
  rescue => e
35
36
  @reporter.error(group_name, description, e)
36
37
  ensure
38
+ Polyrun::SpecQuality.finish_example!(location: qloc) if Polyrun::SpecQuality.started?
37
39
  run_after_hooks_from_chain(ancestor_chain)
38
40
  reset_capybara_if_enabled!
39
41
  @_let_cache = {}
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Polyrun/FileLength -- quick runner + spec quality wiring
1
2
  require "pathname"
2
3
 
3
4
  require_relative "assertions"
@@ -49,6 +50,14 @@ module Polyrun
49
50
  @capybara_enabled = false
50
51
  end
51
52
 
53
# Loads the spec-quality code and starts collection when it is enabled.
#
# @param root [String, nil] project root; defaults to the expanded current directory
# @param output_path [String, nil] forwarded to +Polyrun::SpecQuality.start!+
# @return [Object, nil] result of +start!+, or nil when spec quality is not enabled
def install_spec_quality!(root: nil, output_path: nil)
  require_relative "../spec_quality"

  if Polyrun::SpecQuality.enabled?
    Polyrun::SpecQuality.start!(
      root: root || File.expand_path(Dir.pwd),
      output_path: output_path
    )
  end
end
60
+
52
61
  def describe(name, &block)
53
62
  group = ExampleGroup.new(name)
54
63
  group.instance_eval(&block) if block
@@ -102,6 +111,7 @@ module Polyrun
102
111
  end
103
112
 
104
113
  quick_start_coverage_if_configured!
114
+ quick_start_spec_quality_if_configured!
105
115
 
106
116
  collector = load_quick_files!(files)
107
117
  return 1 unless collector
@@ -148,6 +158,16 @@ module Polyrun
148
158
  end
149
159
  end
150
160
 
161
# Starts spec-quality collection for a quick run when it was requested and is
# not already running.
#
# The require comes first because every line below references
# +Polyrun::SpecQuality+; +require_relative+ is a no-op when the file is
# already loaded.
#
# @return [Object, nil] result of +Polyrun::SpecQuality.start!+, or nil when nothing was started
def quick_start_spec_quality_if_configured!
  require_relative "../spec_quality"

  return unless Polyrun::SpecQuality.spec_quality_requested_for_quick?(Dir.pwd)
  return if Polyrun::SpecQuality.started?

  Polyrun::SpecQuality.start!(
    root: File.expand_path(Dir.pwd)
  )
end
170
+
151
171
  def quick_start_coverage_if_configured!
152
172
  return unless Polyrun::Coverage::Collector.coverage_requested_for_quick?(Dir.pwd)
153
173
  return if Polyrun::Coverage::Collector.started?
@@ -187,3 +207,4 @@ module Polyrun
187
207
  end
188
208
  end
189
209
  end
210
+ # rubocop:enable Polyrun/FileLength
data/lib/polyrun/rspec.rb CHANGED
@@ -21,6 +21,8 @@ module Polyrun
21
21
  if output_path
22
22
  op = output_path
23
23
  Class.new(Polyrun::Timing::RSpecExampleFormatter) do
24
+ ::RSpec::Core::Formatters.register self, :example_finished, :close
25
+
24
26
  define_method(:timing_output_path) { op }
25
27
  end
26
28
  else
@@ -47,6 +49,14 @@ module Polyrun
47
49
 
48
50
  # Writes {WorkerPing} after suite start, before/after each example (+location+ is file:line from metadata).
49
51
  # Keeps +--worker-idle-timeout+ sensitive to example progress (not only a background thread).
52
+ def install_spec_quality!(only_if: nil, root: nil, output_path: nil)
53
+ pred = only_if || -> { Polyrun::SpecQuality.enabled? }
54
+ return unless pred.call
55
+
56
+ require_relative "spec_quality/rspec_hook"
57
+ Polyrun::SpecQuality::RspecHook.install!(only_if: pred, root: root, output_path: output_path)
58
+ end
59
+
50
60
  def install_worker_ping!
51
61
  require "rspec/core"
52
62
  require_relative "worker_ping"
@@ -0,0 +1,134 @@
1
+ require "yaml"
2
+
3
module Polyrun
  module SpecQuality
    # Loads +config/polyrun_spec_quality.yml+ and +ENV+ overrides.
    module Config
      DEFAULT_CONFIG_RELATIVE = File.join("config", "polyrun_spec_quality.yml").freeze

      DEFAULTS = {
        "track_under" => %w[lib app],
        "min_line_churn" => 50,
        "min_query_count" => 20,
        "hot_line_example_overlap" => 10,
        "strict" => false,
        "sample" => 1.0,
        "ignore_examples" => [],
        "ignore_paths" => [],
        "ignore_query_patterns" => [],
        "profile" => %w[cpu mem],
        "sql_counter" => false,
        "minimum_unique_lines_per_example" => nil,
        "max_zero_hit_examples" => nil,
        "max_hot_line_overlap" => nil
      }.freeze

      module_function

      # @param env [#[]] environment lookup (defaults to +ENV+)
      # @return [Boolean] true when spec quality is switched on and not explicitly disabled
      def enabled?(env = ENV)
        return false if disabled?(env)

        truthy?(env["POLYRUN_SPEC_QUALITY"]) || truthy?(env["POLYRUN_SPEC_QUALITY_FRAGMENTS"])
      end

      # @return [Boolean] true when +POLYRUN_SPEC_QUALITY_DISABLE+ is truthy
      def disabled?(env = ENV)
        truthy?(env["POLYRUN_SPEC_QUALITY_DISABLE"])
      end

      # Builds the effective configuration. Later layers win:
      # DEFAULTS, then the YAML file, then environment variables, then +overrides+.
      #
      # @param root [String] project root; stored expanded under "root"
      # @param config_path [String, nil] explicit YAML path; defaults to DEFAULT_CONFIG_RELATIVE under +root+
      # @param env [#[], #key?] environment lookup (defaults to +ENV+)
      # @param overrides [Hash] extra settings, keys are stringified
      # @return [Hash] string-keyed configuration
      def load(root:, config_path: nil, env: ENV, **overrides)
        root = File.expand_path(root)
        merged = DEFAULTS.merge(stringify_keys(load_yaml(root, config_path)))
        apply_env!(merged, env)
        merged.merge!(stringify_keys(overrides))
        merged["root"] = root
        merged["strict"] = resolve_strict(merged, env)
        merged["sample"] = resolve_sample(merged, env)
        normalize_config!(merged)
        merged
      end

      # @return [Hash] parsed YAML mapping, or {} when the file is missing or is not a mapping
      def load_yaml(root, config_path)
        path = File.expand_path(config_path || File.join(root, DEFAULT_CONFIG_RELATIVE))
        return {} unless File.file?(path)

        # NOTE(review): YAML.load_file's safety depends on the installed Psych version —
        # confirm the config file is always project-controlled.
        data = YAML.load_file(path)
        data.is_a?(Hash) ? data : {}
      end

      # Applies environment overrides to +cfg+ in place.
      #
      # A blank or non-numeric +POLYRUN_SPEC_QUALITY_SAMPLE+ is ignored, leaving the
      # file/default value in place, instead of raising out of {load}.
      def apply_env!(cfg, env)
        cfg["strict"] = true if truthy?(env["POLYRUN_SPEC_QUALITY_STRICT"])
        if env.key?("POLYRUN_SPEC_QUALITY_SAMPLE")
          sample = Float(env["POLYRUN_SPEC_QUALITY_SAMPLE"].to_s.strip, exception: false)
          cfg["sample"] = sample if sample
        end
        if env.key?("POLYRUN_SPEC_QUALITY_SQL_COUNTER")
          cfg["sql_counter"] = truthy?(env["POLYRUN_SPEC_QUALITY_SQL_COUNTER"])
        end
        prof = env["POLYRUN_SPEC_QUALITY_PROFILE"]
        cfg["profile"] = prof.split(",").map(&:strip).reject(&:empty?) if prof && !prof.strip.empty?
      end

      # @return [Boolean] true when strict mode is set by the environment or by the merged config
      def resolve_strict(cfg, env)
        return true if truthy?(env["POLYRUN_SPEC_QUALITY_STRICT"])

        cfg["strict"] == true || truthy?(cfg["strict"])
      end

      # @return [Float] cfg["sample"] clamped to 0.0..1.0; 1.0 when it cannot be converted
      def resolve_sample(cfg, _env)
        v = cfg["sample"]
        f = v.is_a?(Numeric) ? v.to_f : Float(v)
        f.clamp(0.0, 1.0)
      rescue ArgumentError, TypeError
        1.0
      end

      # Coerces list settings to arrays of strings and the numeric thresholds to
      # Integer, mutating +cfg+ in place.
      # rubocop:disable Metrics/AbcSize -- config key normalization
      def normalize_config!(cfg)
        cfg["track_under"] = Array(cfg["track_under"]).map(&:to_s).reject(&:empty?)
        cfg["track_under"] = %w[lib] if cfg["track_under"].empty?
        cfg["ignore_examples"] = Array(cfg["ignore_examples"]).map(&:to_s)
        cfg["ignore_paths"] = Array(cfg["ignore_paths"]).map(&:to_s)
        cfg["ignore_query_patterns"] = Array(cfg["ignore_query_patterns"]).map(&:to_s)
        cfg["profile"] = Array(cfg["profile"]).map(&:to_s).reject(&:empty?)
        %w[min_line_churn min_query_count hot_line_example_overlap].each do |k|
          cfg[k] = Integer(cfg[k]) if cfg[k]
        end
      end
      # rubocop:enable Metrics/AbcSize

      # Tests an example location against the ignore patterns.
      #
      # A pattern written as +/.../+ is used as a regular expression (falling back
      # to a substring match when it does not compile); anything else is a substring
      # match. Empty patterns are skipped, because +include?("")+ is true for every
      # string and one blank entry would otherwise ignore every example.
      #
      # @param location [#to_s] example location
      # @param ignore_examples [Array, Object, nil] patterns
      # @return [Boolean]
      def ignored_example?(location, ignore_examples)
        loc = location.to_s
        return false if loc.empty?

        Array(ignore_examples).any? do |raw|
          pat = raw.to_s
          next false if pat.empty?

          if pat.start_with?("/") && pat.end_with?("/") && pat.size > 2
            loc.match?(Regexp.new(pat[1..-2]))
          else
            loc.include?(pat)
          end
        rescue RegexpError
          loc.include?(pat)
        end
      end

      # Recursively converts Hash keys to strings, descending into arrays.
      def stringify_keys(obj)
        case obj
        when Hash
          obj.each_with_object({}) { |(k, v), out| out[k.to_s] = stringify_keys(v) }
        when Array
          obj.map { |e| stringify_keys(e) }
        else
          obj
        end
      end

      # @return [Boolean] true for "1", "true", "yes", "on" (case-insensitive, surrounding whitespace ignored)
      def truthy?(value)
        return false if value.nil?

        %w[1 true yes on].include?(value.to_s.strip.downcase)
      end
      private_class_method :truthy?
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require "fileutils"
2
+
3
+ require "json"
4
+
5
module Polyrun
  module SpecQuality
    # Paths and append-only writes for per-worker spec-quality fragment files (JSON lines).
    module Fragment
      module_function

      # @param env [#fetch] environment lookup (defaults to +ENV+)
      # @return [String] absolute fragment path; the directory comes from
      #   +POLYRUN_SPEC_QUALITY_FRAGMENT_DIR+ or {default_fragment_dir}
      def default_fragment_path(env = ENV)
        fallback_dir = default_fragment_dir
        directory = env.fetch("POLYRUN_SPEC_QUALITY_FRAGMENT_DIR", fallback_dir)
        suffix = Polyrun::Coverage::CollectorFragmentMeta.fragment_default_basename_from_env(env)
        filename = "polyrun-spec-quality-fragment-#{suffix}.jsonl"
        File.expand_path(File.join(directory, filename))
      end

      # @return [String] "coverage" under the current working directory
      def default_fragment_dir
        File.join(Dir.pwd, "coverage")
      end

      # @param cwd [String] directory that contains "coverage"
      # @return [String] glob matching fragment files under +cwd+/coverage
      def glob_pattern(cwd = Dir.pwd)
        File.join(cwd, "coverage", "polyrun-spec-quality-fragment-*.jsonl")
      end

      # Creates the directory that will hold +path+ (and any missing parents).
      def ensure_fragment_dir!(path)
        parent = File.dirname(path)
        FileUtils.mkdir_p(parent)
      end

      # Creates or empties the fragment file at +path+.
      def truncate_fragment!(path)
        ensure_fragment_dir!(path)
        File.write(path, "")
      end

      # Appends +row+ to the fragment as one JSON line.
      #
      # @return [nil]
      def append_row!(path, row)
        ensure_fragment_dir!(path)
        encoded = JSON.generate(row)
        File.open(path, "a") { |io| io.puts(encoded) }
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require "json"
2
+
3
module Polyrun
  module SpecQuality
    # Merges per-worker spec-quality fragment files (JSON lines) into one payload.
    module Merge
      module_function

      # @param paths [Array<String>] fragment files to read, in order
      # @return [Hash] merged payload (see {build_merged_payload})
      def merge_files(paths)
        examples = {}
        paths.each do |path|
          merge_file_into!(examples, path)
        end
        build_merged_payload(examples, paths.size)
      end

      # Reads one fragment into +examples+, keyed by each row's "example" value.
      # A later row for the same example replaces the earlier one. Blank lines,
      # rows that are not JSON objects, and rows without an "example" are skipped.
      #
      # @raise [JSON::ParserError] when a non-blank line is not valid JSON
      def merge_file_into!(examples, path)
        File.foreach(path) do |line|
          line = line.strip
          next if line.empty?

          row = JSON.parse(line)
          # `null`, numbers, strings and arrays are valid JSON but carry no example data.
          next unless row.is_a?(Hash)

          key = row["example"].to_s
          next if key.empty?

          examples[key] = row
        end
      end

      # @param examples [Hash] example location => fragment row
      # @param fragment_count [Integer] number of fragment files merged
      # @return [Hash] "examples", "hot_lines", "shard_summary" and "meta"
      def build_merged_payload(examples, fragment_count)
        hot_lines = aggregate_hot_lines(examples)
        {
          "examples" => examples,
          "hot_lines" => hot_lines,
          "shard_summary" => shard_summary(examples),
          "meta" => {
            "polyrun_version" => Polyrun::VERSION,
            "fragment_count" => fragment_count,
            "example_count" => examples.size
          }
        }
      end

      # Per-shard totals keyed by the stringified "polyrun_shard_index" ("?" when absent).
      #
      # The result is a plain Hash without a default proc, so looking up an
      # unknown shard returns nil instead of inserting an empty entry.
      #
      # @return [Hash] shard => {"examples", "zero_hit", "line_churn"}
      def shard_summary(examples)
        by_shard = {}
        examples.each_value do |row|
          shard = row["polyrun_shard_index"]
          shard = shard.nil? ? "?" : shard.to_s
          totals = (by_shard[shard] ||= {"examples" => 0, "zero_hit" => 0, "line_churn" => 0})
          totals["examples"] += 1
          totals["zero_hit"] += 1 if row["unique_lines"].to_i.zero?
          totals["line_churn"] += row["line_churn"].to_i
        end
        by_shard
      end

      # Groups every row's "lines" entries (+[path, line_no, delta]+) by "path:line".
      #
      # @return [Hash] "path:line" => {"examples" => [...], "total_hits" => Integer,
      #   "example_count" => number of distinct examples}
      def aggregate_hot_lines(examples)
        by_line = {}
        examples.each do |example_loc, row|
          Array(row["lines"]).each do |entry|
            path, line_no, delta = entry
            bucket = (by_line["#{path}:#{line_no}"] ||= {"examples" => [], "total_hits" => 0})
            bucket["examples"] << example_loc
            bucket["total_hits"] += delta.to_i
          end
        end
        by_line.each_value { |bucket| bucket["example_count"] = bucket["examples"].uniq.size }
        by_line
      end

      # Merges +paths+ and writes the payload to +output_path+ as pretty-printed JSON.
      #
      # @return [Hash] the merged payload
      def merge_and_write(paths, output_path)
        merged = merge_files(paths)
        File.write(output_path, JSON.pretty_generate(merged))
        merged
      end
    end
  end
end