polyrun 1.5.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +34 -0
  3. data/README.md +2 -2
  4. data/docs/SETUP_PROFILE.md +2 -0
  5. data/lib/polyrun/cli/coverage_commands.rb +1 -1
  6. data/lib/polyrun/cli/failure_commands.rb +1 -1
  7. data/lib/polyrun/cli/help.rb +20 -17
  8. data/lib/polyrun/cli/helpers.rb +16 -0
  9. data/lib/polyrun/cli/init_command.rb +8 -1
  10. data/lib/polyrun/cli/partition_diagnostics.rb +22 -0
  11. data/lib/polyrun/cli/plan_command.rb +47 -18
  12. data/lib/polyrun/cli/queue_command.rb +25 -2
  13. data/lib/polyrun/cli/run_queue_command.rb +145 -0
  14. data/lib/polyrun/cli/run_shards_command.rb +6 -1
  15. data/lib/polyrun/cli/run_shards_parallel_children.rb +2 -1
  16. data/lib/polyrun/cli/run_shards_parallel_wait.rb +5 -1
  17. data/lib/polyrun/cli/run_shards_plan_boot_phases.rb +47 -2
  18. data/lib/polyrun/cli/run_shards_plan_options.rb +14 -4
  19. data/lib/polyrun/cli/run_shards_planning.rb +20 -12
  20. data/lib/polyrun/cli/run_shards_run.rb +22 -5
  21. data/lib/polyrun/cli/spec_quality_commands.rb +140 -0
  22. data/lib/polyrun/cli.rb +16 -2
  23. data/lib/polyrun/coverage/example_diff.rb +122 -0
  24. data/lib/polyrun/coverage/merge/formatters_html.rb +5 -5
  25. data/lib/polyrun/data/factory_counts.rb +14 -1
  26. data/lib/polyrun/database/clone_shards.rb +2 -0
  27. data/lib/polyrun/database/shard.rb +2 -1
  28. data/lib/polyrun/minitest.rb +9 -0
  29. data/lib/polyrun/partition/hrw.rb +40 -3
  30. data/lib/polyrun/partition/paths_build.rb +8 -3
  31. data/lib/polyrun/partition/plan.rb +88 -19
  32. data/lib/polyrun/partition/plan_lpt.rb +49 -7
  33. data/lib/polyrun/partition/plan_sharding.rb +8 -0
  34. data/lib/polyrun/partition/reports.rb +139 -0
  35. data/lib/polyrun/partition/timing_diagnostics.rb +139 -0
  36. data/lib/polyrun/partition/timing_keys.rb +2 -1
  37. data/lib/polyrun/queue/duration.rb +30 -0
  38. data/lib/polyrun/queue/file_store.rb +114 -3
  39. data/lib/polyrun/quick/example_runner.rb +2 -0
  40. data/lib/polyrun/quick/runner.rb +21 -0
  41. data/lib/polyrun/rspec.rb +10 -0
  42. data/lib/polyrun/spec_quality/config.rb +134 -0
  43. data/lib/polyrun/spec_quality/fragment.rb +39 -0
  44. data/lib/polyrun/spec_quality/merge.rb +78 -0
  45. data/lib/polyrun/spec_quality/minitest_hook.rb +42 -0
  46. data/lib/polyrun/spec_quality/plan_loader.rb +47 -0
  47. data/lib/polyrun/spec_quality/profile.rb +91 -0
  48. data/lib/polyrun/spec_quality/report.rb +261 -0
  49. data/lib/polyrun/spec_quality/rspec_hook.rb +55 -0
  50. data/lib/polyrun/spec_quality/sql_counter.rb +34 -0
  51. data/lib/polyrun/spec_quality.rb +205 -0
  52. data/lib/polyrun/templates/POLYRUN.md +6 -0
  53. data/lib/polyrun/templates/ci_matrix.polyrun.yml +4 -0
  54. data/lib/polyrun/templates/polyrun_hooks_spec_quality.rb +12 -0
  55. data/lib/polyrun/templates/polyrun_spec_quality.yml +20 -0
  56. data/lib/polyrun/templates/rails_prepare.polyrun.yml +5 -0
  57. data/lib/polyrun/timing/merge.rb +5 -5
  58. data/lib/polyrun/timing/rspec_example_formatter.rb +14 -7
  59. data/lib/polyrun/timing/stats.rb +76 -0
  60. data/lib/polyrun/timing/summary.rb +5 -2
  61. data/lib/polyrun/timing/variance_report.rb +51 -0
  62. data/lib/polyrun/version.rb +1 -1
  63. metadata +22 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1d4fc5867eb97f45848d6da2b7ac8d0c3906de5cd0df849f02042aaaee1e9bbf
4
- data.tar.gz: a10e216d02b76c722627ab5d0c73e5fe061c118d14f356bdc620655a7e83454a
3
+ metadata.gz: 756cf8e1b8e2176c4097520752dfdd4b856d03b134a999462641f90a9f217e71
4
+ data.tar.gz: 284e536d886b4bfc913415b0e3301a31ca98ef3446da1b24163bcc6af485e642
5
5
  SHA512:
6
- metadata.gz: fd37e0e3c6f3afccb8da9dba32b0b836d6b0ddf25181b0bbc2b3d20be561d93518af54ffb929cb62511259a788ad080f9f7080252231d3554aee5ab056c3c841
7
- data.tar.gz: 92cb1e5ded19005ccca5975754525857ae5f1b9561947b9a6d1b736e62c098a61e377e3465dba1add9d265277f44672911be48e27097b9c3813ef188341f3a94
6
+ metadata.gz: 445f64e0e62be50ab6c63026f27ad8f5f093521bbeed4a05b0ce7adc6a13bcb0496f1489d8243b403f859826e599194c3399a761a9210cbbf5d8027f3fdba52e
7
+ data.tar.gz: 554a292c2c7ac40e3df11d886ca7a71c10b81f1fa144af00c5c1d20b950ff6d999306be1e3fa75a9982450b2d202bc2bee34d5201ab39dbb386b1b73b42f1403
data/CHANGELOG.md CHANGED
@@ -2,6 +2,40 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## 2.1.2 (2026-07-05)
6
+
7
+ - Fix per-example timing JSON when RSpec supplies `absolute_file_path` metadata; fix formatter registration when `install_example_timing!` uses a custom output path.
8
+
9
+ ## 2.1.1 (2026-07-05)
10
+
11
+ - Fix HTML coverage report `Encoding::CompatibilityError` when source files contain UTF-8; read templates, assets, and source lines as UTF-8 in `formatters_html.rb`.
12
+
13
+ ## 2.1.0 (2026-07-03)
14
+
15
+ - Add experimental per-example spec quality (`Polyrun::SpecQuality`): `POLYRUN_SPEC_QUALITY=1`, worker JSONL fragments, `merge-spec-quality`, `report-spec-quality`, and `run-shards --merge-spec-quality`.
16
+ - Add `Polyrun::RSpec.install_spec_quality!`, `Polyrun::Minitest.install_spec_quality!`, and Quick example hooks; `polyrun init --profile spec-quality`.
17
+ - Add `Polyrun::Coverage::ExampleDiff` for per-example line hit deltas from `Coverage.peek_result`; record per-example factory counts via `Data::FactoryCounts`.
18
+ - Report zero-hit examples, line churn, hot lines, shard attribution, optional partition hints (`report-spec-quality --plan`), optional CPU/GC/IO profiling (stdlib), and CI gate thresholds in `config/polyrun_spec_quality.yml`.
19
+ - Document spec quality in `docs/SPEC_QUALITY.md`.
20
+
21
+ ## 2.0.0 (2026-07-03)
22
+
23
+ - Add `polyrun run-queue` to init a file-backed queue and run N workers that claim batches until drained; `--on-failure exit|requeue` reclaims open leases when a worker exits before ack.
24
+ - Add `polyrun queue reclaim` with `--older-than DURATION` (`10m`, `1h`, …) and optional `--worker ID`; add `Queue::FileStore#reclaim!` and `#reclaim_lease!`.
25
+ - Add partition imbalance and dominant-file reports after `plan` and `run-shards` (`Partition::Reports`); warn on stale or missing timing coverage before cost-based partition (`Partition::TimingDiagnostics`).
26
+ - Add partition strategies `lazy_robin` (sorted round-robin with timing diagnostics), `preserve_order_round_robin` (paths-file order), `stable_cost_binpack` (stable assignment with LPT fallback), and `weighted_hrw` (rendezvous with `shard_weights`).
27
+ - Add `partition.stable_assignment_file` and `partition.shard_weights` for stable binpack and weighted HRW; wire into `plan` and `run-shards`.
28
+ - Add `Polyrun::Timing::Stats` for rich timing entries (`last_seconds`, `mean`, `p95`, `runs`, `failures`, `timeouts`); `merge-timing` merges via `Stats.merge_entries` and emits `Timing::VarianceReport` warnings.
29
+ - Add `POLYRUN_HRW_FAST_SCORE` for faster deterministic HRW scoring.
30
+ - Auto-select `cost_binpack` when `partition.timing_file` or `--timing` is set and strategy is not explicit; load timing for `lazy_robin` and diagnostics even when strategy stays round-robin.
31
+ - Fix `paths_build` glob stages to match against the remaining pool (`File.fnmatch?`) instead of re-globbing the cwd.
32
+ - Warn when a database URL has no database segment (shard suffix skipped); name `shard_index` in `CloneShards` errors.
33
+ - Refactor LPT forced-item handling into a single pass before free-item balance.
34
+ - Add Makefile, Prayfile, and root `polyrun.yml`; run repo specs via polyrun in CI.
35
+ - BREAKING: `partition.timing_file` without an explicit `strategy` now implies `cost_binpack` instead of `round_robin`.
36
+ - BREAKING: `merge-timing` output entries are objects with timing stats, not bare scalar seconds; tools that read merged JSON must accept objects or use `Stats.binpack_weight`.
37
+ - BREAKING: `paths_build` glob stages filter the staged pool only; membership and order can differ from prior re-glob behavior.
38
+
5
39
  ## 1.5.0 (2026-05-04)
6
40
 
7
41
  - Add `run-shards --worker-timeout SEC` and `POLYRUN_WORKER_TIMEOUT_SEC` (wall time per worker since spawn); stop stuck workers; record exit 124 for that shard.
data/README.md CHANGED
@@ -23,7 +23,7 @@ Capybara and Playwright stay in your application; Polyrun does not replace brows
23
23
  2. Add a `polyrun.yml` beside the app, or pass `-c` to point at one. Configure `partition` (paths, shard index and total, strategy), and optionally `databases` (Postgres template and `shard_db_pattern`), `prepare`, and `coverage`. If you use `partition.paths_build`, Polyrun can write `partition.paths_file` (for example `spec/spec_paths.txt`) from globs and ordered stages—substring priorities for integration specs, or a regex stage for “Rails-heavy files first”—without a per-project Ruby script. That step runs before `plan` and `run-shards`. Use `bin/polyrun build-paths` to refresh the paths file only.
24
24
  3. Run prepare once before fan-out—for example `script/ci_prepare` for Vite or webpack builds, and `Polyrun::Prepare::Assets` digest markers. See `examples/TESTING_REQUIREMENTS.md`.
25
25
  4. Run workers with `bin/polyrun run-shards --workers N -- bundle exec rspec`: N separate OS processes, each running RSpec with its own file list from `partition.paths_file`, or `spec/spec_paths.txt`, or else `spec/**/*_spec.rb`. Stderr shows where paths came from; after a successful multi-worker run it reminds you to run merge-coverage unless you use `parallel-rspec` or `run-shards --merge-coverage`.
26
- 5. Merge artifacts with `bin/polyrun merge-coverage` on `coverage/polyrun-fragment-*.json` (one fragment per `POLYRUN_SHARD_INDEX` when coverage is on), or use `bin/polyrun parallel-rspec` or `run-shards --merge-coverage` so Polyrun runs merge for you. Optional: `merge-timing`, `report-timing`, `report-junit`.
26
+ 5. Merge artifacts with `bin/polyrun merge-coverage` on `coverage/polyrun-fragment-*.json` (one fragment per `POLYRUN_SHARD_INDEX` when coverage is on), or use `bin/polyrun parallel-rspec` or `run-shards --merge-coverage` so Polyrun runs merge for you. Optional: `merge-timing`, `report-timing`, `report-junit`, `merge-spec-quality`, `report-spec-quality` (experimental per-example spec quality; see [docs/SPEC_QUALITY.md](docs/SPEC_QUALITY.md)).
27
27
 
28
28
  ### Hooks (`hooks:` in `polyrun.yml`)
29
29
 
@@ -189,7 +189,7 @@ bin/polyrun quick spec/polyrun_quick/smoke.rb
189
189
 
190
190
  Shard index and total in CI (`Polyrun::Env::Ci`): when set, `POLYRUN_SHARD_INDEX` and `POLYRUN_SHARD_TOTAL` take precedence. When `CI` is truthy, `CI_NODE_INDEX` / `CI_NODE_TOTAL` and other parallel-job environment variables are read if present. If your runner does not export those, set `POLYRUN_SHARD_*` from the job matrix.
191
191
 
192
- File queue (`polyrun queue …`): batches live on disk under a lock file; paths move from `pending` to `leases` on claim and to `done` on ack. There is no lease TTL: if a worker dies after claiming, paths remain in `leases` until you recover them (manually or with a future reclaim command).
192
+ File queue (`polyrun queue …`): batches live on disk under a lock file; paths move from `pending` to `leases` on claim and to `done` on ack. Stale leases can be returned to `pending` with `polyrun queue reclaim --older-than 10m` (or `--worker ID`). Use `polyrun queue status --json` for lease ages. `polyrun run-queue` reclaims a worker's open lease when that worker exits before ack.
193
193
 
194
194
  ## Examples
195
195
 
data/docs/SETUP_PROFILE.md CHANGED
@@ -45,6 +45,8 @@ Rule: anything expensive (compile, `yarn`, Playwright download) belongs in prepa
45
45
  | Plain glob | `partition.paths_build.all_glob: spec/**/*_spec.rb` and empty or minimal `stages` |
46
46
  | Ordered stages | `partition.paths_build.stages`: regex (e.g. slow integration first) or `sort_by_substring_order` for stable ordering |
47
47
 
48
+ `paths_build` controls **membership** in `partition.paths_file`, not shard assignment order. Default `round_robin` sorts paths alphabetically before mod assignment. Use `strategy: preserve_order_round_robin` to honor paths-file line order. Set `partition.timing_file` without `strategy` to auto-select `cost_binpack`; use `lazy_robin` for round-robin assignment with timing diagnostics.
49
+
48
50
  Refresh list: `polyrun -c polyrun.yml build-paths` (also runs automatically before `plan` / `run-shards` when configured).
49
51
 
50
52
  ## 6. Coverage and CI reports
data/lib/polyrun/cli/coverage_commands.rb CHANGED
@@ -92,7 +92,7 @@ module Polyrun
92
92
  def merge_coverage_after_shards(output:, format_list:, config_path:)
93
93
  files = merge_coverage_fragment_json_files
94
94
  if files.empty?
95
- Polyrun::Log.warn "polyrun run-shards: --merge-coverage: no coverage/polyrun-fragment-*.json found (enable Polyrun coverage in spec_helper?)"
95
+ Polyrun::Log.warn "polyrun run-shards: --merge-coverage: no coverage fragments found under coverage (enable coverage collection in your test setup)"
96
96
  return 0
97
97
  end
98
98
 
data/lib/polyrun/cli/failure_commands.rb CHANGED
@@ -57,7 +57,7 @@ module Polyrun
57
57
  pattern = Polyrun::Reporting::FailureMerge.default_fragment_glob
58
58
  files = Dir.glob(pattern).sort
59
59
  if files.empty?
60
- Polyrun::Log.warn "polyrun run-shards: --merge-failures: no #{Polyrun::Reporting::FailureMerge::FRAGMENT_GLOB} under fragment dir (enable Polyrun::RSpec.install_failure_fragments! in spec_helper?)"
60
+ Polyrun::Log.warn "polyrun run-shards: --merge-failures: no failure fragments found under tmp/polyrun_failures (enable failure fragments in your test setup)"
61
61
  return nil
62
62
  end
63
63
 
data/lib/polyrun/cli/help.rb CHANGED
@@ -13,20 +13,20 @@ module Polyrun
13
13
  -h, --help
14
14
 
15
15
  Trace timing (stderr): DEBUG=1 or POLYRUN_DEBUG=1
16
- Branch coverage in JSON fragments: POLYRUN_COVERAGE_BRANCHES=1 (stdlib Coverage; merge-coverage merges branches)
17
- polyrun quick coverage: POLYRUN_COVERAGE=1 or (config/polyrun_coverage.yml + POLYRUN_QUICK_COVERAGE=1); POLYRUN_COVERAGE_DISABLE=1 skips
18
- Merge wall time (stderr): POLYRUN_PROFILE_MERGE=1 (or verbose / DEBUG)
16
+ Coverage: POLYRUN_COVERAGE=1 (or config/polyrun_coverage.yml + POLYRUN_QUICK_COVERAGE=1); POLYRUN_COVERAGE_DISABLE=1 skips; POLYRUN_COVERAGE_BRANCHES=1 for branch data in fragments
17
+ Merge profiling (stderr): POLYRUN_PROFILE_MERGE=1 (or verbose / DEBUG)
19
18
  Post-merge formats (run-shards): POLYRUN_MERGE_FORMATS (default: json,lcov,cobertura,console,html)
20
- Skip optional script/build_spec_paths.rb before start: POLYRUN_SKIP_BUILD_SPEC_PATHS=1
21
- Skip start auto-prepare / auto DB provision: POLYRUN_START_SKIP_PREPARE=1, POLYRUN_START_SKIP_DATABASES=1
22
- Skip writing paths_file from partition.paths_build: POLYRUN_SKIP_PATHS_BUILD=1
23
- Warn if merge-coverage wall time exceeds N seconds (default 10): POLYRUN_MERGE_SLOW_WARN_SECONDS (0 disables)
24
- Failure fragments (run-shards --merge-failures): POLYRUN_MERGE_FAILURES=1; parent sets POLYRUN_FAILURE_FRAGMENTS=1 in workers; POLYRUN_FAILURE_FRAGMENT_DIR, POLYRUN_MERGED_FAILURES_OUT, POLYRUN_MERGED_FAILURES_FORMAT; after_suite sets POLYRUN_MERGED_FAILURES_PATH when merge ran
25
- Parallel RSpec workers: POLYRUN_WORKERS default 5, max 10 (run-shards / parallel-rspec / start); distinct from POLYRUN_SHARD_PROCESSES / ci-shard --shard-processes (local processes per CI matrix job)
26
- Per-worker wall timeout: run-shards --worker-timeout SEC or POLYRUN_WORKER_TIMEOUT_SEC (max time since each worker spawn). Parent polls all live workers together. Exit 124; remaining workers stopped.
27
- Per-worker idle timeout: --worker-idle-timeout SEC or POLYRUN_WORKER_IDLE_TIMEOUT_SEC counts only after a successful ping timestamp (positive float in POLYRUN_WORKER_PING_FILE); empty or unreadable pings do not satisfy idle enforcement—use wall timeout until the first ping. RSpec/Minitest/Quick installers call Polyrun::WorkerPing.ping! per example/suite. Ping files live under tmp/polyrun/ (gitignored via tmp/); parent unlinks each after its worker exits. Exit 125. Optional outer cap: --worker-timeout (exit 124). Optional periodic pings: POLYRUN_WORKER_PING_THREAD=1 (POLYRUN_WORKER_PING_INTERVAL_SEC); WorkerPing.ensure_interval_ping_thread! (installers invoke it—call yourself if wiring workers without install_worker_ping!).
28
- If Polyrun::Log.stderr is null or redirected away, set POLYRUN_ORCHESTRATION_STDERR=1 to also print timeout/SIGINT summary lines to process stderr.
29
- Partition timing granularity (default file): POLYRUN_TIMING_GRANULARITY=file|example (experimental per-example; see partition.timing_granularity)
19
+ Start skips: POLYRUN_SKIP_BUILD_SPEC_PATHS=1, POLYRUN_START_SKIP_PREPARE=1, POLYRUN_START_SKIP_DATABASES=1
20
+ Paths build skip: POLYRUN_SKIP_PATHS_BUILD=1
21
+ Slow merge warning (seconds, default 10; 0 disables): POLYRUN_MERGE_SLOW_WARN_SECONDS
22
+ Failure merge: run-shards --merge-failures (enable failure fragments in test setup); POLYRUN_MERGE_FAILURES=1, POLYRUN_FAILURE_FRAGMENT_DIR, POLYRUN_MERGED_FAILURES_OUT
23
+ Parallel workers: POLYRUN_WORKERS default 5, max 10 (run-shards / parallel-rspec / start). CI local processes per job: POLYRUN_SHARD_PROCESSES or ci-shard --shard-processes (not POLYRUN_WORKERS)
24
+ Per-worker wall timeout: --worker-timeout SEC or POLYRUN_WORKER_TIMEOUT_SEC. Exit 124; parent stops remaining workers.
25
+ Per-worker idle timeout: --worker-idle-timeout SEC or POLYRUN_WORKER_IDLE_TIMEOUT_SEC after a progress ping (POLYRUN_WORKER_PING_FILE). Enable pings in test setup. Exit 125. Optional periodic pings: POLYRUN_WORKER_PING_THREAD=1 (POLYRUN_WORKER_PING_INTERVAL_SEC).
26
+ Orchestration warnings on process stderr: POLYRUN_ORCHESTRATION_STDERR=1
27
+ Spec quality (opt-in): POLYRUN_SPEC_QUALITY=1; run-shards --merge-spec-quality; merge-spec-quality / report-spec-quality
28
+ Partition timing granularity (default file): POLYRUN_TIMING_GRANULARITY=file|example (experimental; see partition.timing_granularity)
29
+ Partition strategies: round_robin (default, sorted), preserve_order_round_robin (paths-file order), lazy_robin (sorted RR + timing diagnostics), cost_binpack (LPT), hrw. partition.timing_file without strategy implies cost_binpack.
30
30
 
31
31
  commands:
32
32
  version print version
@@ -34,21 +34,24 @@ module Polyrun
34
34
  prepare run prepare recipe: default | assets (optional prepare.command overrides bin/rails assets:precompile) | shell (prepare.command required)
35
35
  merge-coverage merge SimpleCov JSON fragments (json/lcov/cobertura/console)
36
36
  merge-failures merge per-shard failure JSONL fragments or RSpec JSON files (jsonl/json)
37
- run-shards fan out N parallel OS processes (POLYRUN_SHARD_*; not Ruby threads); optional --merge-coverage / --merge-failures
37
+ run-shards fan out N parallel OS processes (POLYRUN_SHARD_*; not Ruby threads); optional --merge-coverage / --merge-failures / --merge-spec-quality
38
38
  parallel-rspec run-shards + merge-coverage (defaults to: bundle exec rspec after --)
39
39
  start parallel-rspec; auto-runs prepare (shell/assets) and db:setup-* when polyrun.yml configures them; legacy script/build_spec_paths.rb if paths_build absent
40
40
  ci-shard-run CI matrix: build-paths + plan for POLYRUN_SHARD_INDEX / POLYRUN_SHARD_TOTAL (or config), then run your command with that shard's paths after --; optional --shard-processes M or --workers M (POLYRUN_SHARD_PROCESSES; not POLYRUN_WORKERS) for N×M jobs × processes on this host
41
41
  ci-shard-rspec same as ci-shard-run -- bundle exec rspec; optional --shard-processes / --workers / -- [rspec-only flags]
42
42
  build-paths write partition.paths_file from partition.paths_build (same as auto step before plan/run-shards)
43
43
  init write a starter polyrun.yml or POLYRUN.md from built-in templates (see docs/SETUP_PROFILE.md)
44
- queue file-backed batch queue: init (optional --shard/--total etc. as plan, then claim/ack); M workers share one dir; no duplicate paths across claims
45
- quick run Polyrun::Quick (describe/it, before/after, let, expect…to, assert_*; optional capybara!)
44
+ queue file-backed batch queue: init (optional --shard/--total etc. as plan, then claim/ack/reclaim/status --json)
45
+ run-queue init queue and run N workers that claim batches until drained
46
+ quick quick test runner (describe/it, before/after, let, expect…to, assert_*; optional capybara!)
46
47
  hook run <phase> run one shell hook from polyrun.yml hooks: (e.g. before_suite); optional --shard/--total
47
48
  report-coverage write all coverage formats from one JSON file
48
49
  report-junit RSpec JSON or Polyrun testcase JSON → JUnit XML (CI)
49
50
  report-timing print slow-file summary from merged timing JSON
50
51
  merge-timing merge polyrun_timing_*.json shards
51
- config print effective config by dotted path (see Polyrun::Config::Effective; same tree as YAML plus merged prepare.env, resolved partition shard fields, workers)
52
+ merge-spec-quality merge polyrun-spec-quality-fragment-*.jsonl shards
53
+ report-spec-quality spec quality report from merged JSON (zero-hit, hot lines, churn)
54
+ config print effective config by dotted path (loaded YAML plus merged prepare.env, resolved partition shard fields, workers)
52
55
  env print shard + database env (see polyrun.yml databases)
53
56
  db:setup-template migrate template DB (PostgreSQL)
54
57
  db:setup-shard CREATE DATABASE shard FROM template (one POLYRUN_SHARD_INDEX)
data/lib/polyrun/cli/helpers.rb CHANGED
@@ -119,6 +119,22 @@ module Polyrun
119
119
  def resolve_partition_timing_granularity(pc, cli_val)
120
120
  Polyrun::Config::Resolver.resolve_partition_timing_granularity(pc, cli_val)
121
121
  end
122
+
123
# Loads the optional stable shard-assignment map named by the partition config.
#
# pc - partition config; `stable_assignment_file` is looked up under the
#      string key first, then under the symbol key.
#
# Returns the parsed JSON when its top-level value is a Hash.
# Returns nil when the key is unset, when the expanded path is not a regular
# file, when the JSON top level is not a Hash, or when the file is not valid
# JSON. Only the invalid-JSON case logs a warning.
# Only JSON::ParserError is rescued here; any other error (for example one
# raised by File.read) propagates to the caller.
+ def load_stable_assignment(pc)
124
+ path = pc["stable_assignment_file"] || pc[:stable_assignment_file]
125
+ return nil unless path
126
+
# Relative paths are resolved against the current working directory.
127
+ abs = File.expand_path(path.to_s, Dir.pwd)
128
+ return nil unless File.file?(abs)
129
+
130
+ data = JSON.parse(File.read(abs))
# Valid JSON whose top level is not an object falls through to nil without a warning.
131
+ return data if data.is_a?(Hash)
132
+
133
+ nil
134
+ rescue JSON::ParserError
135
+ Polyrun::Log.warn "polyrun: invalid stable_assignment_file JSON: #{abs}"
136
+ nil
137
+ end
122
138
  end
123
139
  end
124
140
  end
data/lib/polyrun/cli/init_command.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "optparse"
2
+ require "fileutils"
2
3
 
3
4
  module Polyrun
4
5
  class CLI
@@ -7,6 +8,7 @@ module Polyrun
7
8
  "gem" => "minimal_gem.polyrun.yml",
8
9
  "rails" => "rails_prepare.polyrun.yml",
9
10
  "ci-matrix" => "ci_matrix.polyrun.yml",
11
+ "spec-quality" => "polyrun_spec_quality.yml",
10
12
  "doc" => "POLYRUN.md"
11
13
  }.freeze
12
14
 
@@ -34,6 +36,7 @@ module Polyrun
34
36
  path = File.expand_path(dest)
35
37
  return init_refuses_overwrite(path) if File.file?(path) && !force
36
38
 
39
+ FileUtils.mkdir_p(File.dirname(path))
37
40
  File.write(path, body)
38
41
  Polyrun::Log.warn "polyrun init: wrote #{path}"
39
42
  0
@@ -92,7 +95,11 @@ module Polyrun
92
95
  end
93
96
 
94
97
  def default_init_output(profile)
95
- (profile == "doc") ? "POLYRUN.md" : "polyrun.yml"
98
+ case profile
99
+ when "doc" then "POLYRUN.md"
100
+ when "spec-quality" then "config/polyrun_spec_quality.yml"
101
+ else "polyrun.yml"
102
+ end
96
103
  end
97
104
  end
98
105
  end
data/lib/polyrun/cli/partition_diagnostics.rb ADDED
@@ -0,0 +1,22 @@
1
# Mixin for Polyrun::CLI that defines one private helper,
# partition_emit_diagnostics!, which runs timing diagnostics and partition
# reports for a plan that has already been built.
+ module Polyrun
2
+ class CLI
3
+ # Shared stderr diagnostics after {Partition::Plan} is built.
4
+ module PartitionDiagnostics
5
+ private
6
+
# Analyzes timing coverage for the planned items, emits the resulting
# warnings, then emits the partition reports for the plan.
#
# plan        - built plan; plan.root is read here and the plan itself is
#               handed to Partition::Reports.emit_all!.
# items       - forwarded unchanged to TimingDiagnostics.analyze.
# costs       - timing costs; when nil or empty the whole method is a no-op.
# timing_path - timing file path; when nil/false the whole method is a no-op.
# granularity - forwarded to TimingDiagnostics.analyze (defaults to :file).
#
# Returns nil when skipped; otherwise whatever Reports.emit_all! returns.
7
+ def partition_emit_diagnostics!(plan:, items:, costs:, timing_path:, granularity: :file)
# This guard skips the reports as well as the timing warnings: without a
# timing path and non-empty costs nothing below runs.
8
+ return unless timing_path && costs && !costs.empty?
9
+
10
+ analysis = Polyrun::Partition::TimingDiagnostics.analyze(
11
+ items: items,
12
+ costs: costs,
13
+ timing_path: timing_path,
14
+ root: plan.root,
15
+ granularity: granularity
16
+ )
17
+ Polyrun::Partition::TimingDiagnostics.emit_warnings!(analysis)
18
+ Polyrun::Partition::Reports.emit_all!(plan)
19
+ end
20
+ end
21
+ end
22
+ end
data/lib/polyrun/cli/plan_command.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Polyrun/FileLength -- plan argv + partition emit
1
2
  require "json"
2
3
  require "optparse"
3
4
 
@@ -38,8 +39,9 @@ module Polyrun
38
39
 
39
40
  items, costs, strategy = bundle
40
41
  constraints = load_partition_constraints(pc, ctx[:constraints_path])
42
+ stable = load_stable_assignment(pc)
41
43
 
42
- manifest = plan_command_build_manifest(
44
+ manifest, plan = plan_command_build_manifest(
43
45
  items: items,
44
46
  total: ctx[:total],
45
47
  strategy: strategy,
@@ -47,7 +49,16 @@ module Polyrun
47
49
  costs: costs,
48
50
  constraints: constraints,
49
51
  shard: ctx[:shard],
50
- timing_granularity: ctx[:timing_granularity]
52
+ timing_granularity: ctx[:timing_granularity],
53
+ stable_assignment: stable,
54
+ shard_weights: pc["shard_weights"] || pc[:shard_weights]
55
+ )
56
+ partition_emit_diagnostics!(
57
+ plan: plan,
58
+ items: items,
59
+ costs: costs,
60
+ timing_path: timing_path,
61
+ granularity: ctx[:timing_granularity]
51
62
  )
52
63
  [manifest, 0]
53
64
  end
@@ -56,7 +67,12 @@ module Polyrun
56
67
  items = plan_plan_items(paths_file, argv)
57
68
  return nil if items.nil?
58
69
 
59
- loaded = plan_load_costs_and_strategy(timing_path, ctx[:strategy], ctx[:timing_granularity])
70
+ loaded = plan_load_costs_and_strategy(
71
+ timing_path,
72
+ ctx[:strategy],
73
+ ctx[:timing_granularity],
74
+ strategy_explicit: ctx[:strategy_explicit]
75
+ )
60
76
  return nil if loaded.nil?
61
77
 
62
78
  costs, strategy = loaded
@@ -68,6 +84,7 @@ module Polyrun
68
84
  shard: resolve_shard_index(pc),
69
85
  total: resolve_shard_total(pc),
70
86
  strategy: (pc["strategy"] || pc[:strategy] || "round_robin").to_s,
87
+ strategy_explicit: !!(pc["strategy"] || pc[:strategy]),
71
88
  seed: pc["seed"] || pc[:seed],
72
89
  paths_file: nil,
73
90
  timing_path: nil,
@@ -80,10 +97,13 @@ module Polyrun
80
97
  def plan_command_register_partition_options!(opts, ctx)
81
98
  opts.on("--shard INDEX", Integer) { |v| ctx[:shard] = v }
82
99
  opts.on("--total N", Integer) { |v| ctx[:total] = v }
83
- opts.on("--strategy NAME", String) { |v| ctx[:strategy] = v }
100
+ opts.on("--strategy NAME", String) do |v|
101
+ ctx[:strategy] = v
102
+ ctx[:strategy_explicit] = true
103
+ end
84
104
  opts.on("--seed VAL") { |v| ctx[:seed] = v }
85
105
  opts.on("--constraints PATH", "YAML: pin / serial_glob (see spec_queue.md)") { |v| ctx[:constraints_path] = v }
86
- opts.on("--timing PATH", "path => seconds JSON; implies cost_binpack unless strategy is cost-based or hrw") do |v|
106
+ opts.on("--timing PATH", "path => seconds JSON; implies cost_binpack unless strategy is explicit or timing-aware") do |v|
87
107
  ctx[:timing_path] = v
88
108
  end
89
109
  opts.on("--timing-granularity VAL", "file (default) or example (experimental: path:line items)") do |v|
@@ -104,7 +124,7 @@ module Polyrun
104
124
  end.parse!(argv)
105
125
  end
106
126
 
107
- def plan_command_build_manifest(items:, total:, strategy:, seed:, costs:, constraints:, shard:, timing_granularity: :file)
127
+ def plan_command_build_manifest(items:, total:, strategy:, seed:, costs:, constraints:, shard:, timing_granularity: :file, stable_assignment: nil, shard_weights: nil)
108
128
  plan = Polyrun::Debug.time("Partition::Plan.new (plan command)") do
109
129
  Polyrun::Partition::Plan.new(
110
130
  items: items,
@@ -114,7 +134,9 @@ module Polyrun
114
134
  costs: costs,
115
135
  constraints: constraints,
116
136
  root: Dir.pwd,
117
- timing_granularity: timing_granularity
137
+ timing_granularity: timing_granularity,
138
+ stable_assignment: stable_assignment,
139
+ shard_weights: shard_weights
118
140
  )
119
141
  end
120
142
  Polyrun::Debug.log_kv(
@@ -124,16 +146,13 @@ module Polyrun
124
146
  strategy: strategy,
125
147
  path_count: items.size
126
148
  )
127
- plan.manifest(shard)
149
+ [plan.manifest(shard), plan]
128
150
  end
129
151
 
130
- def plan_resolve_timing_path(pc, timing_path, strategy)
152
+ def plan_resolve_timing_path(pc, timing_path, _strategy = nil)
131
153
  return timing_path if timing_path
132
154
 
133
- tf = pc["timing_file"] || pc[:timing_file]
134
- return tf if tf && (Polyrun::Partition::Plan.cost_strategy?(strategy) || Polyrun::Partition::Plan.hrw_strategy?(strategy))
135
-
136
- nil
155
+ pc["timing_file"] || pc[:timing_file]
137
156
  end
138
157
 
139
158
  def plan_plan_items(paths_file, argv)
@@ -147,8 +166,13 @@ module Polyrun
147
166
  end
148
167
  end
149
168
 
150
- def plan_load_costs_and_strategy(timing_path, strategy, timing_granularity)
169
+ def plan_load_costs_and_strategy(timing_path, strategy, timing_granularity, strategy_explicit: false)
170
+ strategy = strategy.to_s
151
171
  if timing_path
172
+ if strategy_explicit && strategy == "round_robin"
173
+ return [nil, strategy]
174
+ end
175
+
152
176
  costs = Polyrun::Partition::Plan.load_timing_costs(
153
177
  File.expand_path(timing_path.to_s, Dir.pwd),
154
178
  granularity: timing_granularity
@@ -157,12 +181,16 @@ module Polyrun
157
181
  Polyrun::Log.warn "polyrun plan: timing file missing or has no entries: #{timing_path}"
158
182
  return nil
159
183
  end
160
- unless Polyrun::Partition::Plan.cost_strategy?(strategy) || Polyrun::Partition::Plan.hrw_strategy?(strategy)
184
+ if Polyrun::Partition::Plan.timing_load_strategy?(strategy)
185
+ return [costs, strategy]
186
+ end
187
+ unless strategy_explicit
161
188
  Polyrun::Log.warn "polyrun plan: using cost_binpack (timing data present)" if @verbose
162
- strategy = "cost_binpack"
189
+ return [costs, "cost_binpack"]
163
190
  end
164
- [costs, strategy]
165
- elsif Polyrun::Partition::Plan.cost_strategy?(strategy)
191
+
192
+ [nil, strategy]
193
+ elsif Polyrun::Partition::Plan.cost_strategy?(strategy) || Polyrun::Partition::Plan.lazy_robin_strategy?(strategy)
166
194
  Polyrun::Log.warn "polyrun plan: --timing or partition.timing_file required for strategy #{strategy}"
167
195
  nil
168
196
  else
@@ -172,3 +200,4 @@ module Polyrun
172
200
  end
173
201
  end
174
202
  end
203
+ # rubocop:enable Polyrun/FileLength
data/lib/polyrun/cli/queue_command.rb CHANGED
@@ -34,8 +34,10 @@ module Polyrun
34
34
  queue_cmd_ack(argv, dir, lease_id, worker)
35
35
  when "status"
36
36
  queue_cmd_status(argv, dir)
37
+ when "reclaim"
38
+ queue_cmd_reclaim(argv, dir)
37
39
  else
38
- Polyrun::Log.warn "usage: polyrun queue {init|claim|ack|status} [options]"
40
+ Polyrun::Log.warn "usage: polyrun queue {init|claim|ack|status|reclaim} [options]"
39
41
  2
40
42
  end
41
43
  end
@@ -126,13 +128,34 @@ module Polyrun
126
128
  end
127
129
 
128
130
  def queue_cmd_status(argv, dir)
131
+ json_detail = false
129
132
  OptionParser.new do |opts|
133
+ opts.banner = "usage: polyrun queue status [--dir DIR] [--json]"
130
134
  opts.on("--dir PATH") { |v| dir = v }
135
+ opts.on("--json", "Include lease details") { json_detail = true }
131
136
  end.parse!(argv)
132
- s = Polyrun::Queue::FileStore.new(dir).status
137
+ s = Polyrun::Queue::FileStore.new(dir).status(detailed: json_detail)
133
138
  Polyrun::Log.puts JSON.generate(s)
134
139
  0
135
140
  end
141
+
142
# Handles `polyrun queue reclaim`: asks the file-backed queue store to reclaim
# leases, selected by age and/or by worker id.
#
# argv - remaining CLI arguments; parsed destructively with OptionParser#parse!.
# dir  - default queue directory; overridden by --dir.
#
# Returns 2 (after logging a warning) when neither --older-than nor --worker
# was supplied; otherwise prints {"reclaimed_paths": n} as JSON and returns 0.
+ def queue_cmd_reclaim(argv, dir)
143
+ older_than = nil
144
+ worker = nil
145
+ OptionParser.new do |opts|
146
+ opts.banner = "usage: polyrun queue reclaim [--dir DIR] [--older-than DURATION] [--worker ID]"
147
+ opts.on("--dir PATH") { |v| dir = v }
# The duration string is converted as soon as the option is parsed.
# NOTE(review): presumably parse_seconds returns a number of seconds and
# rejects malformed input; confirm in Polyrun::Queue::Duration.
148
+ opts.on("--older-than DUR", "e.g. 10m, 1h, 600s") { |v| older_than = Polyrun::Queue::Duration.parse_seconds(v) }
149
+ opts.on("--worker ID") { |v| worker = v }
150
+ end.parse!(argv)
# At least one selector is required, so a bare `queue reclaim` never reaches
# the store.
151
+ unless older_than || worker
152
+ Polyrun::Log.warn "queue reclaim: need --older-than or --worker"
153
+ return 2
154
+ end
# Both selectors are passed through together; an unset one is passed as nil.
155
+ n = Polyrun::Queue::FileStore.new(dir).reclaim!(older_than: older_than, worker_id: worker)
156
+ Polyrun::Log.puts JSON.generate({"reclaimed_paths" => n})
157
+ 0
158
+ end
136
159
  end
137
160
  end
138
161
  end
@@ -0,0 +1,145 @@
1
+ require "json"
2
+ require "optparse"
3
+ require "shellwords"
4
+
5
+ require_relative "../queue/duration"
6
+
7
module Polyrun
  class CLI
    # +polyrun run-queue+: seeds a file-backed queue with the ordered path list, forks worker
    # processes that claim batches of paths and run the given command on each batch, then logs
    # the final queue counters.
    module RunQueueCommand
      # Values accepted by +--on-failure+.
      RUN_QUEUE_ON_FAILURE_MODES = %w[exit requeue].freeze

      private

      # Entry point for +polyrun run-queue [options] -- <command>+.
      #
      # Everything before the first +--+ is parsed as options; everything after it is the
      # command each worker runs with its claimed paths appended as extra arguments.
      #
      # @param argv [Array<String>] arguments following +run-queue+
      # @param config_path [String, nil] config file path; falls back to +POLYRUN_CONFIG+
      # @return [Integer] 0 when every worker exited successfully, 1 when any worker failed,
      #   2 on usage errors or when the queue already exists; a non-zero code from path
      #   building or ordering is passed through unchanged
      # rubocop:disable Metrics/AbcSize -- queue worker argv + spawn loop
      def cmd_run_queue(argv, config_path)
        dir = ".polyrun-queue"
        batch = 5
        on_failure = "exit"
        paths_file = nil
        workers = nil

        sep = argv.index("--")
        unless sep
          Polyrun::Log.warn "polyrun run-queue: need -- before the command"
          return 2
        end

        head = argv[0...sep]
        cmd = argv[(sep + 1)..].map(&:to_s)
        if cmd.empty?
          Polyrun::Log.warn "polyrun run-queue: need a command after --"
          return 2
        end

        cfg = Polyrun::Config.load(path: config_path || ENV["POLYRUN_CONFIG"])
        pc = cfg.partition
        ctx = plan_command_initial_context(pc)

        OptionParser.new do |opts|
          opts.banner = "usage: polyrun run-queue [--workers N] [--batch N] [--dir DIR] [--on-failure exit|requeue] [partition options] -- <command>"
          opts.on("--workers N", Integer) { |v| workers = v }
          opts.on("--batch N", Integer) { |v| batch = v }
          opts.on("--dir PATH") { |v| dir = v }
          opts.on("--paths-file PATH", String) { |v| paths_file = v }
          opts.on("--on-failure MODE", "exit (default) or requeue") { |v| on_failure = v }
          plan_command_register_partition_options!(opts, ctx)
        end.parse!(head)

        workers ||= env_int("POLYRUN_WORKERS", Polyrun::Config::DEFAULT_PARALLEL_WORKERS)

        # Reject unusable settings before touching the queue directory: with zero workers
        # nothing would run yet the command would still report success.
        problem = run_queue_option_error(workers: workers, batch: batch, on_failure: on_failure)
        if problem
          Polyrun::Log.warn "polyrun run-queue: #{problem}"
          return 2
        end

        paths_file ||= pc["paths_file"] || pc[:paths_file]
        unless paths_file
          Polyrun::Log.warn "polyrun run-queue: need --paths-file or partition.paths_file"
          return 2
        end

        code = Polyrun::Partition::PathsBuild.apply!(partition: pc, cwd: Dir.pwd)
        return code if code != 0

        store = Polyrun::Queue::FileStore.new(dir)
        if File.file?(File.join(File.expand_path(dir), "queue.json"))
          Polyrun::Log.warn "polyrun run-queue: queue already exists at #{dir}; remove it or use --dir"
          return 2
        end

        ordered, code = queue_partition_manifest_and_ordered_paths(cfg, pc, ctx, paths_file)
        return code if code != 0

        store.init!(ordered)
        Polyrun::Log.warn "polyrun run-queue: #{ordered.size} path(s), #{workers} worker(s), batch=#{batch}" if @verbose

        run_t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        pids = run_queue_spawn_workers(store: store, workers: workers, batch: batch, cmd: cmd, on_failure: on_failure)
        results = run_queue_wait_workers(pids, store: store, on_failure: on_failure)
        wall = Process.clock_gettime(Process::CLOCK_MONOTONIC) - run_t0

        stat = store.status(detailed: true)
        # NOTE: results[:ok] / results[:fail] count worker processes (one per reaped child),
        # even though the summary labels them batches_ok / batches_fail.
        Polyrun::Log.warn format(
          "polyrun run-queue: done pending=%d done=%d leases=%d wall=%.1fs batches_ok=%d batches_fail=%d",
          stat["pending"], stat["done"], stat["leases"], wall, results[:ok], results[:fail]
        )

        results[:fail].positive? ? 1 : 0
      end
      # rubocop:enable Metrics/AbcSize

      # Describes the first invalid run-queue setting.
      #
      # @return [String, nil] a message for the first problem found, or nil when all are usable
      def run_queue_option_error(workers:, batch:, on_failure:)
        return "--workers must be >= 1 (got #{workers})" if workers < 1
        return "--batch must be >= 1 (got #{batch})" if batch < 1
        return nil if RUN_QUEUE_ON_FAILURE_MODES.include?(on_failure.to_s)

        "--on-failure must be one of #{RUN_QUEUE_ON_FAILURE_MODES.join("|")} (got #{on_failure})"
      end

      # Forks +workers+ child processes, each running {#run_queue_worker_loop} under the id
      # "worker-<index>".
      #
      # @return [Array<Hash>] one +{pid:, worker_id:}+ entry per forked child
      def run_queue_spawn_workers(store:, workers:, batch:, cmd:, on_failure:)
        Array.new(workers) do |i|
          wid = "worker-#{i}"
          pid = Process.fork do
            run_queue_worker_loop(store: store, worker_id: wid, batch: batch, cmd: cmd, on_failure: on_failure)
          end
          {pid: pid, worker_id: wid}
        end
      end

      # Body of one worker process; never returns normally, it always ends via +exit+.
      #
      # Claims batches until a claim comes back without paths, then exits 0. A successful batch
      # is acked. On a failed batch the worker stops: in "requeue" mode it first hands its lease
      # back to the store and exits 1, otherwise it exits with the command's status.
      # A +Polyrun::Error+ raised while working is logged and turns into exit status 2.
      def run_queue_worker_loop(store:, worker_id:, batch:, cmd:, on_failure:)
        loop do
          claim = store.claim!(worker_id: worker_id, batch_size: batch)
          paths = claim["paths"] || []
          break if paths.empty?

          code = run_queue_run_batch(cmd, paths)
          if code == 0
            store.ack!(lease_id: claim["lease_id"], worker_id: worker_id)
          elsif on_failure.to_s == "requeue"
            store.reclaim_lease!(claim["lease_id"])
            exit 1
          else
            # code is non-zero in this branch, so it can be used as the exit status directly.
            exit code
          end
        end
        exit 0
      rescue Polyrun::Error => e
        Polyrun::Log.warn "polyrun run-queue worker #{worker_id}: #{e.message}"
        exit 2
      end

      # Runs +cmd+ with +paths+ appended as arguments.
      #
      # @return [Integer] 0 on success, otherwise the child's exit status, or 1 when no exit
      #   status is available
      def run_queue_run_batch(cmd, paths)
        system(*cmd, *paths) ? 0 : ($?.exitstatus || 1)
      end

      # Reaps the forked workers and tallies how many exited successfully.
      #
      # Any child is waited for (not a specific pid) so that a failed worker's leases are
      # handed back as soon as it dies. Children that are not in +pids+ are ignored.
      #
      # @param pids [Array<Hash>] entries as returned by {#run_queue_spawn_workers}
      # @param store [#reclaim!] queue store used to release leases of failed workers
      # @param on_failure [String] accepted for interface compatibility; not used here
      # @return [Hash] +{ok: Integer, fail: Integer}+ counts of worker processes
      def run_queue_wait_workers(pids, store:, on_failure:)
        ok = 0
        failed = 0
        pid_to_worker = pids.to_h { |entry| [entry[:pid], entry[:worker_id]] }
        while pid_to_worker.any?
          begin
            pid, st = Process.wait2(-1)
          rescue Errno::ECHILD
            # No child is left to reap although workers are still tracked; their exit status
            # is unknown, so count them as failed and release whatever they still hold.
            pid_to_worker.each_value do |lost_worker|
              failed += 1
              run_queue_reclaim_worker(store, lost_worker)
            end
            break
          end

          worker_id = pid_to_worker.delete(pid)
          next unless worker_id

          if st.success?
            ok += 1
          else
            failed += 1
            run_queue_reclaim_worker(store, worker_id)
          end
        end
        {ok: ok, fail: failed}
      end

      # Releases the leases held by +worker_id+ and logs how many paths came back, if any.
      def run_queue_reclaim_worker(store, worker_id)
        reclaimed = store.reclaim!(worker_id: worker_id)
        Polyrun::Log.warn "polyrun run-queue: reclaimed #{reclaimed} path(s) from #{worker_id}" if reclaimed.positive?
      end
    end
  end
end
@@ -3,6 +3,7 @@ require "rbconfig"
3
3
 
4
4
  require_relative "start_bootstrap"
5
5
  require_relative "failure_commands"
6
+ require_relative "spec_quality_commands"
6
7
  require_relative "run_shards_run"
7
8
 
8
9
  module Polyrun
@@ -10,6 +11,7 @@ module Polyrun
10
11
  module RunShardsCommand
11
12
  include StartBootstrap
12
13
  include FailureCommands
14
+ include SpecQualityCommands
13
15
  include RunShardsRun
14
16
 
15
17
  private
@@ -116,11 +118,13 @@ module Polyrun
116
118
  # ENV for a worker process: POLYRUN_SHARD_* plus per-shard database URLs from polyrun.yml or DATABASE_URL.
117
119
  # When +matrix_total+ > 1 with multiple local workers, sets +POLYRUN_SHARD_MATRIX_INDEX+ / +POLYRUN_SHARD_MATRIX_TOTAL+
118
120
  # so {Coverage::Collector} can name fragments uniquely across CI matrix jobs (NxM sharding).
119
- def shard_child_env(cfg:, workers:, shard:, matrix_index: nil, matrix_total: nil, failure_fragments: false)
121
+ # rubocop:disable Metrics/AbcSize -- shard ENV: matrix, DB URLs, fragment flags
122
+ def shard_child_env(cfg:, workers:, shard:, matrix_index: nil, matrix_total: nil, failure_fragments: false, spec_quality_fragments: false)
120
123
  child_env = ENV.to_h.merge(
121
124
  Polyrun::Database::Shard.env_map(shard_index: shard, shard_total: workers)
122
125
  )
123
126
  child_env["POLYRUN_FAILURE_FRAGMENTS"] = "1" if failure_fragments
127
+ child_env["POLYRUN_SPEC_QUALITY_FRAGMENTS"] = "1" if spec_quality_fragments
124
128
  mt = matrix_total.nil? ? 0 : Integer(matrix_total)
125
129
  if mt > 1
126
130
  if matrix_index.nil?
@@ -138,6 +142,7 @@ module Polyrun
138
142
  end
139
143
  child_env
140
144
  end
145
+ # rubocop:enable Metrics/AbcSize
141
146
 
142
147
  def cmd_build_paths(config_path)
143
148
  cfg = Polyrun::Config.load(path: config_path || ENV["POLYRUN_CONFIG"])