polyrun 1.4.2 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -0
- data/README.md +2 -2
- data/docs/SETUP_PROFILE.md +2 -0
- data/lib/polyrun/cli/ci_shard_hooks.rb +12 -4
- data/lib/polyrun/cli/ci_shard_run_command.rb +3 -1
- data/lib/polyrun/cli/help.rb +10 -2
- data/lib/polyrun/cli/helpers.rb +38 -0
- data/lib/polyrun/cli/init_command.rb +8 -1
- data/lib/polyrun/cli/partition_diagnostics.rb +22 -0
- data/lib/polyrun/cli/plan_command.rb +47 -18
- data/lib/polyrun/cli/queue_command.rb +25 -2
- data/lib/polyrun/cli/run_queue_command.rb +145 -0
- data/lib/polyrun/cli/run_shards_command.rb +6 -1
- data/lib/polyrun/cli/run_shards_parallel_children.rb +28 -35
- data/lib/polyrun/cli/run_shards_parallel_wait.rb +267 -0
- data/lib/polyrun/cli/run_shards_plan_boot_phases.rb +81 -3
- data/lib/polyrun/cli/run_shards_plan_options.rb +17 -3
- data/lib/polyrun/cli/run_shards_planning.rb +20 -12
- data/lib/polyrun/cli/run_shards_run.rb +28 -37
- data/lib/polyrun/cli/run_shards_worker_interrupt.rb +75 -0
- data/lib/polyrun/cli/spec_quality_commands.rb +140 -0
- data/lib/polyrun/cli.rb +16 -2
- data/lib/polyrun/coverage/example_diff.rb +122 -0
- data/lib/polyrun/coverage/merge/formatters_html.rb +4 -0
- data/lib/polyrun/data/factory_counts.rb +14 -1
- data/lib/polyrun/database/clone_shards.rb +2 -0
- data/lib/polyrun/database/shard.rb +2 -1
- data/lib/polyrun/hooks.rb +9 -1
- data/lib/polyrun/log.rb +16 -0
- data/lib/polyrun/minitest.rb +43 -0
- data/lib/polyrun/partition/hrw.rb +40 -3
- data/lib/polyrun/partition/paths_build.rb +8 -3
- data/lib/polyrun/partition/plan.rb +88 -19
- data/lib/polyrun/partition/plan_lpt.rb +49 -7
- data/lib/polyrun/partition/plan_sharding.rb +8 -0
- data/lib/polyrun/partition/reports.rb +139 -0
- data/lib/polyrun/partition/timing_diagnostics.rb +139 -0
- data/lib/polyrun/partition/timing_keys.rb +2 -1
- data/lib/polyrun/queue/duration.rb +30 -0
- data/lib/polyrun/queue/file_store.rb +107 -3
- data/lib/polyrun/quick/example_runner.rb +13 -0
- data/lib/polyrun/quick/runner.rb +21 -0
- data/lib/polyrun/rspec.rb +26 -0
- data/lib/polyrun/spec_quality/config.rb +134 -0
- data/lib/polyrun/spec_quality/fragment.rb +39 -0
- data/lib/polyrun/spec_quality/merge.rb +78 -0
- data/lib/polyrun/spec_quality/minitest_hook.rb +42 -0
- data/lib/polyrun/spec_quality/plan_loader.rb +47 -0
- data/lib/polyrun/spec_quality/profile.rb +91 -0
- data/lib/polyrun/spec_quality/report.rb +261 -0
- data/lib/polyrun/spec_quality/rspec_hook.rb +55 -0
- data/lib/polyrun/spec_quality/sql_counter.rb +34 -0
- data/lib/polyrun/spec_quality.rb +205 -0
- data/lib/polyrun/templates/POLYRUN.md +6 -0
- data/lib/polyrun/templates/ci_matrix.polyrun.yml +4 -0
- data/lib/polyrun/templates/polyrun_hooks_spec_quality.rb +12 -0
- data/lib/polyrun/templates/polyrun_spec_quality.yml +20 -0
- data/lib/polyrun/templates/rails_prepare.polyrun.yml +5 -0
- data/lib/polyrun/timing/merge.rb +5 -5
- data/lib/polyrun/timing/stats.rb +76 -0
- data/lib/polyrun/timing/summary.rb +5 -2
- data/lib/polyrun/timing/variance_report.rb +51 -0
- data/lib/polyrun/version.rb +1 -1
- data/lib/polyrun/worker_ping.rb +74 -0
- data/sig/polyrun/minitest.rbs +2 -0
- data/sig/polyrun/rspec.rbs +4 -0
- data/sig/polyrun/worker_ping.rbs +10 -0
- metadata +26 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4ab4d8c8553db3e8e62e83967c9090fc7e788c994d450febb0dc64d39d946f3b
|
|
4
|
+
data.tar.gz: 3edbc34c324880653c8fd6d7675a31193adfebb0f0a4439d33f74e4ab9d1c327
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1e372f737bf9d8cf76d39979f87dec6acd33e325ef1b5355acdbdb588d9542bc94d0fa0236eecb6632152c0b148560e489a9e26132bede885ddbb73628fa3830
|
|
7
|
+
data.tar.gz: c61bedba0ea3f6399ffee3d217d719182de6c3e945fbbb14aea9c811f0b75e7e968b7e30a30162fb944635a949629e0102976d0f01e2851411b7694649ac8c5f
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,44 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
## 2.1.0 (2026-07-03)
|
|
6
|
+
|
|
7
|
+
- Add experimental per-example spec quality (`Polyrun::SpecQuality`): `POLYRUN_SPEC_QUALITY=1`, worker JSONL fragments, `merge-spec-quality`, `report-spec-quality`, and `run-shards --merge-spec-quality`.
|
|
8
|
+
- Add `Polyrun::RSpec.install_spec_quality!`, `Polyrun::Minitest.install_spec_quality!`, and Quick example hooks; `polyrun init --profile spec-quality`.
|
|
9
|
+
- Add `Polyrun::Coverage::ExampleDiff` for per-example line hit deltas from `Coverage.peek_result`; record per-example factory counts via `Data::FactoryCounts`.
|
|
10
|
+
- Report zero-hit examples, line churn, hot lines, shard attribution, optional partition hints (`report-spec-quality --plan`), optional CPU/GC/IO profiling (stdlib), and CI gate thresholds in `config/polyrun_spec_quality.yml`.
|
|
11
|
+
- Document spec quality in `docs/SPEC_QUALITY.md`.
|
|
12
|
+
|
|
13
|
+
## 2.0.0 (2026-07-03)
|
|
14
|
+
|
|
15
|
+
- Add `polyrun run-queue` to init a file-backed queue and run N workers that claim batches until drained; `--on-failure exit|requeue` reclaims open leases when a worker exits before ack.
|
|
16
|
+
- Add `polyrun queue reclaim` with `--older-than DURATION` (`10m`, `1h`, …) and optional `--worker ID`; add `Queue::FileStore#reclaim!` and `#reclaim_lease!`.
|
|
17
|
+
- Add partition imbalance and dominant-file reports after `plan` and `run-shards` (`Partition::Reports`); warn on stale or missing timing coverage before cost-based partition (`Partition::TimingDiagnostics`).
|
|
18
|
+
- Add partition strategies `lazy_robin` (sorted round-robin with timing diagnostics), `preserve_order_round_robin` (paths-file order), `stable_cost_binpack` (stable assignment with LPT fallback), and `weighted_hrw` (rendezvous with `shard_weights`).
|
|
19
|
+
- Add `partition.stable_assignment_file` and `partition.shard_weights` for stable binpack and weighted HRW; wire into `plan` and `run-shards`.
|
|
20
|
+
- Add `Polyrun::Timing::Stats` for rich timing entries (`last_seconds`, `mean`, `p95`, `runs`, `failures`, `timeouts`); `merge-timing` merges via `Stats.merge_entries` and emits `Timing::VarianceReport` warnings.
|
|
21
|
+
- Add `POLYRUN_HRW_FAST_SCORE` for faster deterministic HRW scoring.
|
|
22
|
+
- Auto-select `cost_binpack` when `partition.timing_file` or `--timing` is set and strategy is not explicit; load timing for `lazy_robin` and diagnostics even when strategy stays round-robin.
|
|
23
|
+
- Fix `paths_build` glob stages to match against the remaining pool (`File.fnmatch?`) instead of re-globbing the cwd.
|
|
24
|
+
- Warn when a database URL has no database segment (shard suffix skipped); name `shard_index` in `CloneShards` errors.
|
|
25
|
+
- Refactor LPT forced-item handling into a single pass before free-item balance.
|
|
26
|
+
- Add Makefile, Prayfile, and root `polyrun.yml`; run repo specs via polyrun in CI.
|
|
27
|
+
- BREAKING: `partition.timing_file` without an explicit `strategy` now implies `cost_binpack` instead of `round_robin`.
|
|
28
|
+
- BREAKING: `merge-timing` output entries are objects with timing stats, not bare scalar seconds; tools that read merged JSON must accept objects or use `Stats.binpack_weight`.
|
|
29
|
+
- BREAKING: `paths_build` glob stages filter the staged pool only; membership and order can differ from prior re-glob behavior.
|
|
30
|
+
|
|
31
|
+
## 1.5.0 (2026-05-04)
|
|
32
|
+
|
|
33
|
+
- Add `run-shards --worker-timeout SEC` and `POLYRUN_WORKER_TIMEOUT_SEC` (wall time per worker since spawn); stop stuck workers; record exit 124 for that shard.
|
|
34
|
+
- Add `run-shards --worker-idle-timeout SEC` and `POLYRUN_WORKER_IDLE_TIMEOUT_SEC`; parent reads monotonic timestamps from `POLYRUN_WORKER_PING_FILE`; record exit 125 when the last ping is stale. Idle applies only after a valid positive ping (use wall timeout until the first ping).
|
|
35
|
+
- Add `Polyrun::WorkerPing` (`ping!`, `ensure_interval_ping_thread!` when `POLYRUN_WORKER_PING_THREAD`). Add `Polyrun::RSpec.install_worker_ping!` and `Polyrun::Minitest.install_worker_ping!`; Polyrun Quick calls `WorkerPing.ping!` around each example. Parent creates ping paths under `tmp/polyrun/` and unlinks files after workers exit.
|
|
36
|
+
- Poll every live shard worker together when timeouts are enabled so idle and wall limits apply to all children, not only the first waiter.
|
|
37
|
+
- Split parallel worker teardown into `RunShardsParallelWait` and `RunShardsWorkerInterrupt`; keep spawn logic in `RunShardsParallelChildren`.
|
|
38
|
+
- Add `Polyrun::Log.orchestration_warn`; when `POLYRUN_ORCHESTRATION_STDERR=1`, copy one line to process `$stderr` if `Log.stderr` is not the same object (custom/null sinks).
|
|
39
|
+
- Wire `env_worker_timeout_sec` / `env_worker_idle_timeout_sec` into `ci-shard-run` plan context. Rescue `Interrupt` around `after_suite` in `run-shards` and `ci-shard` orchestration where suite hooks run.
|
|
40
|
+
- In `Polyrun::Hooks#run_phase`, rescue `Interrupt` for Ruby DSL and shell hook phases (return 130).
|
|
41
|
+
- Document worker timeout, idle ping, and `POLYRUN_ORCHESTRATION_STDERR` in `polyrun help`. Add `sig/polyrun/worker_ping.rbs` and extend `Polyrun::RSpec` / `Polyrun::Minitest` installer signatures.
|
|
42
|
+
|
|
5
43
|
## 1.4.2 (2026-04-24)
|
|
6
44
|
|
|
7
45
|
- Add richer HTML coverage reports: summary cards, group coverage, sortable file tables, project-relative paths, and per-file source detail.
|
data/README.md
CHANGED
|
@@ -23,7 +23,7 @@ Capybara and Playwright stay in your application; Polyrun does not replace brows
|
|
|
23
23
|
2. Add a `polyrun.yml` beside the app, or pass `-c` to point at one. Configure `partition` (paths, shard index and total, strategy), and optionally `databases` (Postgres template and `shard_db_pattern`), `prepare`, and `coverage`. If you use `partition.paths_build`, Polyrun can write `partition.paths_file` (for example `spec/spec_paths.txt`) from globs and ordered stages—substring priorities for integration specs, or a regex stage for “Rails-heavy files first”—without a per-project Ruby script. That step runs before `plan` and `run-shards`. Use `bin/polyrun build-paths` to refresh the paths file only.
|
|
24
24
|
3. Run prepare once before fan-out—for example `script/ci_prepare` for Vite or webpack builds, and `Polyrun::Prepare::Assets` digest markers. See `examples/TESTING_REQUIREMENTS.md`.
|
|
25
25
|
4. Run workers with `bin/polyrun run-shards --workers N -- bundle exec rspec`: N separate OS processes, each running RSpec with its own file list from `partition.paths_file`, or `spec/spec_paths.txt`, or else `spec/**/*_spec.rb`. Stderr shows where paths came from; after a successful multi-worker run it reminds you to run merge-coverage unless you use `parallel-rspec` or `run-shards --merge-coverage`.
|
|
26
|
-
5. Merge artifacts with `bin/polyrun merge-coverage` on `coverage/polyrun-fragment-*.json` (one fragment per `POLYRUN_SHARD_INDEX` when coverage is on), or use `bin/polyrun parallel-rspec` or `run-shards --merge-coverage` so Polyrun runs merge for you. Optional: `merge-timing`, `report-timing`, `report-junit
|
|
26
|
+
5. Merge artifacts with `bin/polyrun merge-coverage` on `coverage/polyrun-fragment-*.json` (one fragment per `POLYRUN_SHARD_INDEX` when coverage is on), or use `bin/polyrun parallel-rspec` or `run-shards --merge-coverage` so Polyrun runs merge for you. Optional: `merge-timing`, `report-timing`, `report-junit`, `merge-spec-quality`, `report-spec-quality` (experimental per-example spec quality; see [docs/SPEC_QUALITY.md](docs/SPEC_QUALITY.md)).
|
|
27
27
|
|
|
28
28
|
### Hooks (`hooks:` in `polyrun.yml`)
|
|
29
29
|
|
|
@@ -189,7 +189,7 @@ bin/polyrun quick spec/polyrun_quick/smoke.rb
|
|
|
189
189
|
|
|
190
190
|
Shard index and total in CI (`Polyrun::Env::Ci`): when set, `POLYRUN_SHARD_INDEX` and `POLYRUN_SHARD_TOTAL` take precedence. When `CI` is truthy, `CI_NODE_INDEX` / `CI_NODE_TOTAL` and other parallel-job environment variables are read if present. If your runner does not export those, set `POLYRUN_SHARD_*` from the job matrix.
|
|
191
191
|
|
|
192
|
-
File queue (`polyrun queue …`): batches live on disk under a lock file; paths move from `pending` to `leases` on claim and to `done` on ack.
|
|
192
|
+
File queue (`polyrun queue …`): batches live on disk under a lock file; paths move from `pending` to `leases` on claim and to `done` on ack. Stale leases can be returned to `pending` with `polyrun queue reclaim --older-than 10m` (or `--worker ID`). Use `polyrun queue status --json` for lease ages. `polyrun run-queue` reclaims a worker's open lease when that worker exits before ack.
|
|
193
193
|
|
|
194
194
|
## Examples
|
|
195
195
|
|
data/docs/SETUP_PROFILE.md
CHANGED
|
@@ -45,6 +45,8 @@ Rule: anything expensive (compile, `yarn`, Playwright download) belongs in prepa
|
|
|
45
45
|
| Plain glob | `partition.paths_build.all_glob: spec/**/*_spec.rb` and empty or minimal `stages` |
|
|
46
46
|
| Ordered stages | `partition.paths_build.stages`: regex (e.g. slow integration first) or `sort_by_substring_order` for stable ordering |
|
|
47
47
|
|
|
48
|
+
`paths_build` controls **membership** in `partition.paths_file`, not shard assignment order. Default `round_robin` sorts paths alphabetically before mod assignment. Use `strategy: preserve_order_round_robin` to honor paths-file line order. Set `partition.timing_file` without `strategy` to auto-select `cost_binpack`; use `lazy_robin` for round-robin assignment with timing diagnostics.
|
|
49
|
+
|
|
48
50
|
Refresh list: `polyrun -c polyrun.yml build-paths` (also runs automatically before `plan` / `run-shards` when configured).
|
|
49
51
|
|
|
50
52
|
## 6. Coverage and CI reports
|
data/lib/polyrun/cli/ci_shard_hooks.rb
CHANGED
|
@@ -49,7 +49,11 @@ module Polyrun
|
|
|
49
49
|
"POLYRUN_SHARD_TOTAL" => ctx[:workers].to_s,
|
|
50
50
|
"POLYRUN_SUITE_EXIT_STATUS" => exit_code.to_s
|
|
51
51
|
)
|
|
52
|
-
|
|
52
|
+
begin
|
|
53
|
+
hook_cfg.run_phase_if_enabled(:after_suite, env_after)
|
|
54
|
+
rescue Interrupt
|
|
55
|
+
Polyrun::Log.warn "polyrun ci-shard: after_suite hook interrupted"
|
|
56
|
+
end
|
|
53
57
|
end
|
|
54
58
|
end
|
|
55
59
|
end
|
|
@@ -109,9 +113,13 @@ module Polyrun
|
|
|
109
113
|
exit_code
|
|
110
114
|
ensure
|
|
111
115
|
if suite_started
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
116
|
+
begin
|
|
117
|
+
hook_cfg.run_phase_if_enabled(:after_suite, env_orch.merge(
|
|
118
|
+
"POLYRUN_SUITE_EXIT_STATUS" => exit_code.to_s
|
|
119
|
+
))
|
|
120
|
+
rescue Interrupt
|
|
121
|
+
Polyrun::Log.warn "polyrun ci-shard: after_suite hook interrupted"
|
|
122
|
+
end
|
|
115
123
|
end
|
|
116
124
|
end
|
|
117
125
|
end
|
data/lib/polyrun/cli/ci_shard_run_command.rb
CHANGED
|
@@ -66,7 +66,9 @@ module Polyrun
|
|
|
66
66
|
merge_format: nil,
|
|
67
67
|
config_path: config_path,
|
|
68
68
|
matrix_shard_index: mx,
|
|
69
|
-
matrix_shard_total: mt
|
|
69
|
+
matrix_shard_total: mt,
|
|
70
|
+
worker_timeout_sec: env_worker_timeout_sec,
|
|
71
|
+
worker_idle_timeout_sec: env_worker_idle_timeout_sec
|
|
70
72
|
}
|
|
71
73
|
end
|
|
72
74
|
|
data/lib/polyrun/cli/help.rb
CHANGED
|
@@ -23,7 +23,12 @@ module Polyrun
|
|
|
23
23
|
Warn if merge-coverage wall time exceeds N seconds (default 10): POLYRUN_MERGE_SLOW_WARN_SECONDS (0 disables)
|
|
24
24
|
Failure fragments (run-shards --merge-failures): POLYRUN_MERGE_FAILURES=1; parent sets POLYRUN_FAILURE_FRAGMENTS=1 in workers; POLYRUN_FAILURE_FRAGMENT_DIR, POLYRUN_MERGED_FAILURES_OUT, POLYRUN_MERGED_FAILURES_FORMAT; after_suite sets POLYRUN_MERGED_FAILURES_PATH when merge ran
|
|
25
25
|
Parallel RSpec workers: POLYRUN_WORKERS default 5, max 10 (run-shards / parallel-rspec / start); distinct from POLYRUN_SHARD_PROCESSES / ci-shard --shard-processes (local processes per CI matrix job)
|
|
26
|
+
Per-worker wall timeout: run-shards --worker-timeout SEC or POLYRUN_WORKER_TIMEOUT_SEC (max time since each worker spawn). Parent polls all live workers together. Exit 124; remaining workers stopped.
|
|
27
|
+
Per-worker idle timeout: --worker-idle-timeout SEC or POLYRUN_WORKER_IDLE_TIMEOUT_SEC counts only after a successful ping timestamp (positive float in POLYRUN_WORKER_PING_FILE); empty or unreadable pings do not satisfy idle enforcement—use wall timeout until the first ping. RSpec/Minitest/Quick installers call Polyrun::WorkerPing.ping! per example/suite. Ping files live under tmp/polyrun/ (gitignored via tmp/); parent unlinks each after its worker exits. Exit 125. Optional outer cap: --worker-timeout (exit 124). Optional periodic pings: POLYRUN_WORKER_PING_THREAD=1 (POLYRUN_WORKER_PING_INTERVAL_SEC); WorkerPing.ensure_interval_ping_thread! (installers invoke it—call yourself if wiring workers without install_worker_ping!).
|
|
28
|
+
If Polyrun::Log.stderr is null or redirected away, set POLYRUN_ORCHESTRATION_STDERR=1 to also print timeout/SIGINT summary lines to process stderr.
|
|
29
|
+
Spec quality (opt-in): POLYRUN_SPEC_QUALITY=1; per-example JSONL fragments; POLYRUN_SPEC_QUALITY_SAMPLE=0.0-1.0 (default 1.0); POLYRUN_SPEC_QUALITY_STRICT=1; run-shards --merge-spec-quality sets POLYRUN_SPEC_QUALITY_FRAGMENTS=1 in workers; merge-spec-quality / report-spec-quality
|
|
26
30
|
Partition timing granularity (default file): POLYRUN_TIMING_GRANULARITY=file|example (experimental per-example; see partition.timing_granularity)
|
|
31
|
+
Partition strategies: round_robin (default, sorted), preserve_order_round_robin (paths-file order), lazy_robin (sorted RR + timing diagnostics), cost_binpack (LPT), hrw. partition.timing_file without strategy implies cost_binpack.
|
|
27
32
|
|
|
28
33
|
commands:
|
|
29
34
|
version print version
|
|
@@ -31,20 +36,23 @@ module Polyrun
|
|
|
31
36
|
prepare run prepare recipe: default | assets (optional prepare.command overrides bin/rails assets:precompile) | shell (prepare.command required)
|
|
32
37
|
merge-coverage merge SimpleCov JSON fragments (json/lcov/cobertura/console)
|
|
33
38
|
merge-failures merge per-shard failure JSONL fragments or RSpec JSON files (jsonl/json)
|
|
34
|
-
run-shards fan out N parallel OS processes (POLYRUN_SHARD_*; not Ruby threads); optional --merge-coverage / --merge-failures
|
|
39
|
+
run-shards fan out N parallel OS processes (POLYRUN_SHARD_*; not Ruby threads); optional --merge-coverage / --merge-failures / --merge-spec-quality
|
|
35
40
|
parallel-rspec run-shards + merge-coverage (defaults to: bundle exec rspec after --)
|
|
36
41
|
start parallel-rspec; auto-runs prepare (shell/assets) and db:setup-* when polyrun.yml configures them; legacy script/build_spec_paths.rb if paths_build absent
|
|
37
42
|
ci-shard-run CI matrix: build-paths + plan for POLYRUN_SHARD_INDEX / POLYRUN_SHARD_TOTAL (or config), then run your command with that shard's paths after --; optional --shard-processes M or --workers M (POLYRUN_SHARD_PROCESSES; not POLYRUN_WORKERS) for N×M jobs × processes on this host
|
|
38
43
|
ci-shard-rspec same as ci-shard-run -- bundle exec rspec; optional --shard-processes / --workers / -- [rspec-only flags]
|
|
39
44
|
build-paths write partition.paths_file from partition.paths_build (same as auto step before plan/run-shards)
|
|
40
45
|
init write a starter polyrun.yml or POLYRUN.md from built-in templates (see docs/SETUP_PROFILE.md)
|
|
41
|
-
queue file-backed batch queue: init (optional --shard/--total etc. as plan, then claim/ack)
|
|
46
|
+
queue file-backed batch queue: init (optional --shard/--total etc. as plan, then claim/ack/reclaim/status --json)
|
|
47
|
+
run-queue init queue and run N workers that claim batches until drained
|
|
42
48
|
quick run Polyrun::Quick (describe/it, before/after, let, expect…to, assert_*; optional capybara!)
|
|
43
49
|
hook run <phase> run one shell hook from polyrun.yml hooks: (e.g. before_suite); optional --shard/--total
|
|
44
50
|
report-coverage write all coverage formats from one JSON file
|
|
45
51
|
report-junit RSpec JSON or Polyrun testcase JSON → JUnit XML (CI)
|
|
46
52
|
report-timing print slow-file summary from merged timing JSON
|
|
47
53
|
merge-timing merge polyrun_timing_*.json shards
|
|
54
|
+
merge-spec-quality merge polyrun-spec-quality-fragment-*.jsonl shards
|
|
55
|
+
report-spec-quality spec quality report from merged JSON (zero-hit, hot lines, churn)
|
|
48
56
|
config print effective config by dotted path (see Polyrun::Config::Effective; same tree as YAML plus merged prepare.env, resolved partition shard fields, workers)
|
|
49
57
|
env print shard + database env (see polyrun.yml databases)
|
|
50
58
|
db:setup-template migrate template DB (PostgreSQL)
|
data/lib/polyrun/cli/helpers.rb
CHANGED
|
@@ -11,6 +11,28 @@ module Polyrun
|
|
|
11
11
|
Polyrun::Config::Resolver.env_int(name, fallback)
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
+
# Per-worker wall clock (from spawn) for run-shards / ci-shard fan-out; unset or invalid means no limit.
|
|
15
|
+
def env_worker_timeout_sec
|
|
16
|
+
s = ENV["POLYRUN_WORKER_TIMEOUT_SEC"].to_s.strip
|
|
17
|
+
return nil if s.empty?
|
|
18
|
+
|
|
19
|
+
f = Float(s, exception: false)
|
|
20
|
+
return nil if f.nil? || f <= 0
|
|
21
|
+
|
|
22
|
+
f
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Max seconds without a new monotonic timestamp ping in the worker (see +polyrun/worker_ping+).
|
|
26
|
+
def env_worker_idle_timeout_sec
|
|
27
|
+
s = ENV["POLYRUN_WORKER_IDLE_TIMEOUT_SEC"].to_s.strip
|
|
28
|
+
return nil if s.empty?
|
|
29
|
+
|
|
30
|
+
f = Float(s, exception: false)
|
|
31
|
+
return nil if f.nil? || f <= 0
|
|
32
|
+
|
|
33
|
+
f
|
|
34
|
+
end
|
|
35
|
+
|
|
14
36
|
def resolve_shard_index(pc)
|
|
15
37
|
Polyrun::Config::Resolver.resolve_shard_index(pc)
|
|
16
38
|
end
|
|
@@ -97,6 +119,22 @@ module Polyrun
|
|
|
97
119
|
def resolve_partition_timing_granularity(pc, cli_val)
|
|
98
120
|
Polyrun::Config::Resolver.resolve_partition_timing_granularity(pc, cli_val)
|
|
99
121
|
end
|
|
122
|
+
|
|
123
|
+
def load_stable_assignment(pc)
|
|
124
|
+
path = pc["stable_assignment_file"] || pc[:stable_assignment_file]
|
|
125
|
+
return nil unless path
|
|
126
|
+
|
|
127
|
+
abs = File.expand_path(path.to_s, Dir.pwd)
|
|
128
|
+
return nil unless File.file?(abs)
|
|
129
|
+
|
|
130
|
+
data = JSON.parse(File.read(abs))
|
|
131
|
+
return data if data.is_a?(Hash)
|
|
132
|
+
|
|
133
|
+
nil
|
|
134
|
+
rescue JSON::ParserError
|
|
135
|
+
Polyrun::Log.warn "polyrun: invalid stable_assignment_file JSON: #{abs}"
|
|
136
|
+
nil
|
|
137
|
+
end
|
|
100
138
|
end
|
|
101
139
|
end
|
|
102
140
|
end
|
data/lib/polyrun/cli/init_command.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
require "optparse"
|
|
2
|
+
require "fileutils"
|
|
2
3
|
|
|
3
4
|
module Polyrun
|
|
4
5
|
class CLI
|
|
@@ -7,6 +8,7 @@ module Polyrun
|
|
|
7
8
|
"gem" => "minimal_gem.polyrun.yml",
|
|
8
9
|
"rails" => "rails_prepare.polyrun.yml",
|
|
9
10
|
"ci-matrix" => "ci_matrix.polyrun.yml",
|
|
11
|
+
"spec-quality" => "polyrun_spec_quality.yml",
|
|
10
12
|
"doc" => "POLYRUN.md"
|
|
11
13
|
}.freeze
|
|
12
14
|
|
|
@@ -34,6 +36,7 @@ module Polyrun
|
|
|
34
36
|
path = File.expand_path(dest)
|
|
35
37
|
return init_refuses_overwrite(path) if File.file?(path) && !force
|
|
36
38
|
|
|
39
|
+
FileUtils.mkdir_p(File.dirname(path))
|
|
37
40
|
File.write(path, body)
|
|
38
41
|
Polyrun::Log.warn "polyrun init: wrote #{path}"
|
|
39
42
|
0
|
|
@@ -92,7 +95,11 @@ module Polyrun
|
|
|
92
95
|
end
|
|
93
96
|
|
|
94
97
|
def default_init_output(profile)
|
|
95
|
-
|
|
98
|
+
case profile
|
|
99
|
+
when "doc" then "POLYRUN.md"
|
|
100
|
+
when "spec-quality" then "config/polyrun_spec_quality.yml"
|
|
101
|
+
else "polyrun.yml"
|
|
102
|
+
end
|
|
96
103
|
end
|
|
97
104
|
end
|
|
98
105
|
end
|
data/lib/polyrun/cli/partition_diagnostics.rb
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
module Polyrun
|
|
2
|
+
class CLI
|
|
3
|
+
# Shared stderr diagnostics after {Partition::Plan} is built.
|
|
4
|
+
module PartitionDiagnostics
|
|
5
|
+
private
|
|
6
|
+
|
|
7
|
+
def partition_emit_diagnostics!(plan:, items:, costs:, timing_path:, granularity: :file)
|
|
8
|
+
return unless timing_path && costs && !costs.empty?
|
|
9
|
+
|
|
10
|
+
analysis = Polyrun::Partition::TimingDiagnostics.analyze(
|
|
11
|
+
items: items,
|
|
12
|
+
costs: costs,
|
|
13
|
+
timing_path: timing_path,
|
|
14
|
+
root: plan.root,
|
|
15
|
+
granularity: granularity
|
|
16
|
+
)
|
|
17
|
+
Polyrun::Partition::TimingDiagnostics.emit_warnings!(analysis)
|
|
18
|
+
Polyrun::Partition::Reports.emit_all!(plan)
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
data/lib/polyrun/cli/plan_command.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# rubocop:disable Polyrun/FileLength -- plan argv + partition emit
|
|
1
2
|
require "json"
|
|
2
3
|
require "optparse"
|
|
3
4
|
|
|
@@ -38,8 +39,9 @@ module Polyrun
|
|
|
38
39
|
|
|
39
40
|
items, costs, strategy = bundle
|
|
40
41
|
constraints = load_partition_constraints(pc, ctx[:constraints_path])
|
|
42
|
+
stable = load_stable_assignment(pc)
|
|
41
43
|
|
|
42
|
-
manifest = plan_command_build_manifest(
|
|
44
|
+
manifest, plan = plan_command_build_manifest(
|
|
43
45
|
items: items,
|
|
44
46
|
total: ctx[:total],
|
|
45
47
|
strategy: strategy,
|
|
@@ -47,7 +49,16 @@ module Polyrun
|
|
|
47
49
|
costs: costs,
|
|
48
50
|
constraints: constraints,
|
|
49
51
|
shard: ctx[:shard],
|
|
50
|
-
timing_granularity: ctx[:timing_granularity]
|
|
52
|
+
timing_granularity: ctx[:timing_granularity],
|
|
53
|
+
stable_assignment: stable,
|
|
54
|
+
shard_weights: pc["shard_weights"] || pc[:shard_weights]
|
|
55
|
+
)
|
|
56
|
+
partition_emit_diagnostics!(
|
|
57
|
+
plan: plan,
|
|
58
|
+
items: items,
|
|
59
|
+
costs: costs,
|
|
60
|
+
timing_path: timing_path,
|
|
61
|
+
granularity: ctx[:timing_granularity]
|
|
51
62
|
)
|
|
52
63
|
[manifest, 0]
|
|
53
64
|
end
|
|
@@ -56,7 +67,12 @@ module Polyrun
|
|
|
56
67
|
items = plan_plan_items(paths_file, argv)
|
|
57
68
|
return nil if items.nil?
|
|
58
69
|
|
|
59
|
-
loaded = plan_load_costs_and_strategy(
|
|
70
|
+
loaded = plan_load_costs_and_strategy(
|
|
71
|
+
timing_path,
|
|
72
|
+
ctx[:strategy],
|
|
73
|
+
ctx[:timing_granularity],
|
|
74
|
+
strategy_explicit: ctx[:strategy_explicit]
|
|
75
|
+
)
|
|
60
76
|
return nil if loaded.nil?
|
|
61
77
|
|
|
62
78
|
costs, strategy = loaded
|
|
@@ -68,6 +84,7 @@ module Polyrun
|
|
|
68
84
|
shard: resolve_shard_index(pc),
|
|
69
85
|
total: resolve_shard_total(pc),
|
|
70
86
|
strategy: (pc["strategy"] || pc[:strategy] || "round_robin").to_s,
|
|
87
|
+
strategy_explicit: !!(pc["strategy"] || pc[:strategy]),
|
|
71
88
|
seed: pc["seed"] || pc[:seed],
|
|
72
89
|
paths_file: nil,
|
|
73
90
|
timing_path: nil,
|
|
@@ -80,10 +97,13 @@ module Polyrun
|
|
|
80
97
|
def plan_command_register_partition_options!(opts, ctx)
|
|
81
98
|
opts.on("--shard INDEX", Integer) { |v| ctx[:shard] = v }
|
|
82
99
|
opts.on("--total N", Integer) { |v| ctx[:total] = v }
|
|
83
|
-
opts.on("--strategy NAME", String)
|
|
100
|
+
opts.on("--strategy NAME", String) do |v|
|
|
101
|
+
ctx[:strategy] = v
|
|
102
|
+
ctx[:strategy_explicit] = true
|
|
103
|
+
end
|
|
84
104
|
opts.on("--seed VAL") { |v| ctx[:seed] = v }
|
|
85
105
|
opts.on("--constraints PATH", "YAML: pin / serial_glob (see spec_queue.md)") { |v| ctx[:constraints_path] = v }
|
|
86
|
-
opts.on("--timing PATH", "path => seconds JSON; implies cost_binpack unless strategy is
|
|
106
|
+
opts.on("--timing PATH", "path => seconds JSON; implies cost_binpack unless strategy is explicit or timing-aware") do |v|
|
|
87
107
|
ctx[:timing_path] = v
|
|
88
108
|
end
|
|
89
109
|
opts.on("--timing-granularity VAL", "file (default) or example (experimental: path:line items)") do |v|
|
|
@@ -104,7 +124,7 @@ module Polyrun
|
|
|
104
124
|
end.parse!(argv)
|
|
105
125
|
end
|
|
106
126
|
|
|
107
|
-
def plan_command_build_manifest(items:, total:, strategy:, seed:, costs:, constraints:, shard:, timing_granularity: :file)
|
|
127
|
+
def plan_command_build_manifest(items:, total:, strategy:, seed:, costs:, constraints:, shard:, timing_granularity: :file, stable_assignment: nil, shard_weights: nil)
|
|
108
128
|
plan = Polyrun::Debug.time("Partition::Plan.new (plan command)") do
|
|
109
129
|
Polyrun::Partition::Plan.new(
|
|
110
130
|
items: items,
|
|
@@ -114,7 +134,9 @@ module Polyrun
|
|
|
114
134
|
costs: costs,
|
|
115
135
|
constraints: constraints,
|
|
116
136
|
root: Dir.pwd,
|
|
117
|
-
timing_granularity: timing_granularity
|
|
137
|
+
timing_granularity: timing_granularity,
|
|
138
|
+
stable_assignment: stable_assignment,
|
|
139
|
+
shard_weights: shard_weights
|
|
118
140
|
)
|
|
119
141
|
end
|
|
120
142
|
Polyrun::Debug.log_kv(
|
|
@@ -124,16 +146,13 @@ module Polyrun
|
|
|
124
146
|
strategy: strategy,
|
|
125
147
|
path_count: items.size
|
|
126
148
|
)
|
|
127
|
-
plan.manifest(shard)
|
|
149
|
+
[plan.manifest(shard), plan]
|
|
128
150
|
end
|
|
129
151
|
|
|
130
|
-
def plan_resolve_timing_path(pc, timing_path,
|
|
152
|
+
def plan_resolve_timing_path(pc, timing_path, _strategy = nil)
|
|
131
153
|
return timing_path if timing_path
|
|
132
154
|
|
|
133
|
-
|
|
134
|
-
return tf if tf && (Polyrun::Partition::Plan.cost_strategy?(strategy) || Polyrun::Partition::Plan.hrw_strategy?(strategy))
|
|
135
|
-
|
|
136
|
-
nil
|
|
155
|
+
pc["timing_file"] || pc[:timing_file]
|
|
137
156
|
end
|
|
138
157
|
|
|
139
158
|
def plan_plan_items(paths_file, argv)
|
|
@@ -147,8 +166,13 @@ module Polyrun
|
|
|
147
166
|
end
|
|
148
167
|
end
|
|
149
168
|
|
|
150
|
-
def plan_load_costs_and_strategy(timing_path, strategy, timing_granularity)
|
|
169
|
+
def plan_load_costs_and_strategy(timing_path, strategy, timing_granularity, strategy_explicit: false)
|
|
170
|
+
strategy = strategy.to_s
|
|
151
171
|
if timing_path
|
|
172
|
+
if strategy_explicit && strategy == "round_robin"
|
|
173
|
+
return [nil, strategy]
|
|
174
|
+
end
|
|
175
|
+
|
|
152
176
|
costs = Polyrun::Partition::Plan.load_timing_costs(
|
|
153
177
|
File.expand_path(timing_path.to_s, Dir.pwd),
|
|
154
178
|
granularity: timing_granularity
|
|
@@ -157,12 +181,16 @@ module Polyrun
|
|
|
157
181
|
Polyrun::Log.warn "polyrun plan: timing file missing or has no entries: #{timing_path}"
|
|
158
182
|
return nil
|
|
159
183
|
end
|
|
160
|
-
|
|
184
|
+
if Polyrun::Partition::Plan.timing_load_strategy?(strategy)
|
|
185
|
+
return [costs, strategy]
|
|
186
|
+
end
|
|
187
|
+
unless strategy_explicit
|
|
161
188
|
Polyrun::Log.warn "polyrun plan: using cost_binpack (timing data present)" if @verbose
|
|
162
|
-
|
|
189
|
+
return [costs, "cost_binpack"]
|
|
163
190
|
end
|
|
164
|
-
|
|
165
|
-
|
|
191
|
+
|
|
192
|
+
[nil, strategy]
|
|
193
|
+
elsif Polyrun::Partition::Plan.cost_strategy?(strategy) || Polyrun::Partition::Plan.lazy_robin_strategy?(strategy)
|
|
166
194
|
Polyrun::Log.warn "polyrun plan: --timing or partition.timing_file required for strategy #{strategy}"
|
|
167
195
|
nil
|
|
168
196
|
else
|
|
@@ -172,3 +200,4 @@ module Polyrun
|
|
|
172
200
|
end
|
|
173
201
|
end
|
|
174
202
|
end
|
|
203
|
+
# rubocop:enable Polyrun/FileLength
|
data/lib/polyrun/cli/queue_command.rb
CHANGED
|
@@ -34,8 +34,10 @@ module Polyrun
|
|
|
34
34
|
queue_cmd_ack(argv, dir, lease_id, worker)
|
|
35
35
|
when "status"
|
|
36
36
|
queue_cmd_status(argv, dir)
|
|
37
|
+
when "reclaim"
|
|
38
|
+
queue_cmd_reclaim(argv, dir)
|
|
37
39
|
else
|
|
38
|
-
Polyrun::Log.warn "usage: polyrun queue {init|claim|ack|status} [options]"
|
|
40
|
+
Polyrun::Log.warn "usage: polyrun queue {init|claim|ack|status|reclaim} [options]"
|
|
39
41
|
2
|
|
40
42
|
end
|
|
41
43
|
end
|
|
@@ -126,13 +128,34 @@ module Polyrun
|
|
|
126
128
|
end
|
|
127
129
|
|
|
128
130
|
# Handles `polyrun queue status`: prints the store's status hash as a single JSON line.
#
# @param argv [Array<String>] remaining CLI arguments; recognised options are consumed
# @param dir [String] queue directory used unless overridden with --dir
# @return [Integer] always 0
def queue_cmd_status(argv, dir)
  detailed = false

  parser = OptionParser.new
  parser.banner = "usage: polyrun queue status [--dir DIR] [--json]"
  parser.on("--dir PATH") { |path| dir = path }
  parser.on("--json", "Include lease details") { detailed = true }
  parser.parse!(argv)

  # --json only toggles the `detailed:` flag; the output is JSON either way.
  status = Polyrun::Queue::FileStore.new(dir).status(detailed: detailed)
  Polyrun::Log.puts JSON.generate(status)
  0
end
|
|
141
|
+
|
|
142
|
+
# Handles `polyrun queue reclaim`: asks the store to reclaim leased paths selected by
# age (--older-than) and/or owner (--worker), then prints the returned count as JSON.
#
# @param argv [Array<String>] remaining CLI arguments; recognised options are consumed
# @param dir [String] queue directory used unless overridden with --dir
# @return [Integer] 0 after reclaiming, 2 when neither selector was supplied
def queue_cmd_reclaim(argv, dir)
  min_age = nil
  owner = nil

  parser = OptionParser.new
  parser.banner = "usage: polyrun queue reclaim [--dir DIR] [--older-than DURATION] [--worker ID]"
  parser.on("--dir PATH") { |path| dir = path }
  parser.on("--older-than DUR", "e.g. 10m, 1h, 600s") do |text|
    min_age = Polyrun::Queue::Duration.parse_seconds(text)
  end
  parser.on("--worker ID") { |id| owner = id }
  parser.parse!(argv)

  # At least one selector is required before touching the store.
  if !min_age && !owner
    Polyrun::Log.warn "queue reclaim: need --older-than or --worker"
    return 2
  end

  count = Polyrun::Queue::FileStore.new(dir).reclaim!(older_than: min_age, worker_id: owner)
  Polyrun::Log.puts JSON.generate({"reclaimed_paths" => count})
  0
end
|
|
136
159
|
end
|
|
137
160
|
end
|
|
138
161
|
end
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
require "json"
|
|
2
|
+
require "optparse"
|
|
3
|
+
require "shellwords"
|
|
4
|
+
|
|
5
|
+
require_relative "../queue/duration"
|
|
6
|
+
|
|
7
|
+
module Polyrun
  class CLI
    # Implements `polyrun run-queue`: seeds a Polyrun::Queue::FileStore with an ordered
    # list of paths, forks worker processes that claim batches and run the given command
    # on each batch, and finally logs a summary line.
    module RunQueueCommand
      # Values accepted by --on-failure.
      RUN_QUEUE_FAILURE_MODES = %w[exit requeue].freeze
      private_constant :RUN_QUEUE_FAILURE_MODES

      private

      # rubocop:disable Metrics/AbcSize -- queue worker argv + spawn loop

      # Entry point for `polyrun run-queue [options] -- <command>`.
      #
      # @param argv [Array<String>] everything before the first "--" is parsed as options;
      #   everything after it is the command executed for each claimed batch
      # @param config_path [String, nil] config file; falls back to ENV["POLYRUN_CONFIG"]
      # @return [Integer] 0 when every worker exited successfully, 1 when any worker failed,
      #   2 on usage errors; a non-zero code from PathsBuild.apply! or from the
      #   manifest/ordering helper is returned unchanged
      def cmd_run_queue(argv, config_path)
        sep = argv.index("--")
        unless sep
          Polyrun::Log.warn "polyrun run-queue: need -- before the command"
          return 2
        end

        head = argv[0...sep]
        cmd = argv[(sep + 1)..].map(&:to_s)
        if cmd.empty?
          Polyrun::Log.warn "polyrun run-queue: need a command after --"
          return 2
        end

        cfg = Polyrun::Config.load(path: config_path || ENV["POLYRUN_CONFIG"])
        pc = cfg.partition
        ctx = plan_command_initial_context(pc)

        dir = ".polyrun-queue"
        batch = 5
        on_failure = "exit"
        paths_file = nil
        workers = nil
        OptionParser.new do |opts|
          opts.banner = "usage: polyrun run-queue [--workers N] [--batch N] [--dir DIR] [--on-failure exit|requeue] [partition options] -- <command>"
          opts.on("--workers N", Integer) { |v| workers = v }
          opts.on("--batch N", Integer) { |v| batch = v }
          opts.on("--dir PATH") { |v| dir = v }
          opts.on("--paths-file PATH", String) { |v| paths_file = v }
          opts.on("--on-failure MODE", "exit (default) or requeue") { |v| on_failure = v }
          plan_command_register_partition_options!(opts, ctx)
        end.parse!(head)

        workers ||= env_int("POLYRUN_WORKERS", Polyrun::Config::DEFAULT_PARALLEL_WORKERS)

        # Reject values that would otherwise "succeed" without running anything
        # (no workers spawned) or silently fall back to the default failure mode.
        problem = run_queue_option_error(workers: workers, batch: batch, on_failure: on_failure)
        if problem
          Polyrun::Log.warn "polyrun run-queue: #{problem}"
          return 2
        end

        paths_file ||= pc["paths_file"] || pc[:paths_file]
        unless paths_file
          Polyrun::Log.warn "polyrun run-queue: need --paths-file or partition.paths_file"
          return 2
        end

        code = Polyrun::Partition::PathsBuild.apply!(partition: pc, cwd: Dir.pwd)
        return code if code != 0

        store = Polyrun::Queue::FileStore.new(dir)
        # Refuse to reuse a directory that already holds a queue.json.
        if File.file?(File.join(File.expand_path(dir), "queue.json"))
          Polyrun::Log.warn "polyrun run-queue: queue already exists at #{dir}; remove it or use --dir"
          return 2
        end

        ordered, code = queue_partition_manifest_and_ordered_paths(cfg, pc, ctx, paths_file)
        return code if code != 0

        store.init!(ordered)
        Polyrun::Log.warn "polyrun run-queue: #{ordered.size} path(s), #{workers} worker(s), batch=#{batch}" if @verbose

        run_t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        pids = run_queue_spawn_workers(store: store, workers: workers, batch: batch, cmd: cmd, on_failure: on_failure)
        results = run_queue_wait_workers(pids, store: store, on_failure: on_failure)
        wall = Process.clock_gettime(Process::CLOCK_MONOTONIC) - run_t0

        stat = store.status(detailed: true)
        # NOTE: results[:ok] / results[:fail] count worker processes (one increment per
        # reaped worker), even though the labels below read batches_ok / batches_fail.
        Polyrun::Log.warn format(
          "polyrun run-queue: done pending=%d done=%d leases=%d wall=%.1fs batches_ok=%d batches_fail=%d",
          stat["pending"], stat["done"], stat["leases"], wall, results[:ok], results[:fail]
        )

        results[:fail].positive? ? 1 : 0
      end
      # rubocop:enable Metrics/AbcSize

      # Checks the numeric and mode options of run-queue.
      #
      # @return [String, nil] a usage-error message, or nil when all values are usable
      def run_queue_option_error(workers:, batch:, on_failure:)
        return "--workers must be >= 1 (got #{workers.inspect})" if workers.to_i < 1
        return "--batch must be >= 1 (got #{batch.inspect})" if batch.to_i < 1
        unless RUN_QUEUE_FAILURE_MODES.include?(on_failure.to_s)
          return "--on-failure must be one of #{RUN_QUEUE_FAILURE_MODES.join("|")} (got #{on_failure.inspect})"
        end

        nil
      end

      # Forks +workers+ child processes, each running {#run_queue_worker_loop} under the
      # id "worker-<index>".
      #
      # @return [Array<Hash>] one {pid:, worker_id:} entry per forked child
      def run_queue_spawn_workers(store:, workers:, batch:, cmd:, on_failure:)
        workers.times.map do |index|
          wid = "worker-#{index}"
          pid = Process.fork do
            run_queue_worker_loop(store: store, worker_id: wid, batch: batch, cmd: cmd, on_failure: on_failure)
          end
          {pid: pid, worker_id: wid}
        end
      end

      # Body of a forked worker: claims batches until the store hands back no paths and
      # runs +cmd+ on each batch. This method never returns; it always ends in +exit+.
      #
      # Exit status: 0 once a claim comes back empty; on the first failing batch, 1 in
      # "requeue" mode (after handing the lease back via reclaim_lease!) or the batch's
      # own non-zero status otherwise; 2 when a Polyrun::Error is raised.
      def run_queue_worker_loop(store:, worker_id:, batch:, cmd:, on_failure:)
        requeue = on_failure.to_s == "requeue"
        loop do
          claim = store.claim!(worker_id: worker_id, batch_size: batch)
          paths = claim["paths"] || []
          break if paths.empty?

          code = run_queue_run_batch(cmd, paths)
          if code.zero?
            store.ack!(lease_id: claim["lease_id"], worker_id: worker_id)
            next
          end

          if requeue
            store.reclaim_lease!(claim["lease_id"])
            exit 1
          end
          # code is non-zero here, so it can be used as the exit status directly.
          exit code
        end
        exit 0
      rescue Polyrun::Error => e
        Polyrun::Log.warn "polyrun run-queue worker #{worker_id}: #{e.message}"
        exit 2
      end

      # Runs +cmd+ with +paths+ appended as extra arguments (no shell involved, since
      # system receives an argument list).
      #
      # @return [Integer] 0 on success, otherwise the command's exit status, or 1 when no
      #   exit status is available
      def run_queue_run_batch(cmd, paths)
        return 0 if system(*cmd, *paths)

        Process.last_status&.exitstatus || 1
      end

      # Reaps the forked workers and tallies how many exited successfully.
      # Waiting on any child (-1) lets a failed worker's leases be reclaimed as soon as
      # it exits, while other workers may still be running.
      #
      # @param pids [Array<Hash>] entries as returned by {#run_queue_spawn_workers}
      # @param on_failure [String] accepted for signature parity with the other helpers;
      #   not consulted here
      # @return [Hash] {ok: Integer, fail: Integer} counts of worker processes
      def run_queue_wait_workers(pids, store:, on_failure:)
        succeeded = 0
        failed = 0
        outstanding = pids.to_h { |entry| [entry[:pid], entry[:worker_id]] }

        until outstanding.empty?
          begin
            pid, status = Process.wait2(-1)
          rescue Errno::ECHILD
            # No child is left to reap although workers are still tracked (they were
            # reaped elsewhere). Their status is unknown, so count them as failed and
            # hand back whatever they still hold instead of crashing the run.
            outstanding.each_value do |worker_id|
              failed += 1
              run_queue_reclaim_worker(store, worker_id)
            end
            break
          end

          worker_id = outstanding.delete(pid)
          next unless worker_id # some other child of this process

          if status.success?
            succeeded += 1
          else
            failed += 1
            run_queue_reclaim_worker(store, worker_id)
          end
        end

        {ok: succeeded, fail: failed}
      end

      # Asks the store to reclaim everything leased to +worker_id+ and logs the count
      # when it is positive.
      def run_queue_reclaim_worker(store, worker_id)
        reclaimed = store.reclaim!(worker_id: worker_id)
        Polyrun::Log.warn "polyrun run-queue: reclaimed #{reclaimed} path(s) from #{worker_id}" if reclaimed.positive?
      end
    end
  end
end
|