lda-ruby 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d0b97c33082528d2f992c4686e85e2080746938cd898c2a0662dd357979ad650
4
- data.tar.gz: e4bd8e49f0b3f295b0f3ebfd78fc32db4835f24d93837ad566069b83046538c2
3
+ metadata.gz: a2470a73c1ba2c8807574f34a606a429adbd20d94457990667b59d241b521171
4
+ data.tar.gz: 8c7aa9b952901e15b48b974d68b339ab20e41edfcc07fa391266591b219b5771
5
5
  SHA512:
6
- metadata.gz: cebd90cdaca9d030379105509325aac12f457eb9da75f8e2fc8f2d618d1ecbf201dc6b4407ec28e8f95b1dda4a71ba864b37ea16ad38a22b7605f2b7e281f908
7
- data.tar.gz: fb452cc42435ff9382a39f878e8d7de5e3825e1b800e1cca4c2bec7684294d10fdec960f11cefc3f18a8c6381552e14ff058ecebd50884f91115d65e93880825
6
+ metadata.gz: 3fbe0fe85517e82b63bb8f28358178fd2490e1c4162342efa62864b7a5d83cc81488c83b75443bc8a539319717c588b37a27504722947c361a08636884ab4133
7
+ data.tar.gz: c240216720322fb4a539a63cff5c85a37544fbbad6281bb2b5c89787016f9dab817ab6db8295083e7c218454b23065ec06836e454b24d47e85111696e2e5dd1c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ version 0.5.0
2
+ =============
3
+
4
+ - Expanded Rust-side EM orchestration with cached corpus snapshots, managed corpus sessions, and start-aware fallback paths.
5
+ - Added Rust benchmark guardrails and deeper pure/Rust orchestration parity coverage.
6
+ - Added release-blocking precompiled gem targets for Windows (`x64-mingw-ucrt`) and Linux musl (`x86_64-linux-musl`).
7
+ - Hardened precompiled release workflows with full-matrix CI, post-publish verification, and release failure alerts.
8
+
1
9
  version 0.4.0
2
10
  =============
3
11
 
data/README.md CHANGED
@@ -66,12 +66,15 @@ For an interactive shell with Rust toolchain + bindgen dependencies:
66
66
  - `bundle exec ruby -Ilib:test test/backend_compatibility_test.rb` runs backend compatibility fixtures.
67
67
  - `LDA_RUBY_BACKEND=rust bundle exec ruby -Ilib:test test/backend_compatibility_test.rb` runs parity checks in rust mode.
68
68
  - `./bin/benchmark-backends` benchmarks available backends (`pure`, `native`, `rust`) and prints JSON.
69
+ - `./bin/check-rust-benchmark` enforces the Rust/pure benchmark ratio guardrail (configurable via env vars).
69
70
  - `./bin/docker-test-install-policies` verifies packaged-gem install behavior for `LDA_RUBY_RUST_BUILD=auto|always|never`, including runtime EM smoke checks.
70
71
  - `./bin/test-packaged-gem-fallback` verifies packaged-gem fallback behavior without Cargo (auto/never succeed, always fails) plus runtime smoke checks.
71
72
  - `./bin/test-packaged-gem-rust-enabled` verifies packaged-gem behavior with Cargo available (auto/always enable Rust, never disables Rust) plus runtime smoke checks.
72
73
  - `./bin/test-packaged-gem-manifest` verifies packaged-gem contents/metadata and rejects leaked build artifacts.
73
74
  - `./bin/release-preflight` runs unit tests + packaged-gem validation stack; set `SKIP_DOCKER=1` to skip Docker matrix checks.
74
75
  - `./bin/check-version-sync` verifies version parity between `VERSION.yml`, `lib/lda-ruby/version.rb`, and expected release tag.
76
+ - `./bin/verify-rubygems-api-key` validates that your RubyGems API key can push non-interactively (required for CI release publishes).
77
+ - `./bin/verify-release-publish --tag v0.4.0` verifies published RubyGems + GitHub release assets for a release tag.
75
78
  - `./bin/release-prepare 0.4.0` updates version/changelog files for a new release version.
76
79
  - `./bin/release-artifacts --tag v0.4.0` runs release checks, builds the source gem, and writes SHA256 checksums.
77
80
  - `./bin/release-precompiled-artifacts --tag v0.4.0 --platform x86_64-linux --skip-preflight` builds a precompiled platform gem and verifies install/runtime smoke checks.
@@ -123,7 +126,7 @@ For artifact strategy, compatibility targets, and rollout/deprecation rules, see
123
126
 
124
127
  `em("seeded")` is supported by both native and pure backends for deterministic fixture-oriented runs.
125
128
 
126
- Rust status: the extension hook layer is scaffolded in `ext/lda-ruby-rust`. Current Rust kernels include batched per-iteration corpus inference, batched per-document inference, topic-weights-per-word, topic-term-count accumulation, topic-term normalization/log-beta finalization, gamma-shift convergence reduction, topic-document average log-probability computation, and seeded topic-term initialization when `backend: :rust` is active; remaining model math still delegates to the pure Ruby backend. CI now runs dedicated rust-runtime checks and numeric parity fixtures against the pure backend.
129
+ Rust status: the extension hook layer is scaffolded in `ext/lda-ruby-rust`. Current Rust kernels include batched per-iteration corpus inference, batched per-document inference, topic-weights-per-word, topic-term-count accumulation, topic-term normalization/log-beta finalization, gamma-shift convergence reduction, topic-document average log-probability computation, seeded topic-term initialization, random topic-term initialization, and Rust-side EM orchestration paths (`run_em`, `run_em_with_start`, `run_em_with_start_seed`, session-based `run_em_on_session_with_start_seed`, settings-aware session `run_em_on_session_start`, and unified session-settings orchestration `run_em_on_session`) when `backend: :rust` is active. Session orchestration now uses shared Rust-side corpus storage and borrowed execution paths so EM session runs do not deep-clone corpus arrays per call, and the Ruby adapter now auto-recreates missing Rust sessions before EM to stay on the session path. The Rust backend still keeps the pure Ruby implementation as a compatibility fallback path if Rust orchestration is unavailable or returns invalid output. CI runs dedicated rust-runtime checks and numeric parity fixtures against the pure backend.
127
130
  `compile_rust` and `LDA_RUBY_RUST_BUILD=always` require a Rust toolchain plus Ruby development headers and `libclang`.
128
131
  Gem packaging excludes local Rust build artifacts (`ext/lda-ruby-rust/target/**`) so local cargo outputs do not leak into published gems.
129
132
 
data/VERSION.yml CHANGED
@@ -1,5 +1,5 @@
1
1
  ---
2
2
  :major: 0
3
- :minor: 4
3
+ :minor: 5
4
4
  :patch: 0
5
5
  :build:
@@ -4,22 +4,28 @@ This document is the canonical handoff state for continuing the Ruby 3.2+/3.3+ m
4
4
 
5
5
  ## Snapshot
6
6
 
7
- - Snapshot date: 2026-02-25
8
- - Active branch: `codex/modernization`
9
- - Branch head at snapshot: `7f9b101` (`Fix macOS Rust extension linking for precompiled builds`)
10
- - Repo status at snapshot: clean working tree on `codex/modernization` (in sync with `origin/codex/modernization`)
7
+ - Snapshot date: 2026-03-02
8
+ - Active branch: `codex/rust-orchestration-phase8`
9
+ - Repo status at snapshot start: clean working tree on `codex/rust-orchestration-phase8` (ahead of `master`)
10
+ - Modernization branch: `codex/modernization` merged into `master`
11
+ - Merge commit for modernization PR: `bc11269` (`Merge pull request #18 from ealdent/codex/modernization`)
11
12
  - Latest release dry-run validation (GitHub Actions):
12
- - date: 2026-02-25
13
- - workflow run: `release.yml` run `22382692416` (`workflow_dispatch`, `publish=false`)
13
+ - date: 2026-03-02
14
+ - workflow run: `release.yml` run `22556487788` (`workflow_dispatch`, `publish=false`)
14
15
  - result: success (`validate release candidate`, `build release artifacts`, and all `build precompiled artifacts` matrix targets)
15
16
  - publish jobs skipped by design (`publish=false`)
16
- - Open pull request:
17
- - `codex/modernization` -> `master`
17
+ - Modernization pull request:
18
18
  - PR `#18`: `https://github.com/ealdent/lda-ruby/pull/18`
19
- - Latest PR CI validation (GitHub Actions):
20
- - date: 2026-02-25
21
- - workflow run: `CI` run `22383301379` (trigger: `pull_request` for PR `#18`)
22
- - result: success (all checks green, including `precompiled gem build` matrix and install policy/rust runtime jobs)
19
+ - state: merged (2026-02-25)
20
+ - Release publish attempts (`v0.4.0`):
21
+ - `release.yml` run `22383716372`: failed at RubyGems publish (`No such API key`)
22
+ - `release.yml` run `22383849236`: failed at RubyGems publish (`OTP code required`)
23
+ - rerun attempt (`22383849236`, attempt 2): failed at RubyGems publish (`OTP code required`)
24
+ - rerun attempt (`22383849236`, attempt 3): success (RubyGems publish + GitHub release publish complete)
25
+ - Release status:
26
+ - `v0.4.0` published to RubyGems (source + precompiled Linux/macOS gems)
27
+ - GitHub release `v0.4.0` published with gem + `.sha256` assets
28
+ - next release dry-run matrix is validated for Linux, Linux musl, macOS Intel, macOS Apple Silicon, and Windows targets (`22556487788`)
23
29
 
24
30
  ## Project Goal
25
31
 
@@ -82,15 +88,28 @@ Delivered:
82
88
  - topic-document probability
83
89
  - seeded initialization
84
90
  - trusted kernel-output fast path enabled in rust mode
91
+ - Rust-side EM orchestration path (`Lda::RustBackend.run_em`) retained as a compatibility fallback for precomputed beta-input execution
92
+ - Rust-side start-aware deterministic orchestration path (`Lda::RustBackend.run_em_with_start`) for `seeded`/`deterministic` EM starts
93
+ - Rust-side random-start orchestration path (`Lda::RustBackend.run_em_with_start_seed`) using explicit seed-controlled random initialization (`Lda::RustBackend.random_topic_term_probabilities`)
94
+ - Rust-managed corpus session lifecycle (`Lda::RustBackend.create_corpus_session` / `drop_corpus_session`) with session-based start-aware EM orchestration (`run_em_on_session_with_start_seed`) wired in `Lda::Backends::Rust`
95
+ - Rust-managed session settings lifecycle (`configure_corpus_session`) with settings-aware orchestration (`run_em_on_session_start`) wired in `Lda::Backends::Rust`
96
+ - Rust session execution refactor to shared session corpus storage + borrowed orchestration helpers, eliminating deep corpus clone overhead on each session EM run
97
+ - unified Rust session orchestration API (`run_em_on_session`) that applies settings + runs EM in one call inside Rust session orchestration
98
+ - `Lda::Backends::Rust` now routes cached-corpus EM through managed Rust orchestration (`run_em_on_session_with_corpus`), leaving session reuse/recovery decisions in Rust and preferring that managed path even when no active session id is cached locally; when the managed API is unavailable it still prefers direct Rust non-session orchestration (`run_em_with_start_seed`) before legacy Ruby-side beta-input fallback (`run_em`)
99
+ - direct non-session Rust orchestration now reuses the backend's cached Rust corpus snapshot instead of rebuilding corpus arrays from `@corpus` on each fallback invocation
100
+ - legacy Rust beta-input compatibility fallback now also reuses the backend's cached Rust corpus snapshot, only asking the pure-Ruby backend to synthesize the initial beta matrix
101
+ - Rust managed-session orchestration API (`run_em_on_session_with_corpus`) added to recreate missing sessions and run EM in one Rust call, and now directly falls back to start-aware array execution inside Rust if session-backed execution cannot be used
102
+ - Rust session lifecycle replacement API (`replace_corpus_session`) added so corpus reassignment can update existing Rust sessions in place (config reset + corpus swap) instead of Ruby-side drop/recreate
103
+ - `Lda::Backends::Rust` now keeps session-based orchestration on the managed Rust path (`run_em_on_session_with_corpus`) even when sessions are dropped externally
85
104
  - parity/compatibility test coverage and rust runtime CI
86
105
 
87
106
  Open in Phase 4:
88
107
 
89
- - optional deeper Rust ownership of orchestration logic (current design still intentionally delegates control flow through Ruby fallback scaffolding)
108
+ - optional deeper Rust ownership beyond current unified session orchestration (for example additional control-plane logic and lifecycle APIs)
90
109
 
91
110
  ### Phase 5 (packaging/release)
92
111
 
93
- Status: Phase 5A complete (source-gem release automation), Phase 5B complete for initial Linux/macOS precompiled gems.
112
+ Status: Phase 5A complete (source-gem release automation), Phase 5B complete for Linux/macOS/Windows/musl precompiled release matrix.
94
113
 
95
114
  Delivered:
96
115
 
@@ -101,25 +120,41 @@ Delivered:
101
120
  - packaged gem manifest/metadata gate (`bin/test-packaged-gem-manifest`)
102
121
  - single-command local gate (`bin/release-preflight`)
103
122
  - version/tag parity guard (`bin/check-version-sync`)
123
+ - RubyGems CI credential preflight helper (`bin/verify-rubygems-api-key`)
124
+ - post-publish artifact verification helper (`bin/verify-release-publish`)
104
125
  - deterministic release preparation helper (`bin/release-prepare`)
105
126
  - release artifact builder with checksum output (`bin/release-artifacts`)
106
127
  - precompiled artifact builder + runtime validator (`bin/release-precompiled-artifacts`)
107
128
  - gemspec precompiled variant support (`LDA_RUBY_GEM_VARIANT=precompiled`)
108
129
  - precompiled platform compatibility/publish policy (`docs/precompiled-platform-policy.md`)
130
+ - precompiled target expansion tracker (`docs/precompiled-target-evaluation.md`)
109
131
  - macOS Rust build linker guardrail (`dynamic_lookup`) for precompiled packaging paths
110
132
  - tag-driven release workflow (`.github/workflows/release.yml`)
133
+ - release failure alert workflow (`.github/workflows/release-failure-alert.yml`)
134
+ - tuned to create issues only for failed tag-triggered release runs, with failed job links in alert body
135
+ - now auto-closes matching alert issues when the same release run later reports success
111
136
  - maintainer release runbook (`docs/release-runbook.md`)
137
+ - manual precompiled candidate workflow now validated for build + artifact upload on both the Windows and musl Linux lanes (`precompiled-candidate-evaluation.yml` run `22556129503`)
138
+ - manual precompiled candidate workflow now validated with runtime checks on both lanes (`precompiled-candidate-evaluation.yml` run `22556206925`)
139
+ - Windows candidate portability hardening across native + Rust build paths:
140
+ - C portability fixes (`cokus` macros, `time_t`, `_mkdir`)
141
+ - Rust toolchain alignment to GNU target for RubyInstaller (`x64-mingw-ucrt`)
142
+ - Rust bindgen header/sysroot compatibility wiring for Windows runners
143
+ - dual Rust DLL artifact-name staging support (`lda_ruby_rust.dll` and `liblda_ruby_rust.dll`)
112
144
  - CI jobs for packaged-gem fallback, rust-enabled checks, and manifest checks
113
- - CI precompiled gem build guardrail job (`precompiled-gem-build`)
145
+ - CI precompiled gem build guardrail job (`precompiled-gem-build`) aligned to the full release-blocking precompiled matrix
146
+ - macOS precompiled CI/release lanes now pin Homebrew `llvm@18` (with fallback to `llvm`) and export `LIBCLANG_PATH` from the selected prefix to avoid bindgen breakage from Homebrew LLVM drift
114
147
  - release workflow matrix for precompiled gems:
115
148
  - `x86_64-linux`
149
+ - `x86_64-linux-musl`
116
150
  - `x86_64-darwin`
117
151
  - `arm64-darwin`
152
+ - `x64-mingw-ucrt`
153
+ - release dry-run validation for expanded precompiled matrix (`release.yml` run `22556487788`)
118
154
 
119
155
  Open in Phase 5:
120
156
 
121
- - optional expansion of precompiled targets (for example Windows and/or musl Linux)
122
- - tighter post-publish verification/alerting for multi-artifact release runs
157
+ - optional expansion of precompiled targets beyond current Linux/macOS/Windows/musl set
123
158
 
124
159
  ## Validation Commands
125
160
 
@@ -131,6 +166,8 @@ Core:
131
166
  Packaging/release checks:
132
167
 
133
168
  - `./bin/check-version-sync`
169
+ - `./bin/verify-rubygems-api-key`
170
+ - `./bin/verify-release-publish --tag v0.4.0`
134
171
  - `./bin/test-packaged-gem-manifest`
135
172
  - `./bin/test-packaged-gem-fallback`
136
173
  - `./bin/test-packaged-gem-rust-enabled`
@@ -145,6 +182,9 @@ Optional full Docker matrix:
145
182
  Performance tracking:
146
183
 
147
184
  - `./bin/benchmark-backends`
185
+ - `./bin/check-rust-benchmark`
186
+ - `docs/rust-orchestration-guardrails.md`
187
+ - CI currently enforces `BENCH_RUST_TO_PURE_MAX_RATIO=0.045` in `benchmark-guardrail`
148
188
 
149
189
  ## CI Jobs Expected
150
190
 
@@ -157,34 +197,37 @@ Performance tracking:
157
197
  - packaged gem manifest checks (`packaged-gem-manifest`)
158
198
  - precompiled gem build checks (`precompiled-gem-build`)
159
199
  - rust scaffold check (`rust-scaffold`)
200
+ - benchmark guardrail check (`benchmark-guardrail`)
160
201
  - release validation/build/publish pipeline on `v*` tags (`release.yml`)
202
+ - post-publish artifact verification (`verify_published_artifacts` in `release.yml`)
203
+ - release-failure issue alerting (`release-failure-alert`)
161
204
 
162
205
  ## Remaining Work Queue
163
206
 
164
207
  Priority 1:
165
208
 
166
- - decide whether to keep current hybrid rust-kernel architecture or move more orchestration into Rust
167
- - if moving deeper into Rust, define parity guardrails and benchmark thresholds before refactors
209
+ - continue periodically tightening the Rust-orchestration benchmark guardrail (`docs/rust-orchestration-guardrails.md`) as long as performance data remains stable
210
+ - keep hybrid backend compatibility guarantees (Rust + native + pure fallback) while extending Rust orchestration only behind parity/guardrail checks
168
211
 
169
212
  Priority 2:
170
213
 
171
- - evaluate additional precompiled targets (Windows and/or musl Linux)
172
- - add explicit post-publish verification checks for all uploaded release artifacts
214
+ - monitor expanded precompiled release lanes (Windows + musl Linux) now that release dry-run matrix validation is green in run `22556487788`
215
+ - evaluate any additional precompiled targets beyond current release-blocking set using `docs/precompiled-target-evaluation.md`
173
216
 
174
217
  Priority 3:
175
218
 
176
- - define automated alerts/notifications for release artifact publish failures
219
+ - monitor release-failure auto-alert and auto-close behavior (`.github/workflows/release-failure-alert.yml`) and adjust signal/noise as release cadence grows
177
220
 
178
221
  ## Resume Instructions For A New Conversation
179
222
 
180
- 1. Check out `codex/modernization`.
223
+ 1. Check out `master`.
181
224
  2. Open this file first: `docs/modernization-handoff.md`.
182
225
  3. Run `SKIP_DOCKER=1 ./bin/release-preflight`.
183
226
  4. Review `docs/release-runbook.md` for release flow/rollback details.
184
227
  5. Validate precompiled packaging locally for your host:
185
228
  - `./bin/release-precompiled-artifacts --tag "$(./bin/check-version-sync --print-tag)" --skip-preflight`
186
- 6. Continue with `Priority 1` items under "Remaining Work Queue".
229
+ 6. Continue with the remaining modernization queue (`Priority 1`, then `Priority 2/3`).
187
230
 
188
231
  If you want the next assistant to continue immediately, use:
189
232
 
190
- "Open `docs/modernization-handoff.md`, validate with `SKIP_DOCKER=1 ./bin/release-preflight`, run `./bin/release-precompiled-artifacts --skip-preflight`, and continue the remaining modernization queue."
233
+ "Open `docs/modernization-handoff.md`, validate with `SKIP_DOCKER=1 ./bin/release-preflight`, run `./bin/release-precompiled-artifacts --skip-preflight`, and continue the `Priority 1/2/3` modernization queue."
@@ -55,9 +55,25 @@ Completed in `codex/experiment-ruby3-modernization`:
55
55
  - Rust runtime CI job added (compile + execute rust backend tests).
56
56
  - Rust/Pure numeric parity fixtures added for deterministic seeded runs.
57
57
  - `compile_rust` now stages a Ruby-loadable extension artifact to avoid `Init_` symbol mismatch from Cargo's `lib*` output naming.
58
+ - Rust-side EM orchestration path added (`Lda::RustBackend.run_em`) and retained as legacy compatibility fallback for precomputed beta-input execution.
59
+ - Rust-side deterministic-start orchestration path added (`Lda::RustBackend.run_em_with_start`) so `seeded`/`deterministic` startup can stay in Rust.
60
+ - Rust-side seed-controlled random-start orchestration path added (`Lda::RustBackend.run_em_with_start_seed` + `random_topic_term_probabilities`) so random initialization can stay in Rust while preserving deterministic replay from an explicit seed.
61
+ - Rust-side corpus session lifecycle added (`create_corpus_session`/`drop_corpus_session`) and `Lda::Backends::Rust` now prefers session-based EM orchestration (`run_em_on_session_with_start_seed`) before array-based fallback paths.
62
+ - Rust-side session settings lifecycle added (`configure_corpus_session`) and `Lda::Backends::Rust` now prefers settings-aware session orchestration (`run_em_on_session_start`) before parameter-heavy session and array-based fallbacks.
63
+ - Rust session orchestration now runs on shared Rust-side corpus session data via borrowed execution helpers, avoiding deep corpus array cloning on each session EM call.
64
+ - Unified Rust session API added (`run_em_on_session`) to apply settings and execute EM in one call inside Rust session orchestration.
65
+ - `Lda::Backends::Rust` now prefers direct Rust non-session orchestration (`run_em_with_start_seed`) before legacy `run_em(initial_beta, ...)` compatibility fallback when a session path is unavailable.
66
+ - Direct non-session Rust orchestration now reuses the backend's cached Rust corpus snapshot instead of rebuilding corpus arrays from `@corpus` on each fallback invocation.
67
+ - Rust managed-session orchestration API added (`run_em_on_session_with_corpus`) to recreate missing sessions and execute EM in one Rust call.
68
+ - Rust session lifecycle replacement API added (`replace_corpus_session`) so corpus reassignment can update existing Rust sessions in place (config reset + corpus swap) instead of Ruby-side drop/recreate.
69
+ - `Lda::Backends::Rust` now routes cached-corpus EM through `run_em_on_session_with_corpus`, leaving session reuse/recovery decisions in Rust, preferring that managed path even when no active session id is cached locally, and reducing Ruby-side fallback branching when sessions are externally dropped.
70
+ - `run_em_on_session_with_corpus` now acts as a unified Rust managed-corpus entrypoint: it attempts session-backed execution first, then falls back to direct start-aware array execution inside Rust when a managed session cannot be used.
71
+ - Legacy `run_em(initial_beta, ...)` compatibility fallback now reuses the Rust backend's cached corpus snapshot and only relies on the pure-Ruby backend to synthesize the initial beta matrix.
58
72
  - Dockerized rust runtime workflow added for local parity with CI (`Dockerfile.rust`, `bin/docker-test-rust`).
59
73
  - Gem packaging now excludes local Rust cargo build artifacts (`target/**`) for clean release builds.
60
74
  - Backend benchmark driver added (`bin/benchmark-backends`) to track pure/native/rust runtime deltas.
75
+ - Rust orchestration guardrail policy documented (`docs/rust-orchestration-guardrails.md`) with benchmark threshold checker (`bin/check-rust-benchmark`).
76
+ - CI benchmark guardrail job added (`benchmark-guardrail`) to enforce Rust/pure runtime ratio on Ubuntu (currently `BENCH_RUST_TO_PURE_MAX_RATIO=0.045`).
61
77
  - Source install path now has explicit Rust build policy via `LDA_RUBY_RUST_BUILD=auto|always|never`.
62
78
  - Docker install-policy matrix script added (`bin/docker-test-install-policies`) to verify source install behavior across environments.
63
79
  - CI now runs install-policy matrix checks on Ubuntu.
@@ -71,7 +87,12 @@ Completed in `codex/experiment-ruby3-modernization`:
71
87
  - Release artifact helper added (`bin/release-artifacts`) to build source gem artifacts with SHA256 checksums.
72
88
  - Precompiled platform artifact helper added (`bin/release-precompiled-artifacts`) to build + validate native gems.
73
89
  - Tag-driven release workflow added (`.github/workflows/release.yml`) with dry-run support and environment-gated publish jobs.
74
- - CI precompiled guardrail job added (`precompiled-gem-build`) for Linux/macOS packaging checks.
90
+ - RubyGems credential preflight helper added (`bin/verify-rubygems-api-key`) for CI-safe publish key validation.
91
+ - Post-publish verification helper added (`bin/verify-release-publish`) to validate RubyGems + GitHub release artifacts by tag.
92
+ - CI precompiled guardrail job added (`precompiled-gem-build`) for full release-blocking platform packaging checks (Linux, Linux musl, macOS Intel, macOS Apple Silicon, Windows).
93
+ - macOS precompiled CI/release lanes now pin Homebrew `llvm@18` (with fallback to `llvm`) and export `LIBCLANG_PATH` from the selected prefix to avoid bindgen breakage from Homebrew LLVM drift.
94
+ - Release workflow post-publish verification job added (`verify_published_artifacts`).
95
+ - Release failure alert workflow added (`.github/workflows/release-failure-alert.yml`) to open issue alerts for failed tag-triggered `release.yml` runs and auto-close matching alerts when reruns succeed.
75
96
  - Maintainer release runbook added (`docs/release-runbook.md`) with publish and rollback/yank procedures.
76
97
  - Precompiled platform support policy added (`docs/precompiled-platform-policy.md`).
77
98
 
@@ -107,7 +128,7 @@ For an up-to-date resume snapshot (phase status + exact remaining queue), see `d
107
128
 
108
129
  - Phase 5A (source-gem release automation): complete.
109
130
  - Keep source build path available.
110
- - Phase 5B (precompiled/native gem publishing): complete for initial Linux/macOS targets via `bin/release-precompiled-artifacts` and release workflow matrix builds.
131
+ - Phase 5B (precompiled/native gem publishing): complete for Linux/macOS/Windows/musl release matrix targets via `bin/release-precompiled-artifacts` and release workflow matrix builds.
111
132
 
112
133
  ## Tooling suggestions
113
134
 
@@ -11,6 +11,8 @@ Each release version publishes a split package set:
11
11
  - `lda-ruby-<version>-x86_64-linux.gem`
12
12
  - `lda-ruby-<version>-x86_64-darwin.gem`
13
13
  - `lda-ruby-<version>-arm64-darwin.gem`
14
+ - `lda-ruby-<version>-x64-mingw-ucrt.gem`
15
+ - `lda-ruby-<version>-x86_64-linux-musl.gem`
14
16
 
15
17
  The source gem remains the universal fallback. Platform gems are additive and are expected to install without local build tools.
16
18
  Precompiled artifacts are built on matching host runners (no cross-compilation in current workflow).
@@ -20,8 +22,10 @@ Precompiled artifacts are built on matching host runners (no cross-compilation i
20
22
  - Supported Ruby versions: 3.2 and 3.3 (plus future versions validated by CI).
21
23
  - Release-blocking precompiled targets:
22
24
  - Linux `x86_64-linux`
25
+ - Linux musl `x86_64-linux-musl`
23
26
  - macOS Intel `x86_64-darwin`
24
27
  - macOS Apple Silicon `arm64-darwin`
28
+ - Windows `x64-mingw-ucrt`
25
29
  - Other platforms:
26
30
  - Install from source gem.
27
31
  - Runtime remains supported through native/pure fallback paths.
@@ -47,10 +51,18 @@ Release automation requirements:
47
51
  - `.github/workflows/release.yml` builds source + precompiled artifacts.
48
52
  - Release workflow matrix must include all release-blocking precompiled targets.
49
53
  - Publish jobs push all built gems and attach checksums to GitHub releases.
54
+ - Post-publish verification job must validate RubyGems entries and GitHub release assets for the tagged version.
50
55
 
51
56
  Continuous integration guardrail:
52
57
 
53
- - `.github/workflows/ci.yml` runs `release-precompiled-artifacts` for representative Linux/macOS targets on every branch/PR.
58
+ - `.github/workflows/ci.yml` runs `release-precompiled-artifacts` for the full release-blocking precompiled matrix (Linux, Linux musl, macOS Intel, macOS Apple Silicon, Windows) on every branch/PR.
59
+ - macOS precompiled lanes pin Homebrew `llvm@18` (falling back to `llvm` if unavailable) and export `LIBCLANG_PATH` from the selected prefix to keep bindgen stable across Homebrew formula updates.
60
+ - `.github/workflows/precompiled-candidate-evaluation.yml` is used for additional platform candidate checks.
61
+ - `.github/workflows/release.yml` dry-run validates the full release-blocking matrix before publish.
62
+
63
+ Latest release-matrix validation:
64
+
65
+ - [release dry-run 22556487788](https://github.com/ealdent/lda-ruby/actions/runs/22556487788) succeeded for Linux, Linux musl, macOS Intel, macOS Apple Silicon, and Windows targets.
54
66
 
55
67
  ## Rollout / Expansion Rules
56
68
 
@@ -59,7 +71,8 @@ When adding a new precompiled platform:
59
71
  1. Add target to release workflow matrix.
60
72
  2. Add or update CI coverage for that platform family.
61
73
  3. Update this policy and the release runbook support matrix.
62
- 4. Validate a dry-run release with `workflow_dispatch` before shipping.
74
+ 4. Record feasibility evidence and rollout notes in `docs/precompiled-target-evaluation.md`.
75
+ 5. Validate a dry-run release with `workflow_dispatch` before shipping.
63
76
 
64
77
  When deprecating a precompiled platform:
65
78
 
@@ -0,0 +1,67 @@
1
+ # Precompiled Target Evaluation (Priority 2)
2
+
3
+ This document tracks current feasibility for expanding precompiled gem targets beyond the Phase 5B baseline.
4
+
5
+ Current release-blocking precompiled targets:
6
+
7
+ - `x86_64-linux`
8
+ - `x86_64-darwin`
9
+ - `arm64-darwin`
10
+ - `x64-mingw-ucrt`
11
+ - `x86_64-linux-musl`
12
+
13
+ Reference implementation constraints:
14
+
15
+ - `bin/release-precompiled-artifacts` only supports host-matching platform builds (no cross-compilation).
16
+ - Release workflow currently uses matching host runners for each precompiled target.
17
+
18
+ ## Candidate: Windows (`x64-mingw-ucrt`)
19
+
20
+ Status: promoted to release-blocking after release dry-run matrix success.
21
+
22
+ Feasibility notes:
23
+
24
+ - GitHub Actions provides Windows runners, so host-matching builds are possible in principle.
25
+ - Existing release tooling is bash-first and assumes POSIX shell ergonomics throughout.
26
+ - Runtime smoke and packaged-gem checks were validated in candidate runs before promotion.
27
+ - Candidate runs:
28
+ - [run 22555475302](https://github.com/ealdent/lda-ruby/actions/runs/22555475302): failed in native extension compile (`rake compile`) with `cokus.h` macro collision and `time_t` mismatch.
29
+ - [run 22555550326](https://github.com/ealdent/lda-ruby/actions/runs/22555550326): progressed further, failed on `utils.c` `mkdir(name, mode)` mismatch (Windows `_mkdir` required).
30
+ - [run 22556009214](https://github.com/ealdent/lda-ruby/actions/runs/22556009214): Rust bindgen/toolchain parsing fixed; build then failed on Windows DLL name staging expectation.
31
+ - [run 22556129503](https://github.com/ealdent/lda-ruby/actions/runs/22556129503): Windows candidate build + artifact upload succeeded after GNU toolchain alignment, bindgen header/sysroot setup, and dual DLL name staging support.
32
+ - [run 22556206925](https://github.com/ealdent/lda-ruby/actions/runs/22556206925): Windows candidate remained green with packaged-gem runtime smoke checks enabled.
33
+ - [run 22556487788](https://github.com/ealdent/lda-ruby/actions/runs/22556487788): release workflow dry-run succeeded with `windows-x64-mingw-ucrt` included in release matrix.
34
+
35
+ Required validation to promote:
36
+
37
+ 1. Completed: release dry-run matrix validation passed.
38
+
39
+ ## Candidate: musl Linux (`x86_64-linux-musl`)
40
+
41
+ Status: promoted to release-blocking after release dry-run matrix success.
42
+
43
+ Feasibility notes:
44
+
45
+ - Current workflow uses `ubuntu-latest` (glibc), not musl.
46
+ - Current artifact script rejects cross-platform builds, so a musl artifact requires either:
47
+ - a musl-hosted builder, or
48
+ - a dedicated musl-native build container/workflow path treated as host-equivalent for packaging.
49
+ - Local validation signal (2026-03-01): Alpine container dry-run succeeded for host-matching `aarch64-linux-musl` with:
50
+ - `./bin/release-precompiled-artifacts --platform <detected-musl-platform> --skip-preflight --skip-runtime-checks`
51
+ - Candidate workflow runs (2026-03-01):
52
+ - [run 22555475302](https://github.com/ealdent/lda-ruby/actions/runs/22555475302): built `x86_64-linux-musl` successfully but artifact upload path was misconfigured.
53
+ - [run 22555550326](https://github.com/ealdent/lda-ruby/actions/runs/22555550326): musl candidate built and uploaded artifacts successfully with corrected glob path (`pkg/lda-ruby-*-linux-musl.gem*`).
54
+ - [run 22556129503](https://github.com/ealdent/lda-ruby/actions/runs/22556129503): musl candidate build + artifact upload remained green alongside the fixed Windows lane.
55
+ - [run 22556206925](https://github.com/ealdent/lda-ruby/actions/runs/22556206925): musl candidate remained green with packaged-gem runtime smoke checks enabled.
56
+ - [run 22556487788](https://github.com/ealdent/lda-ruby/actions/runs/22556487788): release workflow dry-run succeeded with `linux-musl-x86_64` included in release matrix.
57
+
58
+ Required validation to promote:
59
+
60
+ 1. Completed: release dry-run matrix validation passed.
61
+
62
+ ## Recommendation
63
+
64
+ Current expansion step is complete for Windows and musl. Any additional target should follow the same sequence:
65
+ 1. Add candidate workflow coverage.
66
+ 2. Verify candidate runtime checks.
67
+ 3. Validate one release dry-run with the new matrix lane before promotion.
@@ -2,7 +2,7 @@
2
2
 
3
3
  This runbook defines the maintainer workflow for shipping `lda-ruby` source and precompiled platform gem releases.
4
4
 
5
- Authoritative platform/support policy is maintained in `docs/precompiled-platform-policy.md`.
5
+ Authoritative platform/support policy is maintained in `docs/precompiled-platform-policy.md`; expansion feasibility notes live in `docs/precompiled-target-evaluation.md`.
6
6
 
7
7
  ## Scope
8
8
 
@@ -34,7 +34,7 @@ Authoritative platform/support policy is maintained in `docs/precompiled-platfor
34
34
 
35
35
  GitHub repository secret:
36
36
 
37
- - `RUBYGEMS_API_KEY`: API key with push rights for `lda-ruby`.
37
+ - `RUBYGEMS_API_KEY`: API key with push rights for `lda-ruby` and non-interactive publish support (no OTP prompt during `gem push`).
38
38
 
39
39
  GitHub Actions environment:
40
40
 
@@ -69,7 +69,15 @@ GitHub Actions environment:
69
69
 
70
70
  Note: `release-precompiled-artifacts` only supports building for the current host platform (no cross-compilation).
71
71
 
72
- 5. Commit and merge to `master`.
72
+ 5. Verify RubyGems API key behavior before tagging:
73
+
74
+ ```bash
75
+ ./bin/verify-rubygems-api-key
76
+ ```
77
+
78
+ This check intentionally attempts a duplicate push of an existing gem version. A duplicate-rejected response is expected and confirms non-interactive auth works.
79
+
80
+ 6. Commit and merge to `master`.
73
81
 
74
82
  ## Dry-Run Path (No Publish)
75
83
 
@@ -84,10 +92,16 @@ Behavior:
84
92
 
85
93
  Latest verified dry-run reference:
86
94
 
87
- - date: 2026-02-25
88
- - workflow run: `https://github.com/ealdent/lda-ruby/actions/runs/22382692416`
95
+ - date: 2026-03-02
96
+ - workflow run: `https://github.com/ealdent/lda-ruby/actions/runs/22556487788`
89
97
  - dispatch parameters: `release_tag=v0.4.0`, `publish=false`
90
98
  - result: success across `validate`, `build_artifacts`, and full `build_precompiled_artifacts` matrix
99
+ - verified precompiled lanes:
100
+ - `linux-x86_64`
101
+ - `linux-musl-x86_64`
102
+ - `macos-x86_64`
103
+ - `macos-arm64`
104
+ - `windows-x64-mingw-ucrt`
91
105
 
92
106
  Optional local dry-run equivalent:
93
107
 
@@ -96,6 +110,21 @@ Optional local dry-run equivalent:
96
110
  ./bin/release-precompiled-artifacts --tag v0.4.0 --skip-preflight
97
111
  ```
98
112
 
113
+ Candidate expansion workflow:
114
+
115
+ - For future platform evaluation beyond current release-blocking targets, run `.github/workflows/precompiled-candidate-evaluation.yml` via `workflow_dispatch`.
116
+ - Record outcome artifacts/logs in `docs/precompiled-target-evaluation.md`.
117
+
118
+ ## Known Publish Incident (`v0.4.0`)
119
+
120
+ - date: 2026-02-25
121
+ - release runs:
122
+ - `https://github.com/ealdent/lda-ruby/actions/runs/22383716372`
123
+ - `https://github.com/ealdent/lda-ruby/actions/runs/22383849236` (attempt 1 + rerun attempt 2 + rerun attempt 3)
124
+ - result: artifact build stages passed, `publish to RubyGems` failed with OTP-required auth (`You have enabled multifactor authentication but no OTP code provided.`)
125
+ - recovery action: rotated `release` environment secret `RUBYGEMS_API_KEY` to a CI-safe key and reran run `22383849236`.
126
+ - recovery result: rerun attempt 3 succeeded; RubyGems `0.4.0` and GitHub release `v0.4.0` published.
127
+
99
128
  ## Publish Path (Tag-Driven)
100
129
 
101
130
  1. Ensure the release commit is on `master`.
@@ -111,13 +140,17 @@ Optional local dry-run equivalent:
111
140
  3. Monitor `.github/workflows/release.yml`:
112
141
  - `validate`
113
142
  - `build_artifacts`
114
- - `build_precompiled_artifacts` (linux + macOS matrix)
143
+ - `build_precompiled_artifacts` (linux + linux-musl + macOS + windows matrix)
115
144
  - environment-gated `publish_rubygems`
116
145
  - environment-gated `publish_github_release`
146
+ - `verify_published_artifacts`
147
+ - on failed tag-triggered `release.yml` runs, `.github/workflows/release-failure-alert.yml` opens a triage issue with failed job links
148
+ - if the same release run later succeeds (for example via rerun), the alert issue is auto-closed by `.github/workflows/release-failure-alert.yml`
117
149
  4. Approve the protected `release` environment when prompted.
118
150
  5. Confirm published outputs:
119
151
  - RubyGems shows `lda-ruby` `0.4.0` source gem and platform gems
120
152
  - GitHub release `v0.4.0` exists with all gem and `.sha256` attachments
153
+ - workflow job `verify_published_artifacts` succeeds
121
154
 
122
155
  ## Rollback and Recovery
123
156
 
@@ -151,6 +184,8 @@ If an incorrect gem is published:
151
184
  - `cargo not found` in rust-enabled checks: ensure Rust toolchain is installed or run in Docker.
152
185
  - `libclang` not found while building precompiled gems: install LLVM/libclang and set `LIBCLANG_PATH` if needed.
153
186
  - Linux `Install Rust bindgen dependencies` can take several minutes on fresh runners due to apt package index updates and package installs.
187
+ - RubyGems publish asks for OTP (`You have enabled multifactor authentication but no OTP code provided`): run `./bin/verify-rubygems-api-key`, then rotate `RUBYGEMS_API_KEY` to a CI-safe key if OTP is requested.
188
+ - Post-publish verification fails: run `./bin/verify-release-publish --tag vX.Y.Z` and fix missing RubyGems entries or GitHub release assets before considering the release complete.
154
189
  - macOS Rust link errors (`symbol(s) not found` for Ruby APIs): ensure build path preserves `-C link-arg=-Wl,-undefined,dynamic_lookup` in `RUSTFLAGS`.
155
190
  - Tag/version mismatch: run `./bin/check-version-sync --tag vX.Y.Z`.
156
191
  - Artifact mismatch during release: rebuild with `./bin/release-artifacts --tag vX.Y.Z`.
@@ -0,0 +1,50 @@
1
+ # Rust Orchestration Guardrails
2
+
3
+ This document defines the minimum parity and performance gates for deeper Rust orchestration refactors.
4
+
5
+ ## Numeric parity guardrails
6
+
7
+ Required tests:
8
+
9
+ - `bundle exec ruby -Ilib:test test/backend_compatibility_test.rb`
10
+ - `bundle exec ruby -Ilib:test test/rust_orchestration_test.rb`
11
+
12
+ Current parity expectations:
13
+
14
+ - Rust vs pure backend fixture parity must hold within the existing tolerances used by the tests.
15
+ - Session-based orchestration paths (`run_em_on_session`, `run_em_on_session_with_start_seed`, `run_em_on_session_start`, `run_em_on_session_with_corpus`) must match direct non-session orchestration for equivalent settings/seeds.
16
+ - `Lda::Backends::Rust` cached-corpus EM should prefer the managed Rust session entrypoint (`run_em_on_session_with_corpus`) even when no active session id is cached locally, rather than branching in Ruby between session-only, recovery, and direct paths.
17
+ - `Lda::Backends::Rust` non-session fallback should prefer Rust start-aware orchestration (`run_em_with_start_seed`) before legacy beta-input orchestration (`run_em`).
18
+ - Direct non-session fallback should reuse the backend's cached Rust corpus snapshot rather than rebuilding corpus arrays from `@corpus` for each invocation.
19
+ - Legacy beta-input compatibility fallback should also reuse the backend's cached Rust corpus snapshot rather than rebuilding full EM corpus input in Ruby.
20
+ - Rust backend corpus/session lifecycle must not leak session count across corpus replacement.
21
+ - Missing-session recovery in managed session orchestration (`run_em_on_session_with_corpus`) must recreate a usable session and keep parity with direct orchestration.
22
+ - Managed Rust corpus orchestration (`run_em_on_session_with_corpus`) must keep parity with direct orchestration even when it falls back internally from session-backed execution to start-seeded array execution.
23
+ - Corpus reassignment through Rust session replacement lifecycle (`replace_corpus_session`) must preserve stable session count and route subsequent EM runs over updated corpus data.
24
+ - Unknown start-mode handling in seed-aware Rust orchestration must match Ruby's non-seeded fallback behavior when given the same explicit seed.
25
+
26
+ ## Benchmark guardrail
27
+
28
+ Run:
29
+
30
+ - `./bin/check-rust-benchmark`
31
+
32
+ Default benchmark policy:
33
+
34
+ - `BENCH_RUST_TO_PURE_MAX_RATIO=0.045`
35
+ - i.e., Rust mean runtime must not exceed 4.5% of the pure backend's mean runtime on the benchmark fixture/config.
36
+ - CI benchmark guardrail job enforces the same ratio with `BENCH_RUNS=1` for runtime stability.
37
+ - Latest tightening evidence (2026-03-05): a local Docker guardrail check with `BENCH_RUNS=3` observed a Rust/Pure ratio of `0.0368` (`rust=0.0758s`, `pure=2.0569s`), and prior CI streak data on `codex/rust-orchestration-phase8` (runs `22555725309` .. `22557953998`) observed ratios in the range `[0.0252, 0.0288]`; both support a tighter `0.045` threshold with headroom.
38
+
39
+ Configurable environment knobs:
40
+
41
+ - `BENCH_RUNS` (default `5`)
42
+ - `BENCH_START` (default `seeded`)
43
+ - `BENCH_TOPICS` (default `8`)
44
+ - `BENCH_MAX_ITER` (default `20`)
45
+ - `BENCH_EM_MAX_ITER` (default `40`)
46
+ - `BENCH_RUST_TO_PURE_MAX_RATIO` (default `0.045`)
47
+
48
+ ## When to tighten thresholds
49
+
50
+ Tighten benchmark thresholds only after collecting multiple stable runs on the same host/environment and updating this document with the new target ratio.
data/ext/lda-ruby/cokus.c CHANGED
@@ -45,14 +45,14 @@
45
45
 
46
46
  #include "cokus.h"
47
47
 
48
- static uint32 state[N+1]; // state vector + 1 extra to not violate ANSI C
48
+ static uint32 state[COKUS_N+1]; // state vector + 1 extra to not violate ANSI C
49
49
  static uint32 *next; // next random value is computed from here
50
50
  static int left = -1; // can *next++ this many times before reloading
51
51
 
52
52
  void seedMT(uint32 seed)
53
53
  {
54
54
  //
55
- // We initialize state[0..(N-1)] via the generator
55
+ // We initialize state[0..(COKUS_N-1)] via the generator
56
56
  //
57
57
  // x_new = (69069 * x_old) mod 2^32
58
58
  //
@@ -100,28 +100,28 @@ void seedMT(uint32 seed)
100
100
  register uint32 x = (seed | 1U) & 0xFFFFFFFFU, *s = state;
101
101
  register int j;
102
102
 
103
- for(left=0, *s++=x, j=N; --j;
103
+ for(left=0, *s++=x, j=COKUS_N; --j;
104
104
  *s++ = (x*=69069U) & 0xFFFFFFFFU);
105
105
  }
106
106
 
107
107
 
108
108
  uint32 reloadMT(void)
109
109
  {
110
- register uint32 *p0=state, *p2=state+2, *pM=state+M, s0, s1;
110
+ register uint32 *p0=state, *p2=state+2, *pM=state+COKUS_M, s0, s1;
111
111
  register int j;
112
112
 
113
113
  if(left < -1)
114
114
  seedMT(4357U);
115
115
 
116
- left=N-1, next=state+1;
116
+ left=COKUS_N-1, next=state+1;
117
117
 
118
- for(s0=state[0], s1=state[1], j=N-M+1; --j; s0=s1, s1=*p2++)
119
- *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U);
118
+ for(s0=state[0], s1=state[1], j=COKUS_N-COKUS_M+1; --j; s0=s1, s1=*p2++)
119
+ *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? COKUS_K : 0U);
120
120
 
121
- for(pM=state, j=M; --j; s0=s1, s1=*p2++)
122
- *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U);
121
+ for(pM=state, j=COKUS_M; --j; s0=s1, s1=*p2++)
122
+ *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? COKUS_K : 0U);
123
123
 
124
- s1=state[0], *p0 = *pM ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U);
124
+ s1=state[0], *p0 = *pM ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? COKUS_K : 0U);
125
125
  s1 ^= (s1 >> 11);
126
126
  s1 ^= (s1 << 7) & 0x9D2C5680U;
127
127
  s1 ^= (s1 << 15) & 0xEFC60000U;
@@ -142,4 +142,3 @@ uint32 randomMT(void)
142
142
  y ^= (y >> 18);
143
143
  return(y);
144
144
  }
145
-