seqpro 0.19.0__tar.gz → 0.20.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {seqpro-0.19.0 → seqpro-0.20.0}/.github/workflows/publish.yaml +47 -1
- {seqpro-0.19.0 → seqpro-0.20.0}/CHANGELOG.md +22 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/CLAUDE.md +6 -1
- {seqpro-0.19.0 → seqpro-0.20.0}/PKG-INFO +1 -1
- seqpro-0.20.0/crates/seqpro-core/Cargo.toml +20 -0
- seqpro-0.20.0/docs/superpowers/plans/2026-06-24-ragged-getitem-fastpaths.md +914 -0
- seqpro-0.20.0/docs/superpowers/specs/2026-06-24-ragged-getitem-fastpaths-design.md +258 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/pyproject.toml +1 -1
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/rag/_core.py +226 -33
- {seqpro-0.19.0 → seqpro-0.20.0}/src/lib.rs +53 -38
- {seqpro-0.19.0 → seqpro-0.20.0}/src/translate.rs +150 -113
- seqpro-0.20.0/tests/bench_ragged_getitem.py +52 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_rag_to_packed.py +6 -6
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_ragged_core.py +7 -5
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_ragged_record_indexing.py +12 -3
- seqpro-0.20.0/tests/test_ragged_slice_fastpath.py +221 -0
- seqpro-0.19.0/crates/seqpro-core/Cargo.toml +0 -14
- {seqpro-0.19.0 → seqpro-0.20.0}/.claude/skills/zensical/SKILL.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/.gitattributes +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/.github/workflows/bench.yaml +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/.github/workflows/bump.yaml +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/.github/workflows/docs.yml +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/.github/workflows/lint.yaml +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/.github/workflows/merge.yaml +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/.github/workflows/release-pipeline.yaml +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/.github/workflows/release.yaml +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/.github/workflows/test.yaml +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/.gitignore +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/.pre-commit-config.yaml +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/Cargo.lock +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/Cargo.toml +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/LICENSE +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/README.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/benches/bench_tokenize_translate.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/benches/kshuffle.rs +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/benchmarks/bench_ragged_backends.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/benchmarks/bench_to_packed.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/crates/seqpro-core/src/lib.rs +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/crates/seqpro-core/src/ragged.rs +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/api/alphabets.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/api/bed.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/api/gtf.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/api/index.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/api/ragged.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/api/types.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/index.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/ragged.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/roadmap/rust-ragged.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-05-04-ragged-record-array.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-05-05-documentation-site.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-05-05-narwhals-coord-schema.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-05-05-ragged-zip-and-record-introspection.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-05-20-kshuffle-optimization.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-05-20-kshuffle-pooled-buffers-and-k2-fast-path.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-05-20-kshuffle-wilson-single-pass.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-05-20-release-pipeline.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-05-28-translate-lut-validation.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-05-31-flat-buffer-to-padded.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-05-31-rag-to-packed.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-06-05-translate-unknown-codon-policy.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-06-12-tokenize-lut-codspeed.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-06-18-rust-tokenize-translate.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-06-19-rust-ragged-core.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-06-20-rust-ragged-records.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-06-20-rust-ragged-spec-c.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-06-21-ragged-throughput-gate.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-06-21-rust-ragged-consumer-audit.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/plans/2026-06-23-ragged-subclass-getitem.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-05-04-ragged-record-array-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-05-05-documentation-site-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-05-05-narwhals-coord-schema-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-05-05-ragged-zip-and-record-introspection-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-05-20-kshuffle-optimization-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-05-20-kshuffle-pooled-buffers-and-k2-fast-path-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-05-20-kshuffle-wilson-single-pass-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-05-20-release-pipeline-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-05-28-translate-lut-and-validation-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-05-31-flat-buffer-to-padded-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-05-31-rag-to-packed-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-06-05-translate-unknown-codon-policy-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-06-12-tokenize-lut-codspeed-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-06-18-rust-tokenize-translate-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-06-19-rust-ragged-core-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-06-20-rust-ragged-nested-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-06-20-rust-ragged-records-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-06-21-ragged-throughput-gate-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-06-21-rust-ragged-audit-ledger.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-06-21-rust-ragged-consumer-audit-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/docs/superpowers/specs/2026-06-23-ragged-subclass-getitem-design.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/meta.yaml +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/pixi.lock +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/pixi.toml +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/__init__.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/_analyzers.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/_cleaners.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/_coords.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/_encoders.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/_modifiers.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/_numba.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/_types.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/_utils.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/alphabets/__init__.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/alphabets/_alphabets.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/bed.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/experimental/_experimental.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/experimental/_visualizers.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/gtf.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/py.typed +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/rag/__init__.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/rag/_ak_interop.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/rag/_ingest.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/rag/_layout.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/rag/_ops.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/rag/_types.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/rag/_utils.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/transforms/__init__.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/transforms/augmentation.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/transforms/tmm.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/python/seqpro/xr/__init__.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/scratch_bench_rc.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/scratch_bench_to_padded.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/skills/seqpro/SKILL.md +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/src/kmer_encode.rs +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/src/kshuffle.rs +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/src/kshuffle_ref.rs +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/src/ragged.rs +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/_shape_fixtures.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/bed/test_pyranges.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/bed/test_read.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/bed/test_sort.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/bed/test_with_length.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/bench_translate_lut.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/conftest.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_analyzers.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_bench_tokenize.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_concatenate.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_coords.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_core_ragged_surface.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_encoders.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_ingest.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_modifiers.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_ohe.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_ragged.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_ragged_core_records.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_ragged_nested_consumers.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_ragged_nested_diff.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_ragged_rc.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_ragged_subclass.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_ragged_to_padded.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_shape_matrix.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_tokenize.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_transforms.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_translate.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/tests/test_translate_rust.py +0 -0
- {seqpro-0.19.0 → seqpro-0.20.0}/zensical.toml +0 -0
|
@@ -9,7 +9,9 @@
|
|
|
9
9
|
# conditions below. Re-apply the workflow_call inputs (`dry_run`), delete
|
|
10
10
|
# the `on.workflow_run` block, remove all `if: ${{ github.event_name ... }}`
|
|
11
11
|
# guards, and wrap the `uv publish` step with the dry_run gate before
|
|
12
|
-
# committing.
|
|
12
|
+
# committing. Also re-add the hand-written `crates-io` job at the bottom
|
|
13
|
+
# (maturin does not generate it) — it publishes the pyo3-free seqpro-core
|
|
14
|
+
# crate to crates.io on a version bump.
|
|
13
15
|
name: Publish
|
|
14
16
|
|
|
15
17
|
on:
|
|
@@ -268,3 +270,47 @@ jobs:
|
|
|
268
270
|
echo
|
|
269
271
|
ls -1 wheels-*/* | sed 's/^/- /'
|
|
270
272
|
} >> "$GITHUB_STEP_SUMMARY"
|
|
273
|
+
|
|
274
|
+
# Publish the pyo3-free seqpro-core crate to crates.io. Bump-detected and
|
|
275
|
+
# idempotent: it reads the version from crates/seqpro-core/Cargo.toml and
|
|
276
|
+
# publishes only when that version is not already on crates.io, so an
|
|
277
|
+
# unchanged version is a no-op (success), and bumping the crate's version is
|
|
278
|
+
# what triggers a release. Independent of the Python wheel/PyPI publish above.
|
|
279
|
+
crates-io:
|
|
280
|
+
name: Publish seqpro-core to crates.io
|
|
281
|
+
runs-on: ubuntu-latest
|
|
282
|
+
steps:
|
|
283
|
+
- uses: actions/checkout@v6
|
|
284
|
+
with:
|
|
285
|
+
ref: ${{ inputs.tag }}
|
|
286
|
+
- name: Install Rust
|
|
287
|
+
uses: dtolnay/rust-toolchain@stable
|
|
288
|
+
- name: Detect seqpro-core version and whether it is already published
|
|
289
|
+
id: ver
|
|
290
|
+
run: |
|
|
291
|
+
version=$(grep -m1 '^version' crates/seqpro-core/Cargo.toml \
|
|
292
|
+
| sed -E 's/version *= *"([^"]+)".*/\1/')
|
|
293
|
+
if [ -z "$version" ]; then
|
|
294
|
+
echo "::error::could not read seqpro-core version from crates/seqpro-core/Cargo.toml"
|
|
295
|
+
exit 1
|
|
296
|
+
fi
|
|
297
|
+
echo "version=$version" >> "$GITHUB_OUTPUT"
|
|
298
|
+
# crates.io requires a descriptive User-Agent or it returns 403.
|
|
299
|
+
code=$(curl -s -o /dev/null -w '%{http_code}' \
|
|
300
|
+
-H 'User-Agent: seqpro-release-pipeline (https://github.com/ML4GLand/SeqPro)' \
|
|
301
|
+
"https://crates.io/api/v1/crates/seqpro-core/$version")
|
|
302
|
+
if [ "$code" = "200" ]; then
|
|
303
|
+
echo "published=true" >> "$GITHUB_OUTPUT"
|
|
304
|
+
echo "seqpro-core $version is already on crates.io — no bump detected, skipping." >> "$GITHUB_STEP_SUMMARY"
|
|
305
|
+
else
|
|
306
|
+
echo "published=false" >> "$GITHUB_OUTPUT"
|
|
307
|
+
echo "seqpro-core $version not found on crates.io (HTTP $code) — will publish." >> "$GITHUB_STEP_SUMMARY"
|
|
308
|
+
fi
|
|
309
|
+
- name: Dry-run publish
|
|
310
|
+
if: inputs.dry_run == true && steps.ver.outputs.published == 'false'
|
|
311
|
+
run: cargo publish -p seqpro-core --dry-run
|
|
312
|
+
- name: Publish to crates.io
|
|
313
|
+
if: inputs.dry_run == false && steps.ver.outputs.published == 'false'
|
|
314
|
+
env:
|
|
315
|
+
CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO }}
|
|
316
|
+
run: cargo publish -p seqpro-core
|
|
@@ -1,3 +1,25 @@
|
|
|
1
|
+
## 0.20.0 (2026-06-25)
|
|
2
|
+
|
|
3
|
+
### Feat
|
|
4
|
+
|
|
5
|
+
- **rag**: to_numpy(validate=False) skips the uniformity scan
|
|
6
|
+
- **rag**: contiguous-slice fast path for record R=2 getitem
|
|
7
|
+
- **rag**: contiguous-slice fast path for record R=1 getitem
|
|
8
|
+
- **rag**: contiguous-slice fast path for opaque-string getitem
|
|
9
|
+
- **rag**: contiguous-slice fast path for R=2 getitem
|
|
10
|
+
- **rag**: contiguous-slice fast path for R=1 getitem
|
|
11
|
+
|
|
12
|
+
### Fix
|
|
13
|
+
|
|
14
|
+
- **rag**: preserve middle fixed dim in record R=2 slice shape-tail
|
|
15
|
+
|
|
16
|
+
### Perf
|
|
17
|
+
|
|
18
|
+
- **rust**: release the GIL during PyO3 kernel compute
|
|
19
|
+
- **rag**: lean from_offsets — elide ascontiguousarray, gate size-check behind validate
|
|
20
|
+
|
|
21
|
+
## 0.19.1 (2026-06-23)
|
|
22
|
+
|
|
1
23
|
## 0.19.0 (2026-06-23)
|
|
2
24
|
|
|
3
25
|
### Feat
|
|
@@ -60,7 +60,7 @@ Most functions require callers to specify `length_axis` (which axis is sequence
|
|
|
60
60
|
|
|
61
61
|
### Rust extension (`src/`)
|
|
62
62
|
|
|
63
|
-
|
|
63
|
+
Several modules are compiled and registered in the `seqpro` PyO3 module (`lib.rs`): `kshuffle.rs` (k-mer shuffle, `_k_shuffle`, called by `_modifiers.k_shuffle`), `translate.rs` (codon translation, the `_translate_*` functions), and `ragged.rs` (re-exports `seqpro-core::ragged` for the `_ragged_*` functions). `kmer_encode.rs` provides a rolling k-mer integer encoder used internally. Kernels expect contiguous arrays (e.g. k-shuffle takes a `uint8` ndarray with the last dimension as sequence length) and release the GIL via `py.detach()` during compute.
|
|
64
64
|
|
|
65
65
|
### Ragged arrays (`rag/`)
|
|
66
66
|
|
|
@@ -74,6 +74,11 @@ Only `kshuffle.rs` is compiled. It's exposed as `seqpro._k_shuffle` (called by `
|
|
|
74
74
|
- Conventional commits are enforced — use `feat:`, `fix:`, `ci:`, `bump:`, `refactor:`, `docs:`, etc. prefixes.
|
|
75
75
|
- **Validation is opt-in and front-loaded.** Add fast-fail/input validation via a `validate=` flag (or equivalent single opt-in), not per-feature `error` modes. There must be one obvious way to ask "is this input clean?" — don't duplicate the check across parameters.
|
|
76
76
|
- **No naive NumPy in hot paths.** Never use raw Python loops or naive NumPy (e.g. per-segment `np.concatenate`, Python `for` over sequences) where a Numba kernel is faster and leaner — unless the NumPy version is *verifiably* comparable in time and memory. When Numba is a poor fit (graph algorithms like k-shuffle), use the Rust/PyO3 extension (`src/`).
|
|
77
|
+
- **PyO3 perf tips for `src/` boundary code** (per the [PyO3 performance guide](https://github.com/PyO3/pyo3/blob/main/guide/src/performance.md)). When writing or editing a `#[pyfunction]`:
|
|
78
|
+
- **Detach the interpreter during compute.** Any kernel doing real work (>~1ms, especially rayon-parallel ones) must run inside `py.detach(|| ...)`. Pattern: take slices/views (`as_slice`/`as_array`) and do all Python-touching work *while attached*, run the compute in `detach` capturing only `Ungil` slices/views (never a `PyReadonlyArray`/`Bound`/`Py`), then `into_pyarray(py)` *after* re-attaching. Add a `py: Python` param if the function lacks one.
|
|
79
|
+
- Use the existing `py` token / `Bound::py()` — never `Python::attach` when a token is already in scope.
|
|
80
|
+
- Prefer `cast::<T>()` over `extract::<T>()` for *native* Python types (`PyList`/`PyTuple`/etc.). Does not apply to numpy `PyReadonlyArray` extraction.
|
|
81
|
+
- Pass Rust tuples (not `Bound<PyTuple>`) when calling back into Python, to hit the faster `vectorcall` path.
|
|
77
82
|
|
|
78
83
|
## Skills
|
|
79
84
|
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "seqpro-core"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
description = "pyo3-free Rust kernels for seqpro's ragged (offsets+data) sequence arrays."
|
|
6
|
+
license = "MIT"
|
|
7
|
+
repository = "https://github.com/ML4GLand/SeqPro"
|
|
8
|
+
readme = false
|
|
9
|
+
keywords = ["bioinformatics", "genomics", "ragged", "sequences"]
|
|
10
|
+
categories = ["science", "data-structures"]
|
|
11
|
+
|
|
12
|
+
[lib]
|
|
13
|
+
name = "seqpro_core"
|
|
14
|
+
|
|
15
|
+
[dependencies]
|
|
16
|
+
ndarray = { version = "0.17", features = ["rayon"] }
|
|
17
|
+
rayon = "1.11.0"
|
|
18
|
+
|
|
19
|
+
[dev-dependencies]
|
|
20
|
+
proptest = "1.4"
|