PyPI - cfdnalab - Versions diffs - 0.1.0__tar.gz - Mend

cfdnalab 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

cfdnalab-0.1.0/.gitignore +50 -0
cfdnalab-0.1.0/CHANGELOG +7 -0
cfdnalab-0.1.0/LICENSE +21 -0
cfdnalab-0.1.0/PKG-INFO +186 -0
cfdnalab-0.1.0/README.md +161 -0
cfdnalab-0.1.0/pyproject.toml +58 -0
cfdnalab-0.1.0/src/cfdnalab/__init__.py +23 -0
cfdnalab-0.1.0/src/cfdnalab/ends.py +1150 -0
cfdnalab-0.1.0/src/cfdnalab/midpoints.py +710 -0

cfdnalab-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,50 @@
+# Generated by Cargo
+# will have compiled files and executables
+debug
+target
+# These are backup files generated by rustfmt
+**/*.rs.bk
+# MSVC Windows builds of rustc generate these, which store debugging information
+*.pdb
+# Generated by cargo mutants
+# Contains mutation testing data
+**/mutants.out*/
+# RustRover
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+src/commands/wps_peaks/snyder_code.md
+src/commands/gc_bias/window_streaming_logic.md
+src/commands/prepare_windows/test_to_make.md
+.DS_Store
+website/docs/generated/
+__pycache__/
+.pytest_cache/
+.venv/
+build/
+dist/
+*.egg-info/
+test_scores.md
+add_verification_comment.py
+plans_and_specs/lionheart-main/
+.gwf/
+other_software/
+downstream_tests/tmp/
+.Rproj.user

cfdnalab-0.1.0/CHANGELOG ADDED Viewed

@@ -0,0 +1,7 @@
+# Changelog
+**NOTE**: This is the changelog for the **Python** package that provides output loaders for the main output files from the main `cfDNAlab` command line tool. The changelog for the CLI tool is found [here](https://github.com/BesenbacherLab/cfDNAlab/blob/main/CHANGELOG).
+## py-cfDNAlab 0.1.0
+ - Adds zarr loaders for the outputs of `cfdna midpoints` and `cfdna ends`.

cfdnalab-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Ludvig Renbo Olsen
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

cfdnalab-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,186 @@
+Metadata-Version: 2.4
+Name: cfdnalab
+Version: 0.1.0
+Summary: Python helpers for loading and interacting with cfDNAlab output files
+Project-URL: Repository, https://github.com/BesenbacherLab/cfDNAlab/tree/main/py-cfdnalab
+Project-URL: Issues, https://github.com/BesenbacherLab/cfDNAlab/issues
+Project-URL: Changelog, https://github.com/BesenbacherLab/cfDNAlab/blob/main/py-cfdnalab/CHANGELOG
+Author-email: Ludvig <mail@ludvigolsen.dk>
+License-Expression: MIT
+License-File: LICENSE
+Keywords: cell-free-dna,cfdna,fragmentomics,whole-genome-sequencing
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Requires-Python: >=3.11
+Requires-Dist: numpy
+Requires-Dist: pandas
+Requires-Dist: scipy
+Requires-Dist: zarr<4,>=3
+Provides-Extra: test
+Requires-Dist: pytest; extra == 'test'
+Description-Content-Type: text/markdown
+# cfDNAlab | Python Loaders <img src="https://raw.githubusercontent.com/BesenbacherLab/cfDNAlab/refs/heads/main/cfdnalab_logo_little_guy_172x200_144dpi.png" align="right" height="155" />
+Python helpers for loading [**cfDNAlab**](https://github.com/BesenbacherLab/cfDNAlab) output files.
+This package does not install or run the cfDNAlab command-line tool. The CLI is distributed separately as the Rust `cfdna` binary. Use this Python package after running cfDNAlab to load and analyze output files.
+The first supported output types are midpoint and end-motif Zarr outputs: `<prefix>.midpoint_profiles.zarr` and `<prefix>.end_motifs.zarr`.
+<br>
+## Install
+These instructions only install the Python loader package. To install the `cfdna` command-line tool, see the [main repository](https://github.com/BesenbacherLab/cfDNAlab).
+Install with pip:
+```bash
+pip install cfdnalab
+```
+Install the current development version from GitHub:
+```bash
+pip install "cfdnalab @ git+https://github.com/BesenbacherLab/cfDNAlab.git#subdirectory=py-cfdnalab"
+```
+<br>
+## Load Midpoint Profiles
+```python
+import cfdnalab as cfl
+midpoints = cfl.read_midpoints("sample.midpoint_profiles.zarr")
+```
+### Inspect Metadata
+```python
+groups = midpoints.groups()
+length_bins = midpoints.length_bins()
+positions = midpoints.positions()
+```
+`groups()` returns `group_idx`, `group_name`, and `eligible_intervals`. `length_bins()` and `positions()` return the corresponding bin indices and half-open bp coordinates.
+### Extract One Profile
+Use `group_idx()` and `length_bin_idx()` when selecting by names or bp lengths:
+```python
+group_idx = midpoints.group_idx("LYL1")
+length_bin_idx = midpoints.length_bin_idx(167)
+profile = midpoints.data_frame_for_profile(
+    group_idx=group_idx,
+    length_bin_idx=length_bin_idx,
+)
+```
+The returned data frame has one row per midpoint position bin.
+### Filter By Eligible Intervals
+```python
+min_intervals = 100
+for _, group in midpoints.groups().iterrows():
+    if group["eligible_intervals"] < min_intervals:
+        continue
+    profile = midpoints.data_frame_for_profile(
+        group_idx=group["group_idx"],
+        length_bin_idx=0,
+    )
+```
+### Extract NumPy Arrays
+```python
+profile = midpoints.array_for_profile(group_idx=0, length_bin_idx=0)
+group_counts = midpoints.array_from_group_idx(group_idx=0)
+length_counts = midpoints.array_from_length_bin(length_bin_idx=0)
+```
+`array()` loads the full 3D count tensor into RAM:
+```python
+counts = midpoints.array()
+```
+Prefer the slice helpers when possible.
+<br>
+## Load End-Motif Counts
+```python
+import cfdnalab as cfl
+ends = cfl.read_end_motifs("sample.end_motifs.zarr")
+```
+### Storage Mode - Sparse or Dense
+Start by checking whether the counts were stored as a dense matrix or sparse COO arrays.
+```python
+ends.storage_mode()
+```
+For sparse output, `sparse_coo_data_frame()` is usually the easiest way to inspect or plot the non-zero motif counts. Use `sparse_coo()` or the sparse slice helpers when you want SciPy sparse matrices. Dense helpers require `allow_densify=True` on sparse stores so large sparse outputs are not accidentally expanded in memory.
+For dense output, the `dense_data_frame*()` methods are usually the most convenient starting point. Use `dense_counts_zarr_array()` when you want the on-disk Zarr array and `dense_counts_matrix()` when you want the full NumPy matrix in memory.
+`sparse_coo_data_frame()` is only available for sparse output.
+### Inspect End-Motif Metadata
+```python
+motifs = ends.motif_metadata()
+ends.has_motif("_AA")
+```
+`read_end_motifs()` returns a mode-specific object.
+- Windowed output has `windows()`.
+- Grouped output has `groups()` and `group_idx()`.
+- Global output has `dense_counts_vec()` and `dense_data_frame()`.
+### Extract End-Motif Counts
+```python
+motif_idx = ends.motif_idx("_AA")
+motif_counts = ends.dense_data_frame_for_motif_idx(motif_idx)
+```
+Sparse output stays sparse unless you ask for dense arrays:
+```python
+sparse_counts = ends.sparse_coo()
+sparse_payload = ends.sparse_coo_data_frame()
+motif_array = ends.dense_counts_for_motif("_AA", allow_densify=True)
+```
+For dense windowed output:
+```python
+windows = ends.windows()
+window_counts = ends.dense_data_frame_for_window(window_idx=0)
+```
+For dense grouped output:
+```python
+groups = ends.groups()
+group_counts = ends.dense_data_frame_for_group("t-cells")
+```
+For sparse stores, prefer `sparse_coo()`, `sparse_coo_data_frame()`, and the sparse slice helpers when working with large end-motif outputs. Use `allow_densify=True` only when the dense result is small enough to fit comfortably in memory.

cfdnalab-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,161 @@
+# cfDNAlab | Python Loaders <img src="https://raw.githubusercontent.com/BesenbacherLab/cfDNAlab/refs/heads/main/cfdnalab_logo_little_guy_172x200_144dpi.png" align="right" height="155" />
+Python helpers for loading [**cfDNAlab**](https://github.com/BesenbacherLab/cfDNAlab) output files.
+This package does not install or run the cfDNAlab command-line tool. The CLI is distributed separately as the Rust `cfdna` binary. Use this Python package after running cfDNAlab to load and analyze output files.
+The first supported output types are midpoint and end-motif Zarr outputs: `<prefix>.midpoint_profiles.zarr` and `<prefix>.end_motifs.zarr`.
+<br>
+## Install
+These instructions only install the Python loader package. To install the `cfdna` command-line tool, see the [main repository](https://github.com/BesenbacherLab/cfDNAlab).
+Install with pip:
+```bash
+pip install cfdnalab
+```
+Install the current development version from GitHub:
+```bash
+pip install "cfdnalab @ git+https://github.com/BesenbacherLab/cfDNAlab.git#subdirectory=py-cfdnalab"
+```
+<br>
+## Load Midpoint Profiles
+```python
+import cfdnalab as cfl
+midpoints = cfl.read_midpoints("sample.midpoint_profiles.zarr")
+```
+### Inspect Metadata
+```python
+groups = midpoints.groups()
+length_bins = midpoints.length_bins()
+positions = midpoints.positions()
+```
+`groups()` returns `group_idx`, `group_name`, and `eligible_intervals`. `length_bins()` and `positions()` return the corresponding bin indices and half-open bp coordinates.
+### Extract One Profile
+Use `group_idx()` and `length_bin_idx()` when selecting by names or bp lengths:
+```python
+group_idx = midpoints.group_idx("LYL1")
+length_bin_idx = midpoints.length_bin_idx(167)
+profile = midpoints.data_frame_for_profile(
+    group_idx=group_idx,
+    length_bin_idx=length_bin_idx,
+)
+```
+The returned data frame has one row per midpoint position bin.
+### Filter By Eligible Intervals
+```python
+min_intervals = 100
+for _, group in midpoints.groups().iterrows():
+    if group["eligible_intervals"] < min_intervals:
+        continue
+    profile = midpoints.data_frame_for_profile(
+        group_idx=group["group_idx"],
+        length_bin_idx=0,
+    )
+```
+### Extract NumPy Arrays
+```python
+profile = midpoints.array_for_profile(group_idx=0, length_bin_idx=0)
+group_counts = midpoints.array_from_group_idx(group_idx=0)
+length_counts = midpoints.array_from_length_bin(length_bin_idx=0)
+```
+`array()` loads the full 3D count tensor into RAM:
+```python
+counts = midpoints.array()
+```
+Prefer the slice helpers when possible.
+<br>
+## Load End-Motif Counts
+```python
+import cfdnalab as cfl
+ends = cfl.read_end_motifs("sample.end_motifs.zarr")
+```
+### Storage Mode - Sparse or Dense
+Start by checking whether the counts were stored as a dense matrix or sparse COO arrays.
+```python
+ends.storage_mode()
+```
+For sparse output, `sparse_coo_data_frame()` is usually the easiest way to inspect or plot the non-zero motif counts. Use `sparse_coo()` or the sparse slice helpers when you want SciPy sparse matrices. Dense helpers require `allow_densify=True` on sparse stores so large sparse outputs are not accidentally expanded in memory.
+For dense output, the `dense_data_frame*()` methods are usually the most convenient starting point. Use `dense_counts_zarr_array()` when you want the on-disk Zarr array and `dense_counts_matrix()` when you want the full NumPy matrix in memory.
+`sparse_coo_data_frame()` is only available for sparse output.
+### Inspect End-Motif Metadata
+```python
+motifs = ends.motif_metadata()
+ends.has_motif("_AA")
+```
+`read_end_motifs()` returns a mode-specific object.
+- Windowed output has `windows()`.
+- Grouped output has `groups()` and `group_idx()`.
+- Global output has `dense_counts_vec()` and `dense_data_frame()`.
+### Extract End-Motif Counts
+```python
+motif_idx = ends.motif_idx("_AA")
+motif_counts = ends.dense_data_frame_for_motif_idx(motif_idx)
+```
+Sparse output stays sparse unless you ask for dense arrays:
+```python
+sparse_counts = ends.sparse_coo()
+sparse_payload = ends.sparse_coo_data_frame()
+motif_array = ends.dense_counts_for_motif("_AA", allow_densify=True)
+```
+For dense windowed output:
+```python
+windows = ends.windows()
+window_counts = ends.dense_data_frame_for_window(window_idx=0)
+```
+For dense grouped output:
+```python
+groups = ends.groups()
+group_counts = ends.dense_data_frame_for_group("t-cells")
+```
+For sparse stores, prefer `sparse_coo()`, `sparse_coo_data_frame()`, and the sparse slice helpers when working with large end-motif outputs. Use `allow_densify=True` only when the dense result is small enough to fit comfortably in memory.

cfdnalab-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,58 @@
+[project]
+name = "cfdnalab"
+version = "0.1.0"
+description = "Python helpers for loading and interacting with cfDNAlab output files"
+authors = [{name = "Ludvig", email = "mail@ludvigolsen.dk"}]
+readme = "README.md"
+license = "MIT"
+keywords = [
+    "fragmentomics",
+    "cell-free-dna",
+    "cfdna",
+    "whole-genome-sequencing"
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Topic :: Scientific/Engineering :: Bio-Informatics",
+]
+requires-python = ">=3.11"
+dependencies = [
+    "numpy",
+    "pandas",
+    "scipy",
+    "zarr>=3,<4",
+]
+[project.urls]
+Repository = "https://github.com/BesenbacherLab/cfDNAlab/tree/main/py-cfdnalab"
+Issues = "https://github.com/BesenbacherLab/cfDNAlab/issues"
+Changelog = "https://github.com/BesenbacherLab/cfDNAlab/blob/main/py-cfdnalab/CHANGELOG"
+[project.optional-dependencies]
+test = [
+    "pytest",
+]
+[dependency-groups]
+dev = [
+    "pytest",
+]
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["src/cfdnalab"]
+[tool.hatch.build.targets.sdist]
+include = [
+    "/CHANGELOG",
+    "/LICENSE",
+    "/README.md",
+    "/pyproject.toml",
+    "/src/cfdnalab",
+]

cfdnalab-0.1.0/src/cfdnalab/__init__.py ADDED Viewed

@@ -0,0 +1,23 @@
+"""Python helpers for loading cfDNAlab output files."""
+from .ends import (
+    EndMotifCounts,
+    GlobalEndMotifCounts,
+    GroupedEndMotifCounts,
+    WindowedEndMotifCounts,
+    read_end_motifs,
+)
+from .midpoints import MidpointProfiles, read_midpoints
+__version__ = "0.1.0"
+__all__ = [
+    "EndMotifCounts",
+    "GlobalEndMotifCounts",
+    "GroupedEndMotifCounts",
+    "MidpointProfiles",
+    "WindowedEndMotifCounts",
+    "__version__",
+    "read_end_motifs",
+    "read_midpoints",
+]