cfdnalab 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ # Generated by Cargo
2
+ # will have compiled files and executables
3
+ debug
4
+ target
5
+
6
+ # These are backup files generated by rustfmt
7
+ **/*.rs.bk
8
+
9
+ # MSVC Windows builds of rustc generate these, which store debugging information
10
+ *.pdb
11
+
12
+ # Generated by cargo mutants
13
+ # Contains mutation testing data
14
+ **/mutants.out*/
15
+
16
+ # RustRover
17
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
18
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
19
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
20
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
21
+ #.idea/
22
+
23
+ src/commands/wps_peaks/snyder_code.md
24
+
25
+ src/commands/gc_bias/window_streaming_logic.md
26
+
27
+ src/commands/prepare_windows/test_to_make.md
28
+
29
+ .DS_Store
30
+
31
+ website/docs/generated/
32
+
33
+ __pycache__/
34
+ .pytest_cache/
35
+ .venv/
36
+ build/
37
+ dist/
38
+ *.egg-info/
39
+
40
+ test_scores.md
41
+
42
+ add_verification_comment.py
43
+
44
+ plans_and_specs/lionheart-main/
45
+ .gwf/
46
+
47
+ other_software/
48
+
49
+ downstream_tests/tmp/
50
+ .Rproj.user
@@ -0,0 +1,7 @@
1
+ # Changelog
2
+
3
+ **NOTE**: This is the changelog for the **Python** package that provides loaders for the main output files of the `cfDNAlab` command-line tool. The changelog for the CLI tool is found [here](https://github.com/BesenbacherLab/cfDNAlab/blob/main/CHANGELOG).
4
+
5
+ ## py-cfDNAlab 0.1.0
6
+
7
+ - Adds Zarr loaders for the outputs of `cfdna midpoints` and `cfdna ends`.
cfdnalab-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ludvig Renbo Olsen
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,186 @@
1
+ Metadata-Version: 2.4
2
+ Name: cfdnalab
3
+ Version: 0.1.0
4
+ Summary: Python helpers for loading and interacting with cfDNAlab output files
5
+ Project-URL: Repository, https://github.com/BesenbacherLab/cfDNAlab/tree/main/py-cfdnalab
6
+ Project-URL: Issues, https://github.com/BesenbacherLab/cfDNAlab/issues
7
+ Project-URL: Changelog, https://github.com/BesenbacherLab/cfDNAlab/blob/main/py-cfdnalab/CHANGELOG
8
+ Author-email: Ludvig <mail@ludvigolsen.dk>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: cell-free-dna,cfdna,fragmentomics,whole-genome-sequencing
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
17
+ Requires-Python: >=3.11
18
+ Requires-Dist: numpy
19
+ Requires-Dist: pandas
20
+ Requires-Dist: scipy
21
+ Requires-Dist: zarr<4,>=3
22
+ Provides-Extra: test
23
+ Requires-Dist: pytest; extra == 'test'
24
+ Description-Content-Type: text/markdown
25
+
26
+ # cfDNAlab | Python Loaders <img src="https://raw.githubusercontent.com/BesenbacherLab/cfDNAlab/refs/heads/main/cfdnalab_logo_little_guy_172x200_144dpi.png" align="right" height="155" />
27
+
28
+ Python helpers for loading [**cfDNAlab**](https://github.com/BesenbacherLab/cfDNAlab) output files.
29
+
30
+ This package does not install or run the cfDNAlab command-line tool. The CLI is distributed separately as the Rust `cfdna` binary. Use this Python package after running cfDNAlab to load and analyze output files.
31
+
32
+ The first supported output types are midpoint and end-motif Zarr outputs: `<prefix>.midpoint_profiles.zarr` and `<prefix>.end_motifs.zarr`.
33
+
34
+ <br>
35
+
36
+ ## Install
37
+
38
+ These instructions only install the Python loader package. To install the `cfdna` command-line tool, see the [main repository](https://github.com/BesenbacherLab/cfDNAlab).
39
+
40
+ Install with pip:
41
+
42
+ ```bash
43
+ pip install cfdnalab
44
+ ```
45
+
46
+ Install the current development version from GitHub:
47
+
48
+ ```bash
49
+ pip install "cfdnalab @ git+https://github.com/BesenbacherLab/cfDNAlab.git#subdirectory=py-cfdnalab"
50
+ ```
51
+
52
+ <br>
53
+
54
+ ## Load Midpoint Profiles
55
+
56
+ ```python
57
+ import cfdnalab as cfl
58
+
59
+ midpoints = cfl.read_midpoints("sample.midpoint_profiles.zarr")
60
+ ```
61
+
62
+ ### Inspect Metadata
63
+
64
+ ```python
65
+ groups = midpoints.groups()
66
+ length_bins = midpoints.length_bins()
67
+ positions = midpoints.positions()
68
+ ```
69
+
70
+ `groups()` returns `group_idx`, `group_name`, and `eligible_intervals`. `length_bins()` and `positions()` return the corresponding bin indices and half-open bp coordinates.
71
+
72
+ ### Extract One Profile
73
+
74
+ Use `group_idx()` and `length_bin_idx()` when selecting by names or bp lengths:
75
+
76
+ ```python
77
+ group_idx = midpoints.group_idx("LYL1")
78
+ length_bin_idx = midpoints.length_bin_idx(167)
79
+
80
+ profile = midpoints.data_frame_for_profile(
81
+ group_idx=group_idx,
82
+ length_bin_idx=length_bin_idx,
83
+ )
84
+ ```
85
+
86
+ The returned data frame has one row per midpoint position bin.
87
+
88
+ ### Filter By Eligible Intervals
89
+
90
+ ```python
91
+ min_intervals = 100
92
+
93
+ for _, group in midpoints.groups().iterrows():
94
+ if group["eligible_intervals"] < min_intervals:
95
+ continue
96
+
97
+ profile = midpoints.data_frame_for_profile(
98
+ group_idx=group["group_idx"],
99
+ length_bin_idx=0,
100
+ )
101
+ ```
102
+
103
+ ### Extract NumPy Arrays
104
+
105
+ ```python
106
+ profile = midpoints.array_for_profile(group_idx=0, length_bin_idx=0)
107
+ group_counts = midpoints.array_from_group_idx(group_idx=0)
108
+ length_counts = midpoints.array_from_length_bin(length_bin_idx=0)
109
+ ```
110
+
111
+ `array()` loads the full 3D count tensor into RAM:
112
+
113
+ ```python
114
+ counts = midpoints.array()
115
+ ```
116
+
117
+ Prefer the slice helpers when possible.
118
+
119
+ <br>
120
+
121
+ ## Load End-Motif Counts
122
+
123
+ ```python
124
+ import cfdnalab as cfl
125
+
126
+ ends = cfl.read_end_motifs("sample.end_motifs.zarr")
127
+ ```
128
+
129
+ ### Storage Mode - Sparse or Dense
130
+
131
+ Start by checking whether the counts were stored as a dense matrix or sparse COO arrays.
132
+
133
+ ```python
134
+ ends.storage_mode()
135
+ ```
136
+
137
+ For sparse output, `sparse_coo_data_frame()` is usually the easiest way to inspect or plot the non-zero motif counts. Use `sparse_coo()` or the sparse slice helpers when you want SciPy sparse matrices. Dense helpers require `allow_densify=True` on sparse stores so large sparse outputs are not accidentally expanded in memory.
138
+
139
+ For dense output, the `dense_data_frame*()` methods are usually the most convenient starting point. Use `dense_counts_zarr_array()` when you want the on-disk Zarr array and `dense_counts_matrix()` when you want the full NumPy matrix in memory.
140
+
141
+ `sparse_coo_data_frame()` is only available for sparse output.
142
+
143
+ ### Inspect End-Motif Metadata
144
+
145
+ ```python
146
+ motifs = ends.motif_metadata()
147
+ ends.has_motif("_AA")
148
+ ```
149
+
150
+ `read_end_motifs()` returns a mode-specific object.
151
+
152
+ - Windowed output has `windows()`.
153
+ - Grouped output has `groups()` and `group_idx()`.
154
+ - Global output has `dense_counts_vec()` and `dense_data_frame()`.
155
+
156
+ ### Extract End-Motif Counts
157
+
158
+ ```python
159
+ motif_idx = ends.motif_idx("_AA")
160
+
161
+ motif_counts = ends.dense_data_frame_for_motif_idx(motif_idx)
162
+ ```
163
+
164
+ Sparse output stays sparse unless you ask for dense arrays:
165
+
166
+ ```python
167
+ sparse_counts = ends.sparse_coo()
168
+ sparse_payload = ends.sparse_coo_data_frame()
169
+ motif_array = ends.dense_counts_for_motif("_AA", allow_densify=True)
170
+ ```
171
+
172
+ For dense windowed output:
173
+
174
+ ```python
175
+ windows = ends.windows()
176
+ window_counts = ends.dense_data_frame_for_window(window_idx=0)
177
+ ```
178
+
179
+ For dense grouped output:
180
+
181
+ ```python
182
+ groups = ends.groups()
183
+ group_counts = ends.dense_data_frame_for_group("t-cells")
184
+ ```
185
+
186
+ For sparse stores, prefer `sparse_coo()`, `sparse_coo_data_frame()`, and the sparse slice helpers when working with large end-motif outputs. Use `allow_densify=True` only when the dense result is small enough to fit comfortably in memory.
@@ -0,0 +1,161 @@
1
+ # cfDNAlab | Python Loaders <img src="https://raw.githubusercontent.com/BesenbacherLab/cfDNAlab/refs/heads/main/cfdnalab_logo_little_guy_172x200_144dpi.png" align="right" height="155" />
2
+
3
+ Python helpers for loading [**cfDNAlab**](https://github.com/BesenbacherLab/cfDNAlab) output files.
4
+
5
+ This package does not install or run the cfDNAlab command-line tool. The CLI is distributed separately as the Rust `cfdna` binary. Use this Python package after running cfDNAlab to load and analyze output files.
6
+
7
+ The first supported output types are midpoint and end-motif Zarr outputs: `<prefix>.midpoint_profiles.zarr` and `<prefix>.end_motifs.zarr`.
8
+
9
+ <br>
10
+
11
+ ## Install
12
+
13
+ These instructions only install the Python loader package. To install the `cfdna` command-line tool, see the [main repository](https://github.com/BesenbacherLab/cfDNAlab).
14
+
15
+ Install with pip:
16
+
17
+ ```bash
18
+ pip install cfdnalab
19
+ ```
20
+
21
+ Install the current development version from GitHub:
22
+
23
+ ```bash
24
+ pip install "cfdnalab @ git+https://github.com/BesenbacherLab/cfDNAlab.git#subdirectory=py-cfdnalab"
25
+ ```
26
+
27
+ <br>
28
+
29
+ ## Load Midpoint Profiles
30
+
31
+ ```python
32
+ import cfdnalab as cfl
33
+
34
+ midpoints = cfl.read_midpoints("sample.midpoint_profiles.zarr")
35
+ ```
36
+
37
+ ### Inspect Metadata
38
+
39
+ ```python
40
+ groups = midpoints.groups()
41
+ length_bins = midpoints.length_bins()
42
+ positions = midpoints.positions()
43
+ ```
44
+
45
+ `groups()` returns `group_idx`, `group_name`, and `eligible_intervals`. `length_bins()` and `positions()` return the corresponding bin indices and half-open bp coordinates.
46
+
47
+ ### Extract One Profile
48
+
49
+ Use `group_idx()` and `length_bin_idx()` when selecting by names or bp lengths:
50
+
51
+ ```python
52
+ group_idx = midpoints.group_idx("LYL1")
53
+ length_bin_idx = midpoints.length_bin_idx(167)
54
+
55
+ profile = midpoints.data_frame_for_profile(
56
+ group_idx=group_idx,
57
+ length_bin_idx=length_bin_idx,
58
+ )
59
+ ```
60
+
61
+ The returned data frame has one row per midpoint position bin.
62
+
63
+ ### Filter By Eligible Intervals
64
+
65
+ ```python
66
+ min_intervals = 100
67
+
68
+ for _, group in midpoints.groups().iterrows():
69
+ if group["eligible_intervals"] < min_intervals:
70
+ continue
71
+
72
+ profile = midpoints.data_frame_for_profile(
73
+ group_idx=group["group_idx"],
74
+ length_bin_idx=0,
75
+ )
76
+ ```
77
+
78
+ ### Extract NumPy Arrays
79
+
80
+ ```python
81
+ profile = midpoints.array_for_profile(group_idx=0, length_bin_idx=0)
82
+ group_counts = midpoints.array_from_group_idx(group_idx=0)
83
+ length_counts = midpoints.array_from_length_bin(length_bin_idx=0)
84
+ ```
85
+
86
+ `array()` loads the full 3D count tensor into RAM:
87
+
88
+ ```python
89
+ counts = midpoints.array()
90
+ ```
91
+
92
+ Prefer the slice helpers when possible.
93
+
94
+ <br>
95
+
96
+ ## Load End-Motif Counts
97
+
98
+ ```python
99
+ import cfdnalab as cfl
100
+
101
+ ends = cfl.read_end_motifs("sample.end_motifs.zarr")
102
+ ```
103
+
104
+ ### Storage Mode - Sparse or Dense
105
+
106
+ Start by checking whether the counts were stored as a dense matrix or sparse COO arrays.
107
+
108
+ ```python
109
+ ends.storage_mode()
110
+ ```
111
+
112
+ For sparse output, `sparse_coo_data_frame()` is usually the easiest way to inspect or plot the non-zero motif counts. Use `sparse_coo()` or the sparse slice helpers when you want SciPy sparse matrices. Dense helpers require `allow_densify=True` on sparse stores so large sparse outputs are not accidentally expanded in memory.
113
+
114
+ For dense output, the `dense_data_frame*()` methods are usually the most convenient starting point. Use `dense_counts_zarr_array()` when you want the on-disk Zarr array and `dense_counts_matrix()` when you want the full NumPy matrix in memory.
115
+
116
+ `sparse_coo_data_frame()` is only available for sparse output.
117
+
118
+ ### Inspect End-Motif Metadata
119
+
120
+ ```python
121
+ motifs = ends.motif_metadata()
122
+ ends.has_motif("_AA")
123
+ ```
124
+
125
+ `read_end_motifs()` returns a mode-specific object.
126
+
127
+ - Windowed output has `windows()`.
128
+ - Grouped output has `groups()` and `group_idx()`.
129
+ - Global output has `dense_counts_vec()` and `dense_data_frame()`.
130
+
131
+ ### Extract End-Motif Counts
132
+
133
+ ```python
134
+ motif_idx = ends.motif_idx("_AA")
135
+
136
+ motif_counts = ends.dense_data_frame_for_motif_idx(motif_idx)
137
+ ```
138
+
139
+ Sparse output stays sparse unless you ask for dense arrays:
140
+
141
+ ```python
142
+ sparse_counts = ends.sparse_coo()
143
+ sparse_payload = ends.sparse_coo_data_frame()
144
+ motif_array = ends.dense_counts_for_motif("_AA", allow_densify=True)
145
+ ```
146
+
147
+ For dense windowed output:
148
+
149
+ ```python
150
+ windows = ends.windows()
151
+ window_counts = ends.dense_data_frame_for_window(window_idx=0)
152
+ ```
153
+
154
+ For dense grouped output:
155
+
156
+ ```python
157
+ groups = ends.groups()
158
+ group_counts = ends.dense_data_frame_for_group("t-cells")
159
+ ```
160
+
161
+ For sparse stores, prefer `sparse_coo()`, `sparse_coo_data_frame()`, and the sparse slice helpers when working with large end-motif outputs. Use `allow_densify=True` only when the dense result is small enough to fit comfortably in memory.
@@ -0,0 +1,58 @@
1
+ [project]
2
+ name = "cfdnalab"
3
+ version = "0.1.0"
4
+ description = "Python helpers for loading and interacting with cfDNAlab output files"
5
+ authors = [{name = "Ludvig", email = "mail@ludvigolsen.dk"}]
6
+ readme = "README.md"
7
+ license = "MIT"
8
+ keywords = [
9
+ "fragmentomics",
10
+ "cell-free-dna",
11
+ "cfdna",
12
+ "whole-genome-sequencing"
13
+ ]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Science/Research",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
20
+ ]
21
+ requires-python = ">=3.11"
22
+ dependencies = [
23
+ "numpy",
24
+ "pandas",
25
+ "scipy",
26
+ "zarr>=3,<4",
27
+ ]
28
+
29
+ [project.urls]
30
+ Repository = "https://github.com/BesenbacherLab/cfDNAlab/tree/main/py-cfdnalab"
31
+ Issues = "https://github.com/BesenbacherLab/cfDNAlab/issues"
32
+ Changelog = "https://github.com/BesenbacherLab/cfDNAlab/blob/main/py-cfdnalab/CHANGELOG"
33
+
34
+ [project.optional-dependencies]
35
+ test = [
36
+ "pytest",
37
+ ]
38
+
39
+ [dependency-groups]
40
+ dev = [
41
+ "pytest",
42
+ ]
43
+
44
+ [build-system]
45
+ requires = ["hatchling"]
46
+ build-backend = "hatchling.build"
47
+
48
+ [tool.hatch.build.targets.wheel]
49
+ packages = ["src/cfdnalab"]
50
+
51
+ [tool.hatch.build.targets.sdist]
52
+ include = [
53
+ "/CHANGELOG",
54
+ "/LICENSE",
55
+ "/README.md",
56
+ "/pyproject.toml",
57
+ "/src/cfdnalab",
58
+ ]
@@ -0,0 +1,23 @@
1
+ """Python helpers for loading cfDNAlab output files."""
2
+
3
+ from .ends import (
4
+ EndMotifCounts,
5
+ GlobalEndMotifCounts,
6
+ GroupedEndMotifCounts,
7
+ WindowedEndMotifCounts,
8
+ read_end_motifs,
9
+ )
10
+ from .midpoints import MidpointProfiles, read_midpoints
11
+
12
+ __version__ = "0.1.0"
13
+
14
+ __all__ = [
15
+ "EndMotifCounts",
16
+ "GlobalEndMotifCounts",
17
+ "GroupedEndMotifCounts",
18
+ "MidpointProfiles",
19
+ "WindowedEndMotifCounts",
20
+ "__version__",
21
+ "read_end_motifs",
22
+ "read_midpoints",
23
+ ]