pbzarr 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ # SCM syntax highlighting & preventing 3-way merges
2
+ pixi.lock merge=binary linguist-language=YAML linguist-generated=true -diff
@@ -0,0 +1,23 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: macos-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - uses: prefix-dev/setup-pixi@v0.9.4
16
+ with:
17
+ pixi-version: v0.66.0
18
+ frozen: true
19
+
20
+ - run: pixi run lint
21
+ - run: pixi run fmt-check
22
+ - run: pixi run typecheck
23
+ - run: pixi run test
@@ -0,0 +1,16 @@
1
+ name: Lint PR Title
2
+
3
+ on:
4
+ pull_request_target:
5
+ types: [opened, edited, synchronize]
6
+
7
+ permissions:
8
+ pull-requests: read
9
+
10
+ jobs:
11
+ lint:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: amannn/action-semantic-pull-request@v5
15
+ env:
16
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -0,0 +1,43 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ workflow_dispatch:
7
+
8
+ permissions:
9
+ contents: write
10
+ issues: write
11
+ pull-requests: write
12
+
13
+ jobs:
14
+ release-please:
15
+ runs-on: ubuntu-latest
16
+ outputs:
17
+ release_created: ${{ steps.release.outputs.release_created }}
18
+ tag_name: ${{ steps.release.outputs.tag_name }}
19
+ steps:
20
+ - uses: googleapis/release-please-action@v4
21
+ id: release
22
+ with:
23
+ release-type: python
24
+
25
+ publish:
26
+ needs: release-please
27
+ if: ${{ needs.release-please.outputs.release_created || github.event_name == 'workflow_dispatch' }}
28
+ runs-on: ubuntu-latest
29
+ environment: pypi
30
+ permissions:
31
+ id-token: write
32
+ steps:
33
+ - uses: actions/checkout@v4
34
+
35
+ - uses: prefix-dev/setup-pixi@v0.9.4
36
+ with:
37
+ pixi-version: v0.66.0
38
+ run-install: false
39
+
40
+ - run: pixi install -e build --frozen
41
+ - run: pixi run -e build build
42
+
43
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,223 @@
1
+ # pixi environments
2
+ .pixi/*
3
+ !.pixi/config.toml
4
+ CLAUDE.md
5
+ ### Generated by gibo (https://github.com/simonwhitaker/gibo)
6
+ ### https://raw.github.com/github/gitignore/d1c5e90de7543bd7489606f813ebf69d4d49473b/Python.gitignore
7
+ benchmarks/data
8
+ # Byte-compiled / optimized / DLL files
9
+ __pycache__/
10
+ *.py[codz]
11
+ *$py.class
12
+
13
+ # C extensions
14
+ *.so
15
+
16
+ # Distribution / packaging
17
+ .Python
18
+ build/
19
+ develop-eggs/
20
+ dist/
21
+ downloads/
22
+ eggs/
23
+ .eggs/
24
+ lib/
25
+ lib64/
26
+ parts/
27
+ sdist/
28
+ var/
29
+ wheels/
30
+ share/python-wheels/
31
+ *.egg-info/
32
+ .installed.cfg
33
+ *.egg
34
+ MANIFEST
35
+
36
+ # PyInstaller
37
+ # Usually these files are written by a python script from a template
38
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
39
+ *.manifest
40
+ *.spec
41
+
42
+ # Installer logs
43
+ pip-log.txt
44
+ pip-delete-this-directory.txt
45
+
46
+ # Unit test / coverage reports
47
+ htmlcov/
48
+ .tox/
49
+ .nox/
50
+ .coverage
51
+ .coverage.*
52
+ .cache
53
+ nosetests.xml
54
+ coverage.xml
55
+ *.cover
56
+ *.py.cover
57
+ .hypothesis/
58
+ .pytest_cache/
59
+ cover/
60
+
61
+ # Translations
62
+ *.mo
63
+ *.pot
64
+
65
+ # Django stuff:
66
+ *.log
67
+ local_settings.py
68
+ db.sqlite3
69
+ db.sqlite3-journal
70
+
71
+ # Flask stuff:
72
+ instance/
73
+ .webassets-cache
74
+
75
+ # Scrapy stuff:
76
+ .scrapy
77
+
78
+ # Sphinx documentation
79
+ docs/_build/
80
+
81
+ # PyBuilder
82
+ .pybuilder/
83
+ target/
84
+
85
+ # Jupyter Notebook
86
+ .ipynb_checkpoints
87
+
88
+ # IPython
89
+ profile_default/
90
+ ipython_config.py
91
+
92
+ # pyenv
93
+ # For a library or package, you might want to ignore these files since the code is
94
+ # intended to run in multiple environments; otherwise, check them in:
95
+ # .python-version
96
+
97
+ # pipenv
98
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
99
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
100
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
101
+ # install all needed dependencies.
102
+ #Pipfile.lock
103
+
104
+ # UV
105
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
106
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
107
+ # commonly ignored for libraries.
108
+ #uv.lock
109
+
110
+ # poetry
111
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
112
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
113
+ # commonly ignored for libraries.
114
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
115
+ #poetry.lock
116
+ #poetry.toml
117
+
118
+ # pdm
119
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
120
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
121
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
122
+ #pdm.lock
123
+ #pdm.toml
124
+ .pdm-python
125
+ .pdm-build/
126
+
127
+ # pixi
128
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
129
+ #pixi.lock
130
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
131
+ # in the .venv directory. It is recommended not to include this directory in version control.
132
+ .pixi
133
+
134
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
135
+ __pypackages__/
136
+
137
+ # Celery stuff
138
+ celerybeat-schedule
139
+ celerybeat.pid
140
+
141
+ # Redis
142
+ *.rdb
143
+ *.aof
144
+ *.pid
145
+
146
+ # RabbitMQ
147
+ mnesia/
148
+ rabbitmq/
149
+ rabbitmq-data/
150
+
151
+ # ActiveMQ
152
+ activemq-data/
153
+
154
+ # SageMath parsed files
155
+ *.sage.py
156
+
157
+ # Environments
158
+ .env
159
+ .envrc
160
+ .venv
161
+ env/
162
+ venv/
163
+ ENV/
164
+ env.bak/
165
+ venv.bak/
166
+
167
+ # Spyder project settings
168
+ .spyderproject
169
+ .spyproject
170
+
171
+ # Rope project settings
172
+ .ropeproject
173
+
174
+ # mkdocs documentation
175
+ /site
176
+
177
+ # mypy
178
+ .mypy_cache/
179
+ .dmypy.json
180
+ dmypy.json
181
+
182
+ # Pyre type checker
183
+ .pyre/
184
+
185
+ # pytype static type analyzer
186
+ .pytype/
187
+
188
+ # Cython debug symbols
189
+ cython_debug/
190
+
191
+ # PyCharm
192
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
193
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
194
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
195
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
196
+ #.idea/
197
+
198
+ # Abstra
199
+ # Abstra is an AI-powered process automation framework.
200
+ # Ignore directories containing user credentials, local state, and settings.
201
+ # Learn more at https://abstra.io/docs
202
+ .abstra/
203
+
204
+ # Visual Studio Code
205
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
206
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
207
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
208
+ # you could uncomment the following to ignore the entire vscode folder
209
+ # .vscode/
210
+
211
+ # Ruff stuff:
212
+ .ruff_cache/
213
+
214
+ # PyPI configuration file
215
+ .pypirc
216
+
217
+ # Marimo
218
+ marimo/_static/
219
+ marimo/_lsp/
220
+ __marimo__/
221
+
222
+ # Streamlit
223
+ .streamlit/secrets.toml
@@ -0,0 +1,90 @@
1
+ # Changelog
2
+
3
+ ## [0.1.0](https://github.com/pbzarr/pbzarr-py/compare/v0.1.0...v0.1.0) (2026-04-07)
4
+
5
+
6
+ ### Features
7
+
8
+ * add dask ([c10b2d7](https://github.com/pbzarr/pbzarr-py/commit/c10b2d76e77ca04e618d87b267756a850ca227a8))
9
+ * add query ([b3fdaba](https://github.com/pbzarr/pbzarr-py/commit/b3fdabad19996f9189fba84c06ebe17d7ac3e9a6))
10
+ * add write ([42258a7](https://github.com/pbzarr/pbzarr-py/commit/42258a719e71cb646dcaae4b26a5f3a851421943))
11
+
12
+
13
+ ### Bug Fixes
14
+
15
+ * rename metadata keys to match spec ("pbz" → "perbase_zarr") ([2e75598](https://github.com/pbzarr/pbzarr-py/commit/2e75598d203844ce8876fd208a62c5fa750f0deb))
16
+ * update pixi.lock after pyproject.toml metadata changes ([ee0706e](https://github.com/pbzarr/pbzarr-py/commit/ee0706ea212b2d6b368bea757491736c5f129d40))
17
+ * Update platforms in pyproject.toml ([5b622d4](https://github.com/pbzarr/pbzarr-py/commit/5b622d41495f0a19b738cce1d92ebeae58b8908d))
18
+ * use run-install:false in release workflow to bypass pixi list bug ([510e681](https://github.com/pbzarr/pbzarr-py/commit/510e681c01f928152880cc278ffdba8df5e256f9))
19
+
20
+
21
+ ### Dependencies
22
+
23
+ * remove xarray ([04f75eb](https://github.com/pbzarr/pbzarr-py/commit/04f75eba5fba1fb78751d453c336f6ae38c25131))
24
+
25
+
26
+ ### Documentation
27
+
28
+ * update docstrings ([ea8280c](https://github.com/pbzarr/pbzarr-py/commit/ea8280ca3078b73ceae7f72148a1d55b4cd882c3))
29
+
30
+
31
+ ### Miscellaneous Chores
32
+
33
+ * release 0.1.0 ([04abf30](https://github.com/pbzarr/pbzarr-py/commit/04abf3042bc0fddf6aee842c5034cb3a1f724fde))
34
+ * release 0.1.0 ([336fbd1](https://github.com/pbzarr/pbzarr-py/commit/336fbd1f6e754c76603beceb103b4576da50424d))
35
+
36
+ ## [0.1.0](https://github.com/pbzarr/pbzarr-py/compare/v0.1.0...v0.1.0) (2026-04-07)
37
+
38
+
39
+ ### Features
40
+
41
+ * add dask ([c10b2d7](https://github.com/pbzarr/pbzarr-py/commit/c10b2d76e77ca04e618d87b267756a850ca227a8))
42
+ * add query ([b3fdaba](https://github.com/pbzarr/pbzarr-py/commit/b3fdabad19996f9189fba84c06ebe17d7ac3e9a6))
43
+ * add write ([42258a7](https://github.com/pbzarr/pbzarr-py/commit/42258a719e71cb646dcaae4b26a5f3a851421943))
44
+
45
+
46
+ ### Bug Fixes
47
+
48
+ * rename metadata keys to match spec ("pbz" → "perbase_zarr") ([2e75598](https://github.com/pbzarr/pbzarr-py/commit/2e75598d203844ce8876fd208a62c5fa750f0deb))
49
+ * update pixi.lock after pyproject.toml metadata changes ([ee0706e](https://github.com/pbzarr/pbzarr-py/commit/ee0706ea212b2d6b368bea757491736c5f129d40))
50
+ * Update platforms in pyproject.toml ([5b622d4](https://github.com/pbzarr/pbzarr-py/commit/5b622d41495f0a19b738cce1d92ebeae58b8908d))
51
+
52
+
53
+ ### Dependencies
54
+
55
+ * remove xarray ([04f75eb](https://github.com/pbzarr/pbzarr-py/commit/04f75eba5fba1fb78751d453c336f6ae38c25131))
56
+
57
+
58
+ ### Documentation
59
+
60
+ * update docstrings ([ea8280c](https://github.com/pbzarr/pbzarr-py/commit/ea8280ca3078b73ceae7f72148a1d55b4cd882c3))
61
+
62
+
63
+ ### Miscellaneous Chores
64
+
65
+ * release 0.1.0 ([336fbd1](https://github.com/pbzarr/pbzarr-py/commit/336fbd1f6e754c76603beceb103b4576da50424d))
66
+
67
+ ## 0.1.0 (2026-04-07)
68
+
69
+
70
+ ### Features
71
+
72
+ * add dask ([c10b2d7](https://github.com/pbzarr/pbzarr-py/commit/c10b2d76e77ca04e618d87b267756a850ca227a8))
73
+ * add query ([b3fdaba](https://github.com/pbzarr/pbzarr-py/commit/b3fdabad19996f9189fba84c06ebe17d7ac3e9a6))
74
+ * add write ([42258a7](https://github.com/pbzarr/pbzarr-py/commit/42258a719e71cb646dcaae4b26a5f3a851421943))
75
+
76
+
77
+ ### Bug Fixes
78
+
79
+ * rename metadata keys to match spec ("pbz" → "perbase_zarr") ([2e75598](https://github.com/pbzarr/pbzarr-py/commit/2e75598d203844ce8876fd208a62c5fa750f0deb))
80
+ * update pixi.lock after pyproject.toml metadata changes ([ee0706e](https://github.com/pbzarr/pbzarr-py/commit/ee0706ea212b2d6b368bea757491736c5f129d40))
81
+
82
+
83
+ ### Dependencies
84
+
85
+ * remove xarray ([04f75eb](https://github.com/pbzarr/pbzarr-py/commit/04f75eba5fba1fb78751d453c336f6ae38c25131))
86
+
87
+
88
+ ### Documentation
89
+
90
+ * update docstrings ([ea8280c](https://github.com/pbzarr/pbzarr-py/commit/ea8280ca3078b73ceae7f72148a1d55b4cd882c3))
pbzarr-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Cade Mirchandani
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
pbzarr-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,95 @@
1
+ Metadata-Version: 2.4
2
+ Name: pbzarr
3
+ Version: 0.1.0
4
+ Summary: A Zarr v3 convention for per-base resolution genomic data
5
+ Project-URL: Homepage, https://github.com/pbzarr/pbzarr-py
6
+ Project-URL: Repository, https://github.com/pbzarr/pbzarr-py
7
+ Project-URL: Specification, https://github.com/pbzarr/pbzarr-spec
8
+ Author-email: Cade Mirchandani <cmirchan@ucsc.edu>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: bioinformatics,depth,genomics,methylation,zarr
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
19
+ Requires-Python: >=3.11
20
+ Requires-Dist: numpy
21
+ Requires-Dist: zarr>=3.0
22
+ Provides-Extra: all
23
+ Requires-Dist: dask[array]; extra == 'all'
24
+ Provides-Extra: dask
25
+ Requires-Dist: dask[array]; extra == 'dask'
26
+ Description-Content-Type: text/markdown
27
+
28
+ # pbzarr
29
+
30
+ A Python library for PBZ (Per-Base Zarr) — a Zarr v3 convention for storing per-base resolution genomic data such as read depths, methylation levels, and boolean masks.
31
+
32
+ PBZ is a modern alternative to D4 and bigWig, leveraging the Zarr ecosystem for compression, chunking, and cloud-native access.
33
+
34
+ ## Installation
35
+
36
+ ```bash
37
+ pip install pbzarr
38
+ ```
39
+
40
+ With optional Dask support:
41
+
42
+ ```bash
43
+ pip install "pbzarr[dask]"
44
+ ```
45
+
46
+ ## Quick Start
47
+
48
+ ```python
49
+ import pbzarr
50
+
51
+ # Create a store
52
+ store = pbzarr.create(
53
+ "sample.pbz.zarr",
54
+ contigs=["chr1", "chr2"],
55
+ contig_lengths=[248_956_422, 242_193_529],
56
+ )
57
+
58
+ # Add a track
59
+ track = store.create_track("depths", dtype="uint32", columns=["sample_A", "sample_B"])
60
+
61
+ # Write data
62
+ import numpy as np
63
+ track["chr1", 0:1000] = np.random.randint(0, 100, size=(1000, 2), dtype="uint32")
64
+
65
+ # Query data
66
+ data = track.query("chr1:0-1000", columns="sample_A")
67
+ ```
68
+
69
+ ```python
70
+ # Open an existing store
71
+ store = pbzarr.open("sample.pbz.zarr")
72
+ track = store["depths"]
73
+
74
+ # Slice-based access
75
+ data = track["chr1", 0:1000, "sample_A"]
76
+
77
+ # Dask backend for lazy/parallel computation
78
+ store = pbzarr.open("sample.pbz.zarr", backend="dask")
79
+ lazy = store["depths"].query("chr1:0-1000000")
80
+ result = lazy.compute()
81
+ ```
82
+
83
+ ## Features
84
+
85
+ - **Zarr v3 only** with full codec and storage backend support
86
+ - **NumPy and Dask backends** for eager or lazy computation
87
+ - **Region query syntax**: `"chr1:1000-2000"`, tuples, or slice notation
88
+ - **Column filtering** by name or index
89
+ - **Escape hatches** to raw `zarr.Group` and `zarr.Array` objects
90
+ - **Self-describing tracks** with independent dtype, chunking, and metadata
91
+
92
+ ## Links
93
+
94
+ - [PBZ Format Specification](https://github.com/pbzarr/pbzarr-spec)
95
+ - [Rust Implementation](https://github.com/pbzarr/pbzarr-rs)
pbzarr-0.1.0/README.md ADDED
@@ -0,0 +1,68 @@
1
+ # pbzarr
2
+
3
+ A Python library for PBZ (Per-Base Zarr) — a Zarr v3 convention for storing per-base resolution genomic data such as read depths, methylation levels, and boolean masks.
4
+
5
+ PBZ is a modern alternative to D4 and bigWig, leveraging the Zarr ecosystem for compression, chunking, and cloud-native access.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install pbzarr
11
+ ```
12
+
13
+ With optional Dask support:
14
+
15
+ ```bash
16
+ pip install "pbzarr[dask]"
17
+ ```
18
+
19
+ ## Quick Start
20
+
21
+ ```python
22
+ import pbzarr
23
+
24
+ # Create a store
25
+ store = pbzarr.create(
26
+ "sample.pbz.zarr",
27
+ contigs=["chr1", "chr2"],
28
+ contig_lengths=[248_956_422, 242_193_529],
29
+ )
30
+
31
+ # Add a track
32
+ track = store.create_track("depths", dtype="uint32", columns=["sample_A", "sample_B"])
33
+
34
+ # Write data
35
+ import numpy as np
36
+ track["chr1", 0:1000] = np.random.randint(0, 100, size=(1000, 2), dtype="uint32")
37
+
38
+ # Query data
39
+ data = track.query("chr1:0-1000", columns="sample_A")
40
+ ```
41
+
42
+ ```python
43
+ # Open an existing store
44
+ store = pbzarr.open("sample.pbz.zarr")
45
+ track = store["depths"]
46
+
47
+ # Slice-based access
48
+ data = track["chr1", 0:1000, "sample_A"]
49
+
50
+ # Dask backend for lazy/parallel computation
51
+ store = pbzarr.open("sample.pbz.zarr", backend="dask")
52
+ lazy = store["depths"].query("chr1:0-1000000")
53
+ result = lazy.compute()
54
+ ```
55
+
56
+ ## Features
57
+
58
+ - **Zarr v3 only** with full codec and storage backend support
59
+ - **NumPy and Dask backends** for eager or lazy computation
60
+ - **Region query syntax**: `"chr1:1000-2000"`, tuples, or slice notation
61
+ - **Column filtering** by name or index
62
+ - **Escape hatches** to raw `zarr.Group` and `zarr.Array` objects
63
+ - **Self-describing tracks** with independent dtype, chunking, and metadata
64
+
65
+ ## Links
66
+
67
+ - [PBZ Format Specification](https://github.com/pbzarr/pbzarr-spec)
68
+ - [Rust Implementation](https://github.com/pbzarr/pbzarr-rs)
pbzarr-0.1.0/SPEC.md ADDED
@@ -0,0 +1,5 @@
1
+ # PBZ Format Specification
2
+
3
+ The canonical PBZ format specification lives in the [pbzarr-spec](https://github.com/pbzarr/pbzarr-spec) repository.
4
+
5
+ This implementation targets **spec version 0.1**.