bio2zarr 0.0.8__tar.gz → 0.0.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bio2zarr might be problematic. Click here for more details.

Files changed (63)
  1. bio2zarr-0.0.10/.github/workflows/cd.yml +86 -0
  2. bio2zarr-0.0.10/.github/workflows/ci.yml +78 -0
  3. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/.github/workflows/docs.yml +9 -3
  4. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/CHANGELOG.md +12 -1
  5. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/PKG-INFO +10 -123
  6. bio2zarr-0.0.10/README.md +8 -0
  7. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr/__main__.py +2 -2
  8. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr/_version.py +2 -2
  9. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr/cli.py +87 -95
  10. bio2zarr-0.0.10/bio2zarr/constants.py +18 -0
  11. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr/core.py +34 -16
  12. bio2zarr-0.0.10/bio2zarr/vcf2zarr/__init__.py +38 -0
  13. bio2zarr-0.0.10/bio2zarr/vcf2zarr/icf.py +1220 -0
  14. bio2zarr-0.0.10/bio2zarr/vcf2zarr/vcz.py +1017 -0
  15. bio2zarr-0.0.10/bio2zarr/vcf2zarr/verification.py +230 -0
  16. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr/vcf_utils.py +22 -14
  17. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr.egg-info/PKG-INFO +10 -123
  18. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr.egg-info/SOURCES.txt +17 -3
  19. bio2zarr-0.0.10/bio2zarr.egg-info/entry_points.txt +3 -0
  20. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr.egg-info/requires.txt +1 -1
  21. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/docs/Makefile +20 -2
  22. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/docs/_config.yml +1 -10
  23. bio2zarr-0.0.10/docs/_static/asciinema-player.css +2295 -0
  24. bio2zarr-0.0.10/docs/_static/asciinema-player.min.js +1 -0
  25. bio2zarr-0.0.10/docs/_static/custom.css +5 -0
  26. bio2zarr-0.0.10/docs/_toc.yml +11 -0
  27. bio2zarr-0.0.10/docs/cast_scripts/vcf2zarr_convert.sh +3 -0
  28. bio2zarr-0.0.10/docs/cast_scripts/vcf2zarr_explode.sh +3 -0
  29. bio2zarr-0.0.10/docs/installation.md +50 -0
  30. bio2zarr-0.0.10/docs/intro.md +27 -0
  31. bio2zarr-0.0.10/docs/requirements.txt +4 -0
  32. bio2zarr-0.0.10/docs/vcf2zarr/cli_ref.md +81 -0
  33. bio2zarr-0.0.10/docs/vcf2zarr/overview.md +124 -0
  34. bio2zarr-0.0.10/docs/vcf2zarr/tutorial.md +45 -0
  35. bio2zarr-0.0.10/docs/vcfpartition/cli_ref.md +8 -0
  36. bio2zarr-0.0.10/docs/vcfpartition/overview.md +30 -0
  37. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/pyproject.toml +8 -6
  38. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/validation-data/Makefile +6 -1
  39. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/validation.py +7 -7
  40. bio2zarr-0.0.8/.github/workflows/ci.yml +0 -34
  41. bio2zarr-0.0.8/README.md +0 -124
  42. bio2zarr-0.0.8/bio2zarr/vcf.py +0 -2416
  43. bio2zarr-0.0.8/bio2zarr.egg-info/entry_points.txt +0 -4
  44. bio2zarr-0.0.8/docs/_toc.yml +0 -4
  45. bio2zarr-0.0.8/docs/cli.md +0 -10
  46. bio2zarr-0.0.8/docs/intro.md +0 -76
  47. bio2zarr-0.0.8/docs/references.bib +0 -3
  48. bio2zarr-0.0.8/docs/requirements.txt +0 -11
  49. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/.gitignore +0 -0
  50. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/.pre-commit-config.yaml +0 -0
  51. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/LICENSE +0 -0
  52. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/MANIFEST.in +0 -0
  53. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr/__init__.py +0 -0
  54. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr/plink.py +0 -0
  55. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr/provenance.py +0 -0
  56. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr/typing.py +0 -0
  57. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr.egg-info/dependency_links.txt +0 -0
  58. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/bio2zarr.egg-info/top_level.txt +0 -0
  59. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/docs/build.sh +0 -0
  60. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/docs/logo.png +0 -0
  61. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/setup.cfg +0 -0
  62. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/validation-data/split.sh +0 -0
  63. {bio2zarr-0.0.8 → bio2zarr-0.0.10}/vcf_generator.py +0 -0
@@ -0,0 +1,86 @@
1
+ name: CD
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ tags:
8
+ - '*'
9
+ release:
10
+ types: [published]
11
+
12
+ jobs:
13
+ packaging:
14
+ if: github.repository_owner == 'sgkit-dev'
15
+ name: Packaging
16
+ runs-on: ubuntu-latest
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+ - uses: actions/setup-python@v5
20
+ with:
21
+ python-version: '3.9'
22
+ - name: Install dependencies
23
+ run: |
24
+ python -m pip install --upgrade pip
25
+ python -m pip install build twine validate-pyproject[all]
26
+ - name: Check and install package
27
+ run: |
28
+ validate-pyproject pyproject.toml
29
+ python -m build
30
+ python -m twine check --strict dist/*
31
+ python -m pip install dist/*.whl
32
+ - name: Check vcf2zarr CLI
33
+ run: |
34
+ vcf2zarr --help
35
+ python -m bio2zarr vcf2zarr --help
36
+ - name: Check vcfpartition CLI
37
+ run: |
38
+ vcfpartition --help
39
+ python -m bio2zarr vcfpartition --help
40
+ - name: Store the distribution packages
41
+ uses: actions/upload-artifact@v4
42
+ with:
43
+ name: python-package-distributions
44
+ path: dist/
45
+
46
+ publish-to-pypi:
47
+ if: github.repository_owner == 'sgkit-dev' && github.event_name == 'release'
48
+ needs:
49
+ - packaging
50
+ runs-on: ubuntu-latest
51
+
52
+ environment:
53
+ name: pypi
54
+ url: https://pypi.org/p/bio2zarr
55
+ permissions:
56
+ id-token: write # IMPORTANT: mandatory for trusted publishing
57
+
58
+ steps:
59
+ - uses: actions/download-artifact@v4
60
+ with:
61
+ name: python-package-distributions
62
+ path: dist/
63
+ - uses: pypa/gh-action-pypi-publish@release/v1
64
+
65
+
66
+ publish-to-testpypi:
67
+ if: github.repository_owner == 'sgkit-dev' && github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')
68
+ needs:
69
+ - packaging
70
+ runs-on: ubuntu-latest
71
+
72
+ environment:
73
+ name: testpypi
74
+ url: https://test.pypi.org/p/bio2zarr
75
+
76
+ permissions:
77
+ id-token: write # IMPORTANT: mandatory for trusted publishing
78
+
79
+ steps:
80
+ - uses: actions/download-artifact@v4
81
+ with:
82
+ name: python-package-distributions
83
+ path: dist/
84
+ - uses: pypa/gh-action-pypi-publish@release/v1
85
+ with:
86
+ repository-url: https://test.pypi.org/legacy/
@@ -0,0 +1,78 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request:
5
+ push:
6
+ branches:
7
+ - main
8
+
9
+ jobs:
10
+ pre-commit:
11
+ name: Lint
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: '3.11'
18
+ - uses: pre-commit/action@v3.0.1
19
+ test:
20
+ name: Test
21
+ runs-on: ${{ matrix.os }}
22
+ strategy:
23
+ matrix:
24
+ # Use macos-13 because pip binary packages for ARM aren't
25
+ # available for many dependencies
26
+ os: [macos-13, ubuntu-latest]
27
+ python-version: ["3.9", "3.10", "3.11"]
28
+ exclude:
29
+ # Just run macos tests on one Python version
30
+ - os: macos-13
31
+ python-version: "3.9"
32
+ - os: macos-13
33
+ python-version: "3.10"
34
+ steps:
35
+ - uses: actions/checkout@v4
36
+ - name: Set up Python ${{ matrix.python-version }}
37
+ uses: actions/setup-python@v5
38
+ with:
39
+ python-version: ${{ matrix.python-version }}
40
+ - name: Install dependencies
41
+ run: |
42
+ python -m pip install --upgrade pip
43
+ python -m pip install '.[dev]'
44
+ - name: Run tests
45
+ run: |
46
+ pytest --cov=bio2zarr
47
+ - name: Upload coverage to Coveralls
48
+ uses: coverallsapp/github-action@v2.3.0
49
+ with:
50
+ github-token: ${{ secrets.GITHUB_TOKEN }}
51
+
52
+ packaging:
53
+ name: Packaging
54
+ runs-on: ubuntu-latest
55
+ steps:
56
+ - uses: actions/checkout@v4
57
+ - uses: actions/setup-python@v5
58
+ with:
59
+ python-version: '3.11'
60
+ - name: Install dependencies
61
+ run: |
62
+ python -m pip install --upgrade pip
63
+ python -m pip install build twine validate-pyproject[all]
64
+ - name: Check and install package
65
+ run: |
66
+ validate-pyproject pyproject.toml
67
+ python -m build
68
+ python -m twine check --strict dist/*
69
+ python -m pip install dist/*.whl
70
+ - name: Check vcf2zarr CLI
71
+ run: |
72
+ vcf2zarr --help
73
+ python -m bio2zarr vcf2zarr --help
74
+ - name: Check vcfpartition CLI
75
+ run: |
76
+ vcfpartition --help
77
+ python -m bio2zarr vcfpartition --help
78
+
@@ -1,15 +1,16 @@
1
- name: Build Docs
1
+ name: Docs
2
2
 
3
3
  on:
4
4
  pull_request:
5
5
  push:
6
- branches: [main, test]
6
+ branches:
7
+ - main
7
8
  tags:
8
9
  - '*'
9
10
 
10
11
  jobs:
11
12
  build-docs:
12
- name: Docs
13
+ name: Build
13
14
  runs-on: ubuntu-latest
14
15
  steps:
15
16
  - name: Cancel Previous Runs
@@ -28,6 +29,11 @@ jobs:
28
29
  run: |
29
30
  pip install --upgrade pip wheel
30
31
  pip install -r docs/requirements.txt
32
+ python3 -m bash_kernel.install
33
+
34
+ - name: Install package
35
+ run: |
36
+ python3 -m pip install .
31
37
 
32
38
  - name: Build Docs
33
39
  run: |
@@ -1,4 +1,15 @@
1
- # 0.0.7 2024-04-30
1
+ # 0.0.10 2024-05-15
2
+ - Change output format of dexplode-init and dencode-init
3
+ - Bugfix for mac progress, and change of multiprocessing startup strategy.
4
+
5
+ # 0.0.9 2024-05-02
6
+
7
+ - Change on-disk format for explode and schema
8
+ - Support older tabix indexes
9
+ - Fix some bugs in explode
10
+
11
+ # 0.0.8 2024-04-30
12
+
2
13
  - Change on-disk format of distributed encode and simplify
3
14
  - Check for all partitions nominally completed encoding before doing
4
15
  anything destructive in dencode-finalise
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bio2zarr
3
- Version: 0.0.8
3
+ Version: 0.0.10
4
4
  Summary: Convert bioinformatics data to Zarr
5
5
  Author-email: sgkit Developers <project@sgkit.dev>
6
6
  License: Apache License
@@ -206,10 +206,13 @@ License: Apache License
206
206
  limitations under the License.
207
207
 
208
208
  Project-URL: repository, https://github.com/sgkit-dev/bio2zarr
209
- Project-URL: documentation, https://sgkit-dev.github.io/bio2zarr/intro.html
209
+ Project-URL: documentation, https://sgkit-dev.github.io/bio2zarr/
210
210
  Classifier: Development Status :: 3 - Alpha
211
211
  Classifier: License :: OSI Approved :: Apache Software License
212
- Classifier: Operating System :: OS Independent
212
+ Classifier: Operating System :: POSIX
213
+ Classifier: Operating System :: POSIX :: Linux
214
+ Classifier: Operating System :: MacOS
215
+ Classifier: Operating System :: MacOS :: MacOS X
213
216
  Classifier: Intended Audience :: Science/Research
214
217
  Classifier: Programming Language :: Python
215
218
  Classifier: Programming Language :: Python :: 3
@@ -234,130 +237,14 @@ Requires-Dist: pysam; extra == "dev"
234
237
  Requires-Dist: pytest; extra == "dev"
235
238
  Requires-Dist: pytest-coverage; extra == "dev"
236
239
  Requires-Dist: pytest-xdist; extra == "dev"
237
- Requires-Dist: sgkit; extra == "dev"
240
+ Requires-Dist: sgkit>=0.8.0; extra == "dev"
238
241
  Requires-Dist: tqdm; extra == "dev"
239
242
 
240
243
  [![CI](https://github.com/sgkit-dev/bio2zarr/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/sgkit-dev/bio2zarr/actions/workflows/ci.yml)
244
+ [![Coverage Status](https://coveralls.io/repos/github/sgkit-dev/bio2zarr/badge.svg)](https://coveralls.io/github/sgkit-dev/bio2zarr)
245
+
241
246
 
242
247
  # bio2zarr
243
248
  Convert bioinformatics file formats to Zarr
244
249
 
245
- Initially supports converting VCF to the
246
- [sgkit vcf-zarr specification](https://github.com/pystatgen/vcf-zarr-spec/)
247
-
248
- **This is early alpha-status code: everything is subject to change,
249
- and it has not been thoroughly tested**
250
-
251
- ## Install
252
-
253
- ```
254
- $ python3 -m pip install bio2zarr
255
- ```
256
-
257
- This will install the programs ``vcf2zarr``, ``plink2zarr`` and ``vcf_partition``
258
- into your local Python path. You may need to update your $PATH to call the
259
- executables directly.
260
-
261
- Alternatively, calling
262
- ```
263
- $ python3 -m bio2zarr vcf2zarr <args>
264
- ```
265
- is equivalent to
266
-
267
- ```
268
- $ vcf2zarr <args>
269
- ```
270
- and will always work.
271
-
272
-
273
- ## vcf2zarr
274
-
275
-
276
- Convert a VCF to zarr format:
277
-
278
- ```
279
- $ vcf2zarr convert <VCF1> <VCF2> <zarr>
280
- ```
281
-
282
- Converts the VCF to zarr format.
283
-
284
- **Do not use this for anything but the smallest files**
285
-
286
- The recommended approach is to use a multi-stage conversion
287
-
288
- First, convert the VCF into the intermediate format:
289
-
290
- ```
291
- vcf2zarr explode tests/data/vcf/sample.vcf.gz tmp/sample.exploded
292
- ```
293
-
294
- Then, (optionally) inspect this representation to get a feel for your dataset
295
- ```
296
- vcf2zarr inspect tmp/sample.exploded
297
- ```
298
-
299
- Then, (optionally) generate a conversion schema to describe the corresponding
300
- Zarr arrays:
301
-
302
- ```
303
- vcf2zarr mkschema tmp/sample.exploded > sample.schema.json
304
- ```
305
-
306
- View and edit the schema, deleting any columns you don't want, or tweaking
307
- dtypes and compression settings to your taste.
308
-
309
- Finally, encode to Zarr:
310
- ```
311
- vcf2zarr encode tmp/sample.exploded tmp/sample.zarr -s sample.schema.json
312
- ```
313
-
314
- Use the ``-p, --worker-processes`` argument to control the number of workers used
315
- in the ``explode`` and ``encode`` phases.
316
-
317
- ### Shell completion
318
-
319
- To enable shell completion for a particular session in Bash do:
320
-
321
- ```
322
- eval "$(_VCF2ZARR_COMPLETE=bash_source vcf2zarr)"
323
- ```
324
-
325
- If you add this to your ``.bashrc`` vcf2zarr shell completion should available
326
- in all new shell sessions.
327
-
328
- See the [Click documentation](https://click.palletsprojects.com/en/8.1.x/shell-completion/#enabling-completion)
329
- for instructions on how to enable completion in other shells.
330
- a
331
-
332
- ## plink2zarr
333
-
334
- Convert a plink ``.bed`` file to zarr format. **This is incomplete**
335
-
336
- ## vcf_partition
337
-
338
- Partition a given VCF file into (approximately) a give number of regions:
339
-
340
- ```
341
- vcf_partition 20201028_CCDG_14151_B01_GRM_WGS_2020-08-05_chr20.recalibrated_variants.vcf.gz -n 10
342
- ```
343
- gives
344
- ```
345
- chr20:1-6799360
346
- chr20:6799361-14319616
347
- chr20:14319617-21790720
348
- chr20:21790721-28770304
349
- chr20:28770305-31096832
350
- chr20:31096833-38043648
351
- chr20:38043649-45580288
352
- chr20:45580289-52117504
353
- chr20:52117505-58834944
354
- chr20:58834945-
355
- ```
356
-
357
- These reqion strings can then be used to split computation of the VCF
358
- into chunks for parallelisation.
359
-
360
- **TODO give a nice example here using xargs**
361
-
362
- **WARNING that this does not take into account that indels may overlap
363
- partitions and you may count variants twice or more if they do**
250
+ See the [documentation](https://sgkit-dev.github.io/bio2zarr/) for details.
@@ -0,0 +1,8 @@
1
+ [![CI](https://github.com/sgkit-dev/bio2zarr/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/sgkit-dev/bio2zarr/actions/workflows/ci.yml)
2
+ [![Coverage Status](https://coveralls.io/repos/github/sgkit-dev/bio2zarr/badge.svg)](https://coveralls.io/github/sgkit-dev/bio2zarr)
3
+
4
+
5
+ # bio2zarr
6
+ Convert bioinformatics file formats to Zarr
7
+
8
+ See the [documentation](https://sgkit-dev.github.io/bio2zarr/) for details.
@@ -14,9 +14,9 @@ def bio2zarr():
14
14
  # install individual commands as console scripts. However, this
15
15
  # is handy for development and for those whose PATHs aren't set
16
16
  # up in the right way.
17
- bio2zarr.add_command(cli.vcf2zarr)
17
+ bio2zarr.add_command(cli.vcf2zarr_main)
18
18
  bio2zarr.add_command(cli.plink2zarr)
19
- bio2zarr.add_command(cli.vcf_partition)
19
+ bio2zarr.add_command(cli.vcfpartition)
20
20
 
21
21
  if __name__ == "__main__":
22
22
  bio2zarr()
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.0.8'
16
- __version_tuple__ = version_tuple = (0, 0, 8)
15
+ __version__ = version = '0.0.10'
16
+ __version_tuple__ = version_tuple = (0, 0, 10)