bio2zarr 0.0.9__tar.gz → 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bio2zarr might be problematic. Click here for more details.
- bio2zarr-0.1.0/.github/workflows/cd.yml +86 -0
- bio2zarr-0.1.0/.github/workflows/ci.yml +107 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/.github/workflows/docs.yml +13 -3
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/CHANGELOG.md +11 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/PKG-INFO +10 -123
- bio2zarr-0.1.0/README.md +8 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr/__main__.py +2 -2
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr/_version.py +2 -2
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr/cli.py +176 -113
- bio2zarr-0.1.0/bio2zarr/constants.py +18 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr/core.py +65 -20
- bio2zarr-0.1.0/bio2zarr/vcf2zarr/__init__.py +38 -0
- bio2zarr-0.1.0/bio2zarr/vcf2zarr/icf.py +1221 -0
- bio2zarr-0.1.0/bio2zarr/vcf2zarr/vcz.py +1053 -0
- bio2zarr-0.1.0/bio2zarr/vcf2zarr/verification.py +230 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr/vcf_utils.py +11 -6
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr.egg-info/PKG-INFO +10 -123
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr.egg-info/SOURCES.txt +17 -3
- bio2zarr-0.1.0/bio2zarr.egg-info/entry_points.txt +3 -0
- bio2zarr-0.1.0/docs/Makefile +46 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/docs/_config.yml +10 -10
- bio2zarr-0.1.0/docs/_static/asciinema-player.css +2295 -0
- bio2zarr-0.1.0/docs/_static/asciinema-player.min.js +1 -0
- bio2zarr-0.1.0/docs/_static/custom.css +5 -0
- bio2zarr-0.1.0/docs/_toc.yml +11 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/docs/build.sh +2 -2
- bio2zarr-0.1.0/docs/cast_scripts/vcf2zarr_convert.sh +3 -0
- bio2zarr-0.1.0/docs/cast_scripts/vcf2zarr_explode.sh +5 -0
- bio2zarr-0.1.0/docs/installation.md +49 -0
- bio2zarr-0.1.0/docs/intro.md +36 -0
- bio2zarr-0.1.0/docs/requirements.txt +4 -0
- bio2zarr-0.1.0/docs/vcf2zarr/cli_ref.md +76 -0
- bio2zarr-0.1.0/docs/vcf2zarr/overview.md +92 -0
- bio2zarr-0.1.0/docs/vcf2zarr/tutorial.md +272 -0
- bio2zarr-0.1.0/docs/vcfpartition/cli_ref.md +9 -0
- bio2zarr-0.1.0/docs/vcfpartition/overview.md +113 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/pyproject.toml +8 -6
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/validation.py +7 -7
- bio2zarr-0.0.9/.github/workflows/ci.yml +0 -34
- bio2zarr-0.0.9/README.md +0 -124
- bio2zarr-0.0.9/bio2zarr/vcf.py +0 -2445
- bio2zarr-0.0.9/bio2zarr.egg-info/entry_points.txt +0 -4
- bio2zarr-0.0.9/docs/Makefile +0 -18
- bio2zarr-0.0.9/docs/_toc.yml +0 -4
- bio2zarr-0.0.9/docs/cli.md +0 -10
- bio2zarr-0.0.9/docs/intro.md +0 -76
- bio2zarr-0.0.9/docs/references.bib +0 -3
- bio2zarr-0.0.9/docs/requirements.txt +0 -11
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/.gitignore +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/.pre-commit-config.yaml +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/LICENSE +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/MANIFEST.in +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr/__init__.py +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr/plink.py +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr/provenance.py +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr/typing.py +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr.egg-info/dependency_links.txt +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr.egg-info/requires.txt +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/bio2zarr.egg-info/top_level.txt +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/docs/logo.png +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/setup.cfg +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/validation-data/Makefile +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/validation-data/split.sh +0 -0
- {bio2zarr-0.0.9 → bio2zarr-0.1.0}/vcf_generator.py +0 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
name: CD
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
tags:
|
|
8
|
+
- '*'
|
|
9
|
+
release:
|
|
10
|
+
types: [published]
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
packaging:
|
|
14
|
+
if: github.repository_owner == 'sgkit-dev'
|
|
15
|
+
name: Packaging
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
- uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: '3.9'
|
|
22
|
+
- name: Install dependencies
|
|
23
|
+
run: |
|
|
24
|
+
python -m pip install --upgrade pip
|
|
25
|
+
python -m pip install build twine validate-pyproject[all]
|
|
26
|
+
- name: Check and install package
|
|
27
|
+
run: |
|
|
28
|
+
validate-pyproject pyproject.toml
|
|
29
|
+
python -m build
|
|
30
|
+
python -m twine check --strict dist/*
|
|
31
|
+
python -m pip install dist/*.whl
|
|
32
|
+
- name: Check vcf2zarr CLI
|
|
33
|
+
run: |
|
|
34
|
+
vcf2zarr --help
|
|
35
|
+
python -m bio2zarr vcf2zarr --help
|
|
36
|
+
- name: Check vcfpartition CLI
|
|
37
|
+
run: |
|
|
38
|
+
vcfpartition --help
|
|
39
|
+
python -m bio2zarr vcfpartition --help
|
|
40
|
+
- name: Store the distribution packages
|
|
41
|
+
uses: actions/upload-artifact@v4
|
|
42
|
+
with:
|
|
43
|
+
name: python-package-distributions
|
|
44
|
+
path: dist/
|
|
45
|
+
|
|
46
|
+
publish-to-pypi:
|
|
47
|
+
if: github.repository_owner == 'sgkit-dev' && github.event_name == 'release'
|
|
48
|
+
needs:
|
|
49
|
+
- packaging
|
|
50
|
+
runs-on: ubuntu-latest
|
|
51
|
+
|
|
52
|
+
environment:
|
|
53
|
+
name: pypi
|
|
54
|
+
url: https://pypi.org/p/bio2zarr
|
|
55
|
+
permissions:
|
|
56
|
+
id-token: write # IMPORTANT: mandatory for trusted publishing
|
|
57
|
+
|
|
58
|
+
steps:
|
|
59
|
+
- uses: actions/download-artifact@v4
|
|
60
|
+
with:
|
|
61
|
+
name: python-package-distributions
|
|
62
|
+
path: dist/
|
|
63
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
publish-to-testpypi:
|
|
67
|
+
if: github.repository_owner == 'sgkit-dev' && github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')
|
|
68
|
+
needs:
|
|
69
|
+
- packaging
|
|
70
|
+
runs-on: ubuntu-latest
|
|
71
|
+
|
|
72
|
+
environment:
|
|
73
|
+
name: testpypi
|
|
74
|
+
url: https://test.pypi.org/p/bio2zarr
|
|
75
|
+
|
|
76
|
+
permissions:
|
|
77
|
+
id-token: write # IMPORTANT: mandatory for trusted publishing
|
|
78
|
+
|
|
79
|
+
steps:
|
|
80
|
+
- uses: actions/download-artifact@v4
|
|
81
|
+
with:
|
|
82
|
+
name: python-package-distributions
|
|
83
|
+
path: dist/
|
|
84
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
85
|
+
with:
|
|
86
|
+
repository-url: https://test.pypi.org/legacy/
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
push:
|
|
6
|
+
branches:
|
|
7
|
+
- main
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
pre-commit:
|
|
11
|
+
name: Lint
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
- uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: '3.11'
|
|
18
|
+
- uses: pre-commit/action@v3.0.1
|
|
19
|
+
test:
|
|
20
|
+
name: Test
|
|
21
|
+
runs-on: ${{ matrix.os }}
|
|
22
|
+
strategy:
|
|
23
|
+
matrix:
|
|
24
|
+
# Use macos-13 because pip binary packages for ARM aren't
|
|
25
|
+
# available for many dependencies
|
|
26
|
+
os: [macos-13, macos-14, ubuntu-latest]
|
|
27
|
+
python-version: ["3.9", "3.10", "3.11"]
|
|
28
|
+
exclude:
|
|
29
|
+
# Just run macos tests on one Python version
|
|
30
|
+
- os: macos-13
|
|
31
|
+
python-version: "3.10"
|
|
32
|
+
- os: macos-13
|
|
33
|
+
python-version: "3.11"
|
|
34
|
+
- os: macos-14
|
|
35
|
+
python-version: "3.9"
|
|
36
|
+
- os: macos-14
|
|
37
|
+
python-version: "3.10"
|
|
38
|
+
steps:
|
|
39
|
+
- uses: actions/checkout@v4
|
|
40
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
41
|
+
uses: actions/setup-python@v5
|
|
42
|
+
with:
|
|
43
|
+
python-version: ${{ matrix.python-version }}
|
|
44
|
+
- name: Install dependencies
|
|
45
|
+
run: |
|
|
46
|
+
python -m pip install --upgrade pip
|
|
47
|
+
python -m pip install '.[dev]'
|
|
48
|
+
- name: Run basic vcf2zarr example
|
|
49
|
+
run: |
|
|
50
|
+
python -m bio2zarr vcf2zarr convert tests/data/vcf/sample.vcf.gz sample.vcz -f
|
|
51
|
+
- name: Run two-pass vcf2zarr example
|
|
52
|
+
run: |
|
|
53
|
+
python -m bio2zarr vcf2zarr explode tests/data/vcf/sample.vcf.gz sample.icf -f
|
|
54
|
+
python -m bio2zarr vcf2zarr encode sample.icf sample.vcz -f
|
|
55
|
+
- name: Run distributed explode example
|
|
56
|
+
run: |
|
|
57
|
+
python -m bio2zarr vcf2zarr dexplode-init tests/data/vcf/sample.vcf.gz sample.icf -fn 3
|
|
58
|
+
python -m bio2zarr vcf2zarr dexplode-partition sample.icf 0
|
|
59
|
+
python -m bio2zarr vcf2zarr dexplode-partition sample.icf 1
|
|
60
|
+
python -m bio2zarr vcf2zarr dexplode-partition sample.icf 2
|
|
61
|
+
python -m bio2zarr vcf2zarr dexplode-finalise sample.icf
|
|
62
|
+
- name: Run distributed encode example
|
|
63
|
+
run: |
|
|
64
|
+
python -m bio2zarr vcf2zarr dencode-init sample.icf sample.vcz -fn 3 --variants-chunk-size=3
|
|
65
|
+
python -m bio2zarr vcf2zarr dencode-partition sample.vcz 0
|
|
66
|
+
python -m bio2zarr vcf2zarr dencode-partition sample.vcz 1
|
|
67
|
+
python -m bio2zarr vcf2zarr dencode-partition sample.vcz 2
|
|
68
|
+
python -m bio2zarr vcf2zarr dencode-finalise sample.vcz
|
|
69
|
+
- name: Run tests
|
|
70
|
+
run: |
|
|
71
|
+
pytest --cov=bio2zarr
|
|
72
|
+
- name: Upload coverage to Coveralls
|
|
73
|
+
uses: coverallsapp/github-action@v2.3.0
|
|
74
|
+
with:
|
|
75
|
+
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
76
|
+
# The first coveralls upload will succeed and others seem to fail now.
|
|
77
|
+
# This is a quick workaround used in place of a proper "parallel" setup:
|
|
78
|
+
# https://github.com/coverallsapp/github-action
|
|
79
|
+
fail-on-error: false
|
|
80
|
+
|
|
81
|
+
packaging:
|
|
82
|
+
name: Packaging
|
|
83
|
+
runs-on: ubuntu-latest
|
|
84
|
+
steps:
|
|
85
|
+
- uses: actions/checkout@v4
|
|
86
|
+
- uses: actions/setup-python@v5
|
|
87
|
+
with:
|
|
88
|
+
python-version: '3.11'
|
|
89
|
+
- name: Install dependencies
|
|
90
|
+
run: |
|
|
91
|
+
python -m pip install --upgrade pip
|
|
92
|
+
python -m pip install build twine validate-pyproject[all]
|
|
93
|
+
- name: Check and install package
|
|
94
|
+
run: |
|
|
95
|
+
validate-pyproject pyproject.toml
|
|
96
|
+
python -m build
|
|
97
|
+
python -m twine check --strict dist/*
|
|
98
|
+
python -m pip install dist/*.whl
|
|
99
|
+
- name: Check vcf2zarr CLI
|
|
100
|
+
run: |
|
|
101
|
+
vcf2zarr --help
|
|
102
|
+
python -m bio2zarr vcf2zarr --help
|
|
103
|
+
- name: Check vcfpartition CLI
|
|
104
|
+
run: |
|
|
105
|
+
vcfpartition --help
|
|
106
|
+
python -m bio2zarr vcfpartition --help
|
|
107
|
+
|
|
@@ -1,15 +1,16 @@
|
|
|
1
|
-
name:
|
|
1
|
+
name: Docs
|
|
2
2
|
|
|
3
3
|
on:
|
|
4
4
|
pull_request:
|
|
5
5
|
push:
|
|
6
|
-
branches:
|
|
6
|
+
branches:
|
|
7
|
+
- main
|
|
7
8
|
tags:
|
|
8
9
|
- '*'
|
|
9
10
|
|
|
10
11
|
jobs:
|
|
11
12
|
build-docs:
|
|
12
|
-
name:
|
|
13
|
+
name: Build
|
|
13
14
|
runs-on: ubuntu-latest
|
|
14
15
|
steps:
|
|
15
16
|
- name: Cancel Previous Runs
|
|
@@ -28,6 +29,15 @@ jobs:
|
|
|
28
29
|
run: |
|
|
29
30
|
pip install --upgrade pip wheel
|
|
30
31
|
pip install -r docs/requirements.txt
|
|
32
|
+
python3 -m bash_kernel.install
|
|
33
|
+
|
|
34
|
+
- name: Install bcftools
|
|
35
|
+
run: |
|
|
36
|
+
sudo apt-get install bcftools jq
|
|
37
|
+
|
|
38
|
+
- name: Install package
|
|
39
|
+
run: |
|
|
40
|
+
python3 -m pip install .
|
|
31
41
|
|
|
32
42
|
- name: Build Docs
|
|
33
43
|
run: |
|
|
@@ -1,3 +1,14 @@
|
|
|
1
|
+
# 0.1.0 2024-06-10
|
|
2
|
+
|
|
3
|
+
- Initial production-ready version.
|
|
4
|
+
- Add -Q/--no-progress flag to CLI
|
|
5
|
+
- Change num-partitions argument in dexplode-init and dencode-init
|
|
6
|
+
to a named option.
|
|
7
|
+
|
|
8
|
+
# 0.0.10 2024-05-15
|
|
9
|
+
- Change output format of dexplode-init and dencode-init
|
|
10
|
+
- Bugfix for mac progress, and change of multiprocessing startup strategy.
|
|
11
|
+
|
|
1
12
|
# 0.0.9 2024-05-02
|
|
2
13
|
|
|
3
14
|
- Change on-disk format for explode and schema
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: bio2zarr
|
|
3
|
-
Version: 0.0
|
|
3
|
+
Version: 0.1.0
|
|
4
4
|
Summary: Convert bioinformatics data to Zarr
|
|
5
5
|
Author-email: sgkit Developers <project@sgkit.dev>
|
|
6
6
|
License: Apache License
|
|
@@ -206,10 +206,13 @@ License: Apache License
|
|
|
206
206
|
limitations under the License.
|
|
207
207
|
|
|
208
208
|
Project-URL: repository, https://github.com/sgkit-dev/bio2zarr
|
|
209
|
-
Project-URL: documentation, https://sgkit-dev.github.io/bio2zarr/
|
|
210
|
-
Classifier: Development Status ::
|
|
209
|
+
Project-URL: documentation, https://sgkit-dev.github.io/bio2zarr/
|
|
210
|
+
Classifier: Development Status :: 4 - Beta
|
|
211
211
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
212
|
-
Classifier: Operating System ::
|
|
212
|
+
Classifier: Operating System :: POSIX
|
|
213
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
214
|
+
Classifier: Operating System :: MacOS
|
|
215
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
213
216
|
Classifier: Intended Audience :: Science/Research
|
|
214
217
|
Classifier: Programming Language :: Python
|
|
215
218
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -238,126 +241,10 @@ Requires-Dist: sgkit>=0.8.0; extra == "dev"
|
|
|
238
241
|
Requires-Dist: tqdm; extra == "dev"
|
|
239
242
|
|
|
240
243
|
[](https://github.com/sgkit-dev/bio2zarr/actions/workflows/ci.yml)
|
|
244
|
+
[](https://coveralls.io/github/sgkit-dev/bio2zarr)
|
|
245
|
+
|
|
241
246
|
|
|
242
247
|
# bio2zarr
|
|
243
248
|
Convert bioinformatics file formats to Zarr
|
|
244
249
|
|
|
245
|
-
|
|
246
|
-
[sgkit vcf-zarr specification](https://github.com/pystatgen/vcf-zarr-spec/)
|
|
247
|
-
|
|
248
|
-
**This is early alpha-status code: everything is subject to change,
|
|
249
|
-
and it has not been thoroughly tested**
|
|
250
|
-
|
|
251
|
-
## Install
|
|
252
|
-
|
|
253
|
-
```
|
|
254
|
-
$ python3 -m pip install bio2zarr
|
|
255
|
-
```
|
|
256
|
-
|
|
257
|
-
This will install the programs ``vcf2zarr``, ``plink2zarr`` and ``vcf_partition``
|
|
258
|
-
into your local Python path. You may need to update your $PATH to call the
|
|
259
|
-
executables directly.
|
|
260
|
-
|
|
261
|
-
Alternatively, calling
|
|
262
|
-
```
|
|
263
|
-
$ python3 -m bio2zarr vcf2zarr <args>
|
|
264
|
-
```
|
|
265
|
-
is equivalent to
|
|
266
|
-
|
|
267
|
-
```
|
|
268
|
-
$ vcf2zarr <args>
|
|
269
|
-
```
|
|
270
|
-
and will always work.
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
## vcf2zarr
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
Convert a VCF to zarr format:
|
|
277
|
-
|
|
278
|
-
```
|
|
279
|
-
$ vcf2zarr convert <VCF1> <VCF2> <zarr>
|
|
280
|
-
```
|
|
281
|
-
|
|
282
|
-
Converts the VCF to zarr format.
|
|
283
|
-
|
|
284
|
-
**Do not use this for anything but the smallest files**
|
|
285
|
-
|
|
286
|
-
The recommended approach is to use a multi-stage conversion
|
|
287
|
-
|
|
288
|
-
First, convert the VCF into the intermediate format:
|
|
289
|
-
|
|
290
|
-
```
|
|
291
|
-
vcf2zarr explode tests/data/vcf/sample.vcf.gz tmp/sample.exploded
|
|
292
|
-
```
|
|
293
|
-
|
|
294
|
-
Then, (optionally) inspect this representation to get a feel for your dataset
|
|
295
|
-
```
|
|
296
|
-
vcf2zarr inspect tmp/sample.exploded
|
|
297
|
-
```
|
|
298
|
-
|
|
299
|
-
Then, (optionally) generate a conversion schema to describe the corresponding
|
|
300
|
-
Zarr arrays:
|
|
301
|
-
|
|
302
|
-
```
|
|
303
|
-
vcf2zarr mkschema tmp/sample.exploded > sample.schema.json
|
|
304
|
-
```
|
|
305
|
-
|
|
306
|
-
View and edit the schema, deleting any columns you don't want, or tweaking
|
|
307
|
-
dtypes and compression settings to your taste.
|
|
308
|
-
|
|
309
|
-
Finally, encode to Zarr:
|
|
310
|
-
```
|
|
311
|
-
vcf2zarr encode tmp/sample.exploded tmp/sample.zarr -s sample.schema.json
|
|
312
|
-
```
|
|
313
|
-
|
|
314
|
-
Use the ``-p, --worker-processes`` argument to control the number of workers used
|
|
315
|
-
in the ``explode`` and ``encode`` phases.
|
|
316
|
-
|
|
317
|
-
### Shell completion
|
|
318
|
-
|
|
319
|
-
To enable shell completion for a particular session in Bash do:
|
|
320
|
-
|
|
321
|
-
```
|
|
322
|
-
eval "$(_VCF2ZARR_COMPLETE=bash_source vcf2zarr)"
|
|
323
|
-
```
|
|
324
|
-
|
|
325
|
-
If you add this to your ``.bashrc`` vcf2zarr shell completion should be available
|
|
326
|
-
in all new shell sessions.
|
|
327
|
-
|
|
328
|
-
See the [Click documentation](https://click.palletsprojects.com/en/8.1.x/shell-completion/#enabling-completion)
|
|
329
|
-
for instructions on how to enable completion in other shells.
|
|
330
|
-
a
|
|
331
|
-
|
|
332
|
-
## plink2zarr
|
|
333
|
-
|
|
334
|
-
Convert a plink ``.bed`` file to zarr format. **This is incomplete**
|
|
335
|
-
|
|
336
|
-
## vcf_partition
|
|
337
|
-
|
|
338
|
-
Partition a given VCF file into (approximately) a given number of regions:
|
|
339
|
-
|
|
340
|
-
```
|
|
341
|
-
vcf_partition 20201028_CCDG_14151_B01_GRM_WGS_2020-08-05_chr20.recalibrated_variants.vcf.gz -n 10
|
|
342
|
-
```
|
|
343
|
-
gives
|
|
344
|
-
```
|
|
345
|
-
chr20:1-6799360
|
|
346
|
-
chr20:6799361-14319616
|
|
347
|
-
chr20:14319617-21790720
|
|
348
|
-
chr20:21790721-28770304
|
|
349
|
-
chr20:28770305-31096832
|
|
350
|
-
chr20:31096833-38043648
|
|
351
|
-
chr20:38043649-45580288
|
|
352
|
-
chr20:45580289-52117504
|
|
353
|
-
chr20:52117505-58834944
|
|
354
|
-
chr20:58834945-
|
|
355
|
-
```
|
|
356
|
-
|
|
357
|
-
These region strings can then be used to split computation of the VCF
|
|
358
|
-
into chunks for parallelisation.
|
|
359
|
-
|
|
360
|
-
**TODO give a nice example here using xargs**
|
|
361
|
-
|
|
362
|
-
**WARNING that this does not take into account that indels may overlap
|
|
363
|
-
partitions and you may count variants twice or more if they do**
|
|
250
|
+
See the [documentation](https://sgkit-dev.github.io/bio2zarr/) for details.
|
bio2zarr-0.1.0/README.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
[](https://github.com/sgkit-dev/bio2zarr/actions/workflows/ci.yml)
|
|
2
|
+
[](https://coveralls.io/github/sgkit-dev/bio2zarr)
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# bio2zarr
|
|
6
|
+
Convert bioinformatics file formats to Zarr
|
|
7
|
+
|
|
8
|
+
See the [documentation](https://sgkit-dev.github.io/bio2zarr/) for details.
|
|
@@ -14,9 +14,9 @@ def bio2zarr():
|
|
|
14
14
|
# install individual commands as console scripts. However, this
|
|
15
15
|
# is handy for development and for those whose PATHs aren't set
|
|
16
16
|
# up in the right way.
|
|
17
|
-
bio2zarr.add_command(cli.
|
|
17
|
+
bio2zarr.add_command(cli.vcf2zarr_main)
|
|
18
18
|
bio2zarr.add_command(cli.plink2zarr)
|
|
19
|
-
bio2zarr.add_command(cli.
|
|
19
|
+
bio2zarr.add_command(cli.vcfpartition)
|
|
20
20
|
|
|
21
21
|
if __name__ == "__main__":
|
|
22
22
|
bio2zarr()
|