Demultiplex 1.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- demultiplex-1.2.3/.github/ISSUE_TEMPLATE/Bug_report.md +28 -0
- demultiplex-1.2.3/.github/ISSUE_TEMPLATE/Feature_request.md +18 -0
- demultiplex-1.2.3/.github/ISSUE_TEMPLATE/Question.md +23 -0
- demultiplex-1.2.3/.github/PULL_REQUEST_TEMPLATE.md +40 -0
- demultiplex-1.2.3/.github/workflows/release.yml +30 -0
- demultiplex-1.2.3/.github/workflows/test.yml +36 -0
- demultiplex-1.2.3/.gitignore +7 -0
- demultiplex-1.2.3/.readthedocs.yaml +14 -0
- demultiplex-1.2.3/LICENSE.md +17 -0
- demultiplex-1.2.3/MANIFEST.in +1 -0
- demultiplex-1.2.3/PKG-INFO +66 -0
- demultiplex-1.2.3/README.rst +45 -0
- demultiplex-1.2.3/data/barcodes.tsv +4 -0
- demultiplex-1.2.3/data/demultiplex.fq +20 -0
- demultiplex-1.2.3/data/demultiplex.fq.bz2 +0 -0
- demultiplex-1.2.3/data/demultiplex.fq.gz +0 -0
- demultiplex-1.2.3/data/demultiplex_x.fq +20 -0
- demultiplex-1.2.3/data/matchcodes.tsv +4 -0
- demultiplex-1.2.3/demultiplex/__init__.py +28 -0
- demultiplex-1.2.3/demultiplex/cli.py +150 -0
- demultiplex-1.2.3/demultiplex/demultiplex.py +222 -0
- demultiplex-1.2.3/demultiplex/match.py +24 -0
- demultiplex-1.2.3/docs/CODE_OF_CONDUCT.md +66 -0
- demultiplex-1.2.3/docs/CONTRIBUTING.md +62 -0
- demultiplex-1.2.3/docs/cli.rst +7 -0
- demultiplex-1.2.3/docs/conf.py +13 -0
- demultiplex-1.2.3/docs/credits.rst +10 -0
- demultiplex-1.2.3/docs/faq.rst +52 -0
- demultiplex-1.2.3/docs/index.rst +11 -0
- demultiplex-1.2.3/docs/installation.rst +24 -0
- demultiplex-1.2.3/docs/requirements.txt +5 -0
- demultiplex-1.2.3/docs/usage.rst +221 -0
- demultiplex-1.2.3/pyproject.toml +25 -0
- demultiplex-1.2.3/tests/shared.py +36 -0
- demultiplex-1.2.3/tests/test_demultiplex.py +104 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug report
|
|
3
|
+
about: Create a report to help us improve
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
**Describe the bug**
|
|
7
|
+
A clear and concise description of what the bug is.
|
|
8
|
+
|
|
9
|
+
**To Reproduce**
|
|
10
|
+
Steps to reproduce the behavior:
|
|
11
|
+
|
|
12
|
+
1. Go to '...'
|
|
13
|
+
2. Click on '....'
|
|
14
|
+
3. Scroll down to '....'
|
|
15
|
+
4. See error
|
|
16
|
+
|
|
17
|
+
**Expected behavior**
|
|
18
|
+
A clear and concise description of what you expected to happen.
|
|
19
|
+
|
|
20
|
+
**Screenshots**
|
|
21
|
+
If applicable, add screenshots to help explain your problem.
|
|
22
|
+
|
|
23
|
+
**Desktop (please complete the following information):**
|
|
24
|
+
- OS: [e.g. Ubuntu Desktop 18.04]
|
|
25
|
+
- Version: [e.g. 0.0.14]
|
|
26
|
+
|
|
27
|
+
**Additional context**
|
|
28
|
+
Add any other context about the problem here.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Feature request
|
|
3
|
+
about: Suggest an idea for this project
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
**Is your feature request related to a problem? Please describe.**
|
|
7
|
+
A clear and concise description of what the problem is. Ex. I'm always
|
|
8
|
+
frustrated when [...]
|
|
9
|
+
|
|
10
|
+
**Describe the solution you'd like**
|
|
11
|
+
A clear and concise description of what you want to happen.
|
|
12
|
+
|
|
13
|
+
**Describe alternatives you've considered**
|
|
14
|
+
A clear and concise description of any alternative solutions or features you've
|
|
15
|
+
considered.
|
|
16
|
+
|
|
17
|
+
**Additional context**
|
|
18
|
+
Add any other context or screenshots about the feature request here.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Question
|
|
3
|
+
about: Questions on how to use the program
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
**Before you start**
|
|
7
|
+
|
|
8
|
+
Please check the following resources:
|
|
9
|
+
|
|
10
|
+
- The [Frequently Asked Questions](https://demultiplex.readthedocs.io/en/latest/faq.html).
|
|
11
|
+
- Previously answered [questions](https://github.com/jfjlaros/demultiplex/issues?q=is%3Aissue+is%3Aclosed+label%3Aquestion).
|
|
12
|
+
|
|
13
|
+
**Asking a question**
|
|
14
|
+
|
|
15
|
+
Please explain what the goal is and describe which steps were taken to reach this goal.
|
|
16
|
+
|
|
17
|
+
If possible, provide a snippet of:
|
|
18
|
+
|
|
19
|
+
- The barcodes file.
|
|
20
|
+
- The FastQ or Fasta input file.
|
|
21
|
+
|
|
22
|
+
Most importantly, specify where the barcode is expected to be (in the header,
|
|
23
|
+
in the read, etc.).
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Submit a pull request
|
|
2
|
+
Thank you for submitting a pull request. To speed up the review process, please
|
|
3
|
+
ensure that everything below is true:
|
|
4
|
+
|
|
5
|
+
1. This is not a duplicate of an [existing pull request][1].
|
|
6
|
+
2. No existing features have been broken without good reason.
|
|
7
|
+
3. Your commit messages are detailed.
|
|
8
|
+
4. The code style [guidelines][2] have been followed.
|
|
9
|
+
5. Documentation has been updated to reflect your changes.
|
|
10
|
+
6. Tests have been added or updated to reflect your changes.
|
|
11
|
+
7. All tests pass.
|
|
12
|
+
|
|
13
|
+
Any questions should be directed to @jfjlaros.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
Replace any ":question:" below with information about your pull request.
|
|
18
|
+
|
|
19
|
+
## Pull Request Details
|
|
20
|
+
Provide details about your pull request and what it adds, fixes, or changes.
|
|
21
|
+
|
|
22
|
+
:question:
|
|
23
|
+
|
|
24
|
+
## Breaking Changes
|
|
25
|
+
Describe what features are broken by this pull request and why, if any.
|
|
26
|
+
|
|
27
|
+
:question:
|
|
28
|
+
|
|
29
|
+
## Issues Fixed
|
|
30
|
+
Enter the issue numbers resolved by this pull request below, if any.
|
|
31
|
+
|
|
32
|
+
1. :question:
|
|
33
|
+
|
|
34
|
+
## Other Relevant Information
|
|
35
|
+
Provide any other important details below.
|
|
36
|
+
|
|
37
|
+
:question:
|
|
38
|
+
|
|
39
|
+
[1]: https://github.com/jfjlaros/demultiplex/pulls
|
|
40
|
+
[2]: https://github.com/jfjlaros/demultiplex/blob/master/docs/CONTRIBUTING.md#code-style
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
tags:
|
|
5
|
+
- "v*"
|
|
6
|
+
jobs:
|
|
7
|
+
build-and-publish:
|
|
8
|
+
runs-on: ubuntu-latest
|
|
9
|
+
permissions:
|
|
10
|
+
id-token: write
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v6
|
|
13
|
+
- uses: actions/setup-python@v6
|
|
14
|
+
with:
|
|
15
|
+
python-version: "3.13"
|
|
16
|
+
- name: Install build tools
|
|
17
|
+
run: |
|
|
18
|
+
python -m pip install --upgrade pip
|
|
19
|
+
pip install build
|
|
20
|
+
- name: Build package
|
|
21
|
+
run: python -m build
|
|
22
|
+
- name: Publish to PyPI
|
|
23
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
24
|
+
github-release:
|
|
25
|
+
needs: build-and-publish
|
|
26
|
+
runs-on: ubuntu-latest
|
|
27
|
+
permissions:
|
|
28
|
+
contents: write
|
|
29
|
+
steps:
|
|
30
|
+
- uses: softprops/action-gh-release@v3
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: Test
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
branches:
|
|
5
|
+
- master
|
|
6
|
+
pull_request:
|
|
7
|
+
branches:
|
|
8
|
+
- master
|
|
9
|
+
schedule:
|
|
10
|
+
- cron: "34 12 * * *"
|
|
11
|
+
jobs:
|
|
12
|
+
build:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
strategy:
|
|
15
|
+
matrix:
|
|
16
|
+
python-version:
|
|
17
|
+
- "3.11"
|
|
18
|
+
- "3.12"
|
|
19
|
+
- "3.13"
|
|
20
|
+
steps:
|
|
21
|
+
- uses: actions/checkout@v6
|
|
22
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
23
|
+
uses: actions/setup-python@v6
|
|
24
|
+
with:
|
|
25
|
+
python-version: ${{ matrix.python-version }}
|
|
26
|
+
- name: Install dependencies
|
|
27
|
+
run: |
|
|
28
|
+
pip install --upgrade pip
|
|
29
|
+
pip install flake8 pytest .
|
|
30
|
+
- name: Lint with flake8
|
|
31
|
+
run: |
|
|
32
|
+
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
|
33
|
+
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
|
34
|
+
- name: Test with pytest
|
|
35
|
+
run: |
|
|
36
|
+
pytest
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
2
|
+
this software and associated documentation files (the "Software"), to deal in
|
|
3
|
+
the Software without restriction, including without limitation the rights to
|
|
4
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
5
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
|
6
|
+
so, subject to the following conditions:
|
|
7
|
+
|
|
8
|
+
The above copyright notice and this permission notice shall be included in all
|
|
9
|
+
copies or substantial portions of the Software.
|
|
10
|
+
|
|
11
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
12
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
13
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
14
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
15
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
16
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
17
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
include demultiplex/setup.cfg
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: Demultiplex
|
|
3
|
+
Version: 1.2.3
|
|
4
|
+
Summary: Demultiplex any number of FASTA or FASTQ files based on a list of barcodes.
|
|
5
|
+
Project-URL: homepage, https://github.com/jfjlaros/demultiplex
|
|
6
|
+
Author-email: "Jeroen F.J. Laros" <jlaros@fixedpoint.nl>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE.md
|
|
9
|
+
Keywords: FASTA,FASTQ,barcode,bioinformatics,demultiplex
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering
|
|
15
|
+
Requires-Dist: biopython
|
|
16
|
+
Requires-Dist: dict-trie
|
|
17
|
+
Requires-Dist: fastools
|
|
18
|
+
Requires-Dist: jit-open
|
|
19
|
+
Requires-Dist: tssv
|
|
20
|
+
Description-Content-Type: text/x-rst
|
|
21
|
+
|
|
22
|
+
Demultiplex: FASTA/FASTQ demultiplexer
|
|
23
|
+
======================================
|
|
24
|
+
|
|
25
|
+
.. image:: https://img.shields.io/github/last-commit/jfjlaros/demultiplex.svg
|
|
26
|
+
:target: https://github.com/jfjlaros/demultiplex/graphs/commit-activity
|
|
27
|
+
.. image:: https://github.com/jfjlaros/demultiplex/actions/workflows/test.yml/badge.svg
|
|
28
|
+
:target: https://github.com/jfjlaros/demultiplex/actions/workflows/test.yml
|
|
29
|
+
.. image:: https://readthedocs.org/projects/demultiplex/badge/?version=latest
|
|
30
|
+
:target: https://demultiplex.readthedocs.io/en/latest
|
|
31
|
+
.. image:: https://img.shields.io/github/release-date/jfjlaros/demultiplex.svg
|
|
32
|
+
:target: https://github.com/jfjlaros/demultiplex/releases
|
|
33
|
+
.. image:: https://img.shields.io/github/release/jfjlaros/demultiplex.svg
|
|
34
|
+
:target: https://github.com/jfjlaros/demultiplex/releases
|
|
35
|
+
.. image:: https://img.shields.io/pypi/v/demultiplex.svg
|
|
36
|
+
:target: https://pypi.org/project/demultiplex/
|
|
37
|
+
.. image:: https://img.shields.io/github/languages/code-size/jfjlaros/demultiplex.svg
|
|
38
|
+
:target: https://github.com/jfjlaros/demultiplex
|
|
39
|
+
.. image:: https://img.shields.io/github/languages/count/jfjlaros/demultiplex.svg
|
|
40
|
+
:target: https://github.com/jfjlaros/demultiplex
|
|
41
|
+
.. image:: https://img.shields.io/github/languages/top/jfjlaros/demultiplex.svg
|
|
42
|
+
:target: https://github.com/jfjlaros/demultiplex
|
|
43
|
+
.. image:: https://img.shields.io/github/license/jfjlaros/demultiplex.svg
|
|
44
|
+
:target: https://raw.githubusercontent.com/jfjlaros/demultiplex/master/LICENSE.md
|
|
45
|
+
.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.8362959.svg
|
|
46
|
+
:target: https://zenodo.org/record/8362959
|
|
47
|
+
|
|
48
|
+
----
|
|
49
|
+
|
|
50
|
+
Versatile NGS demultiplexer with the following features:
|
|
51
|
+
|
|
52
|
+
- Support for FASTA and FASTQ files.
|
|
53
|
+
- Support for gzip and bzip2 compressed files.
|
|
54
|
+
- Support for multiple reads per fragment, e.g., paired-end.
|
|
55
|
+
- Handles barcodes in the header and in the reads.
|
|
56
|
+
- Handles barcodes at *unknown* locations in reads (e.g., PacBio or Nanopore
|
|
57
|
+
barcodes).
|
|
58
|
+
- Support for selection of part of a barcode.
|
|
59
|
+
- Allows for mismatches, insertions and deletions.
|
|
60
|
+
- Barcode guessing by frequency or fixed amount.
|
|
61
|
+
- Handles large numbers (over one million) of barcodes.
|
|
62
|
+
|
|
63
|
+
Please see ReadTheDocs_ for the latest documentation.
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
.. _ReadTheDocs: https://demultiplex.readthedocs.io/en/latest/index.html
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
Demultiplex: FASTA/FASTQ demultiplexer
|
|
2
|
+
======================================
|
|
3
|
+
|
|
4
|
+
.. image:: https://img.shields.io/github/last-commit/jfjlaros/demultiplex.svg
|
|
5
|
+
:target: https://github.com/jfjlaros/demultiplex/graphs/commit-activity
|
|
6
|
+
.. image:: https://github.com/jfjlaros/demultiplex/actions/workflows/test.yml/badge.svg
|
|
7
|
+
:target: https://github.com/jfjlaros/demultiplex/actions/workflows/test.yml
|
|
8
|
+
.. image:: https://readthedocs.org/projects/demultiplex/badge/?version=latest
|
|
9
|
+
:target: https://demultiplex.readthedocs.io/en/latest
|
|
10
|
+
.. image:: https://img.shields.io/github/release-date/jfjlaros/demultiplex.svg
|
|
11
|
+
:target: https://github.com/jfjlaros/demultiplex/releases
|
|
12
|
+
.. image:: https://img.shields.io/github/release/jfjlaros/demultiplex.svg
|
|
13
|
+
:target: https://github.com/jfjlaros/demultiplex/releases
|
|
14
|
+
.. image:: https://img.shields.io/pypi/v/demultiplex.svg
|
|
15
|
+
:target: https://pypi.org/project/demultiplex/
|
|
16
|
+
.. image:: https://img.shields.io/github/languages/code-size/jfjlaros/demultiplex.svg
|
|
17
|
+
:target: https://github.com/jfjlaros/demultiplex
|
|
18
|
+
.. image:: https://img.shields.io/github/languages/count/jfjlaros/demultiplex.svg
|
|
19
|
+
:target: https://github.com/jfjlaros/demultiplex
|
|
20
|
+
.. image:: https://img.shields.io/github/languages/top/jfjlaros/demultiplex.svg
|
|
21
|
+
:target: https://github.com/jfjlaros/demultiplex
|
|
22
|
+
.. image:: https://img.shields.io/github/license/jfjlaros/demultiplex.svg
|
|
23
|
+
:target: https://raw.githubusercontent.com/jfjlaros/demultiplex/master/LICENSE.md
|
|
24
|
+
.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.8362959.svg
|
|
25
|
+
:target: https://zenodo.org/record/8362959
|
|
26
|
+
|
|
27
|
+
----
|
|
28
|
+
|
|
29
|
+
Versatile NGS demultiplexer with the following features:
|
|
30
|
+
|
|
31
|
+
- Support for FASTA and FASTQ files.
|
|
32
|
+
- Support for gzip and bzip2 compressed files.
|
|
33
|
+
- Support for multiple reads per fragment, e.g., paired-end.
|
|
34
|
+
- Handles barcodes in the header and in the reads.
|
|
35
|
+
- Handles barcodes at *unknown* locations in reads (e.g., PacBio or Nanopore
|
|
36
|
+
barcodes).
|
|
37
|
+
- Support for selection of part of a barcode.
|
|
38
|
+
- Allows for mismatches, insertions and deletions.
|
|
39
|
+
- Barcode guessing by frequency or fixed amount.
|
|
40
|
+
- Handles large numbers (over one million) of barcodes.
|
|
41
|
+
|
|
42
|
+
Please see ReadTheDocs_ for the latest documentation.
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
.. _ReadTheDocs: https://demultiplex.readthedocs.io/en/latest/index.html
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
@HWI-ABCDE:0:0:0000:0000#ACTA/1
|
|
2
|
+
AAAAATATGACCTAGATCAACCGGAGAGTGTATTGGTAAGCCCGCGATTTCATGCCCGTG
|
|
3
|
+
+
|
|
4
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
|
5
|
+
@HWI-ABCDE:0:0:0000:0000#ACC/1
|
|
6
|
+
ACCCCTATGACCTAGATCAACCGGAGAGTGTATTGGTAAGCCCGCGATTTCATGCCCGTG
|
|
7
|
+
+
|
|
8
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
|
9
|
+
@HWI-ABCDE:0:0:0000:0000#ACTG/1
|
|
10
|
+
AGGGGTATGACCTAGATCAACCGGAGAGTGTATTGGTAAGCCCGCGATTTCATGCCCGTG
|
|
11
|
+
+
|
|
12
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
|
13
|
+
@HWI-ABCDE:0:0:0000:0000#ACTT/1
|
|
14
|
+
ATTTTTATGACCTAGATCAACCGGAGAGTGTATTGGTAAGCCCGCGATTTCATGCCCGTG
|
|
15
|
+
+
|
|
16
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
|
17
|
+
@HWI-ABCDE:0:0:0000:0000#ACTT/1
|
|
18
|
+
ATTTTTATGACCTAGATCAACCGGAGAGTGTATTGGTAAGCCCGCGATTTCATGCCCGTG
|
|
19
|
+
+
|
|
20
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
@HWI-M00000:00:000000000-ABCDE:0:0000:00000:0000 1:N:0:ACTA
|
|
2
|
+
AGTAGTATGACCTAGATCAACCGGAGAGTGTATTGGTAAGCCCGCGATTTCATGCCCGTG
|
|
3
|
+
+
|
|
4
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
|
5
|
+
@HWI-M00000:00:000000000-ABCDE:0:0000:00000:0000 1:N:0:ACTC
|
|
6
|
+
AGTAGTATGACCTAGATCAACCGGAGAGTGTATTGGTAAGCCCGCGATTTCATGCCCGTG
|
|
7
|
+
+
|
|
8
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
|
9
|
+
@HWI-M00000:00:000000000-ABCDE:0:0000:00000:0000 1:N:0:ACTG
|
|
10
|
+
AGTAGTATGACCTAGATCAACCGGAGAGTGTATTGGTAAGCCCGCGATTTCATGCCCGTG
|
|
11
|
+
+
|
|
12
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
|
13
|
+
@HWI-M00000:00:000000000-ABCDE:0:0000:00000:0000 1:N:0:ACTT
|
|
14
|
+
AGTAGTATGACCTAGATCAACCGGAGAGTGTATTGGTAAGCCCGCGATTTCATGCCCGTG
|
|
15
|
+
+
|
|
16
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
|
17
|
+
@HWI-M00000:00:000000000-ABCDE:0:0000:00000:0000 1:N:0:ACTT
|
|
18
|
+
AGTAGTATGACCTAGATCAACCGGAGAGTGTATTGGTAAGCCCGCGATTTCATGCCCGTG
|
|
19
|
+
+
|
|
20
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from importlib.metadata import PackageNotFoundError, metadata
|
|
2
|
+
from re import split
|
|
3
|
+
from typing import Callable
|
|
4
|
+
|
|
5
|
+
from .demultiplex import Extractor, count, demultiplex
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _extract(key: str, delim: str = r'[^\s\S]', index: int = 0) -> str:
    """Look up one field of this package's installed metadata.

    :arg key: Metadata field name.
    :arg delim: Regular expression the field value is split on; the default
        pattern matches no character, so the value is returned whole.
    :arg index: Which piece of the split value to return.

    :returns: The selected piece, or ``'<NO DATA>'`` when the package
        metadata cannot be found.
    """
    try:
        fields = metadata(__package__)
    except PackageNotFoundError:
        return '<NO DATA>'

    # A missing field is treated as an empty string before splitting.
    pieces = split(delim, fields.get(key, ''))
    return pieces[index]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def doc_split(func: Callable) -> str:
    """Return the first paragraph of a callable's docstring.

    :arg func: Object whose ``__doc__`` attribute is read.

    :returns: The docstring text up to the first blank line, or an empty
        string when there is no docstring (``__doc__`` is ``None``, e.g.
        when docstrings are stripped with ``python -OO``).
    """
    # ``__doc__`` may be None; fall back to '' instead of raising.
    return (func.__doc__ or '').split('\n\n')[0]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
_project = _extract('Name')
_version = _extract('Version')
_year = '2013-2026'

# The distributed metadata has no ``Author`` field; name and address are
# combined as ``Author-email: "Name" <address>``. Split that form so the
# copyright line shows a name and a bare address.
# NOTE(review): assumes a single author entry -- confirm if more are added.
_contact = _extract('Author-email')
_name, _sep, _address = _contact.rpartition(' <')
if _sep and _address.endswith('>'):
    _author = _extract('Author') or _name.strip('"')
    _email = _address[:-1]
else:
    # Bare address or the ``'<NO DATA>'`` placeholder: use the values as is.
    _author = _extract('Author')
    _email = _contact

_description = _extract('Summary')
_copyright = f'Copyright (c) {_year} by {_author} <{_email}>'
# ``Project-URL`` values have the form ``label, url``; keep only the URL.
_url = _extract('Project-URL', r',\s*', -1)
_info = f'{_project} version {_version}\n\n{_copyright}\nHomepage: {_url}'
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
|
|
2
|
+
from sys import stdin
|
|
3
|
+
|
|
4
|
+
from fastools import Peeker
|
|
5
|
+
|
|
6
|
+
from . import _copyright, _description, _info, doc_split
|
|
7
|
+
from .demultiplex import (
|
|
8
|
+
_get_barcode, _type_handler, Extractor, count, demultiplex, match)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _file_type(*args, **kwargs):
    """Argparse FileType replacement.

    The given positional and keyword arguments are kept and forwarded,
    after the file name, to the opener looked up in ``_type_handler``.

    :returns: A one-argument callable suitable as an argparse ``type``.
    """
    def _open(name):
        # The text after the last dot (the whole name when there is no dot)
        # is the key into ``_type_handler``.
        suffix = name.rpartition('.')[2]
        opener = _type_handler[suffix]
        return opener(name, *args, **kwargs)

    return _open
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def guess(
        input_handle, output_handle, in_read, fmt, start, end, sample_size,
        threshold, use_freq):
    """Retrieve the most frequent barcodes.

    The first paragraph of this docstring is shown as the description of
    the ``guess`` subcommand, so keep it a single short sentence.

    :arg input_handle: Open input file.
    :arg output_handle: Open output file; one ``<rank> <barcode>`` line is
        written per selected barcode, in sorted barcode order.
    :arg in_read: Extract the barcodes from the read.
    :arg fmt: Header format.
    :arg start: Start of the selection.
    :arg end: End of the selection.
    :arg sample_size: Sample size.
    :arg threshold: Threshold for the selection method.
    :arg use_freq: Select on frequency instead of a fixed amount.
    """
    barcodes = count(
        input_handle, Extractor(input_handle, in_read, fmt, start, end),
        sample_size, threshold, use_freq)

    # Ranks are 1-based.
    for rank, barcode in enumerate(sorted(barcodes), start=1):
        output_handle.write('{} {}\n'.format(rank, barcode))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def demux(
        input_handles, barcodes_handle, in_read, fmt, start, end, mismatch,
        use_edit, path='.'):
    """Demultiplex any number of files given a list of barcodes.

    The first paragraph of this docstring is shown as the description of
    the ``demux`` subcommand, so keep it a single short sentence.

    :arg input_handles: Open input files; the extractor is built from the
        first one.
    :arg barcodes_handle: Open barcodes file.
    :arg in_read: Extract the barcodes from the read.
    :arg fmt: Header format.
    :arg start: Start of the selection.
    :arg end: End of the selection.
    :arg mismatch: Number of mismatches.
    :arg use_edit: Use Levenshtein distance.
    :arg path: Output directory.
    """
    first_handle = input_handles[0]
    demultiplex(
        input_handles, barcodes_handle,
        Extractor(first_handle, in_read, fmt, start, end),
        mismatch, use_edit, path)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def bcmatch(
        input_handles, barcodes_handle, mismatch, use_edit, path='.',
        filter_multiple=False, directional=False):
    """Demultiplex one file given a list of barcode tuples.

    The first paragraph of this docstring is shown as the description of
    the ``match`` subcommand, so keep it a single short sentence.

    :arg input_handles: Open input files.
    :arg barcodes_handle: Open barcodes file.
    :arg mismatch: Number of mismatches.
    :arg use_edit: Use Levenshtein distance.
    :arg path: Output directory.
    :arg filter_multiple: Write multiple matches to separate files.
    :arg directional: Directional input data.
    """
    # All options are handed to ``match`` positionally, in this order.
    settings = (mismatch, use_edit, path, filter_multiple, directional)
    match(input_handles, barcodes_handle, *settings)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _arg_parser() -> ArgumentParser:
    """Command line argument parsing.

    Builds three helper parsers (barcode selection, matching options and
    positional inputs) that the subcommands share through ``parents``.

    :returns: Parser with the required subcommands ``guess``, ``demux``
        and ``match``; each stores its handler function under ``func``.
    """
    # Barcode selection options, shared by ``guess`` and ``demux``.
    common_parser = ArgumentParser(add_help=False)
    common_parser.add_argument(
        '-r', dest='in_read', action='store_true',
        help='extract the barcodes from the read')
    common_parser.add_argument(
        '--format', dest='fmt', default=None, choices=_get_barcode.keys(),
        help='provide the header format')
    common_parser.add_argument(
        '-s', dest='start', type=int, default=None,
        help='start of the selection')
    common_parser.add_argument(
        '-e', dest='end', type=int, default=None, help='end of the selection')

    # Matching and output options, shared by ``demux`` and ``match``.
    common_options_parser = ArgumentParser(add_help=False)
    common_options_parser.add_argument(
        '-m', dest='mismatch', type=int, default=1,
        help='number of mismatches')
    common_options_parser.add_argument(
        '-d', dest='use_edit', action='store_true',
        help='use Levenshtein distance')
    common_options_parser.add_argument(
        '-p', dest='path', type=str, default='.', help='output directory')

    # Positional inputs, shared by ``demux`` and ``match``.
    input_parser = ArgumentParser(add_help=False)
    input_parser.add_argument(
        'barcodes_handle', metavar='BARCODES', type=_file_type('rt'),
        help='barcodes file')
    input_parser.add_argument(
        'input_handles', metavar='INPUT', nargs='+', type=_file_type('rt'),
        help='input files')

    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter, description=_description,
        epilog=_copyright)
    parser.add_argument('-v', action='version', version=_info)
    subparsers = parser.add_subparsers(dest='subcommand', required=True)

    subparser = subparsers.add_parser(
        'guess', formatter_class=ArgumentDefaultsHelpFormatter,
        parents=[common_parser], description=doc_split(guess))
    subparser.add_argument(
        'input_handle', metavar='INPUT', type=_file_type('rt'),
        help='input file')
    subparser.add_argument(
        '-o', dest='output_handle', metavar='OUTPUT', type=_file_type('wt'),
        default='-', help='output file')
    subparser.add_argument(
        '-n', dest='sample_size', type=int, default=1000000,
        help='sample size')
    subparser.add_argument(
        '-f', dest='use_freq', action='store_true',
        help='select on frequency instead of a fixed amount')
    subparser.add_argument(
        '-t', dest='threshold', type=int, default=12,
        help='threshold for the selection method')
    subparser.set_defaults(func=guess)

    subparser = subparsers.add_parser(
        'demux', formatter_class=ArgumentDefaultsHelpFormatter,
        parents=[common_parser, common_options_parser, input_parser],
        description=doc_split(demux))
    subparser.set_defaults(func=demux)

    subparser = subparsers.add_parser(
        'match', formatter_class=ArgumentDefaultsHelpFormatter,
        parents=[common_options_parser, input_parser],
        description=doc_split(bcmatch))
    subparser.add_argument(
        '-f', dest='filter_multiple', default=False, action='store_true',
        help='write multiple matches to separate files')
    subparser.add_argument(
        '-D', dest='directional', default=False, action='store_true',
        help='directional input data')
    subparser.set_defaults(func=bcmatch)

    return parser
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def main():
    """Main entry point.

    Parses the command line and calls the handler stored by the selected
    subcommand with the parsed options as keyword arguments. ``IOError``
    raised while parsing, and ``ValueError`` or ``OSError`` raised by the
    handler, are reported through ``parser.error``.
    """
    global stdin

    parser = _arg_parser()

    # Rebinds this module's ``stdin`` name to a ``Peeker`` wrapper.
    # NOTE(review): ``sys.stdin`` itself is not replaced -- confirm intended.
    stdin = Peeker(stdin)

    try:
        args = parser.parse_args()
    except IOError as error:
        parser.error(error)

    # ``func`` and ``subcommand`` are parser bookkeeping, not handler options.
    options = dict(vars(args))
    options.pop('func', None)
    options.pop('subcommand', None)

    try:
        args.func(**options)
    except (ValueError, OSError) as error:
        parser.error(error)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# Run the command line interface only when this module is executed as a script.
if __name__ == '__main__':
|
|
150
|
+
main()
|