edentity 1.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. edentity-1.4.3/.gitignore +29 -0
  2. edentity-1.4.3/.gitlab-ci.yml +137 -0
  3. edentity-1.4.3/.pre-commit-config.yaml +8 -0
  4. edentity-1.4.3/Dockerfile +77 -0
  5. edentity-1.4.3/LICENSE.md +201 -0
  6. edentity-1.4.3/MANIFEST.in +33 -0
  7. edentity-1.4.3/PKG-INFO +159 -0
  8. edentity-1.4.3/README.md +145 -0
  9. edentity-1.4.3/dag.png +0 -0
  10. edentity-1.4.3/edentity/__init__.py +0 -0
  11. edentity-1.4.3/edentity/__main__.py +114 -0
  12. edentity-1.4.3/edentity/_version.py +21 -0
  13. edentity-1.4.3/edentity/utils/__init__.py +0 -0
  14. edentity-1.4.3/edentity/utils/configs.py +75 -0
  15. edentity-1.4.3/edentity/workflow/Dockerfile +30 -0
  16. edentity-1.4.3/edentity/workflow/Snakefile +120 -0
  17. edentity-1.4.3/edentity/workflow/rules/chimera.smk +14 -0
  18. edentity-1.4.3/edentity/workflow/rules/denoise.smk +15 -0
  19. edentity-1.4.3/edentity/workflow/rules/dereplication.smk +14 -0
  20. edentity-1.4.3/edentity/workflow/rules/filter.smk +14 -0
  21. edentity-1.4.3/edentity/workflow/rules/merge.smk +81 -0
  22. edentity-1.4.3/edentity/workflow/rules/search_exact.smk +69 -0
  23. edentity-1.4.3/edentity/workflow/rules/trimming.smk +14 -0
  24. edentity-1.4.3/edentity/workflow/scripts/chimera.py +78 -0
  25. edentity-1.4.3/edentity/workflow/scripts/custom_multiqc_module.py +46 -0
  26. edentity-1.4.3/edentity/workflow/scripts/denoise.py +72 -0
  27. edentity-1.4.3/edentity/workflow/scripts/dereplication.py +63 -0
  28. edentity-1.4.3/edentity/workflow/scripts/esv_table.py +291 -0
  29. edentity-1.4.3/edentity/workflow/scripts/filter.py +69 -0
  30. edentity-1.4.3/edentity/workflow/scripts/merge.py +89 -0
  31. edentity-1.4.3/edentity/workflow/scripts/primerHandler.py +150 -0
  32. edentity-1.4.3/edentity/workflow/scripts/runtime.py +48 -0
  33. edentity-1.4.3/edentity/workflow/scripts/search_exact.py +54 -0
  34. edentity-1.4.3/edentity/workflow/scripts/trimming.py +133 -0
  35. edentity-1.4.3/edentity-galaxy-pipeline.xml +229 -0
  36. edentity-1.4.3/edentity.egg-info/SOURCES.txt +37 -0
  37. edentity-1.4.3/environment.yaml +24 -0
  38. edentity-1.4.3/galaxy_client.py +238 -0
  39. edentity-1.4.3/pyproject.toml +56 -0
  40. edentity-1.4.3/setup.cfg +4 -0
@@ -0,0 +1,29 @@
1
+
2
+ #ignore snakemake logs
3
+ workflow/.snakemake/
4
+ .snakemake/
5
+
6
+ #ignore reports
7
+ workflow/reports/
8
+
9
+ #ignore logs
10
+ workflow/logs
11
+
12
+ #ignore notebooks for now
13
+ workflow/notebooks
14
+ workflow/scripts/samples.ipynb
15
+
16
+ #test data output
17
+ tests/test
18
+ tests/vsearch
19
+
20
+ # ignore apptainer
21
+ .cache/
22
+ .conda/
23
+ .condarc
24
+ .DS_Store
25
+ !tests/data/
26
+
27
+ # ignore galaxy tests
28
+ GalaxyTest/
29
+ __pycache__/
@@ -0,0 +1,137 @@
1
+
2
+ variables:
3
+ DOCKER_IMAGE: "$CI_REGISTRY_IMAGE:v0.1.0"
4
+ ARTIFACT_PATH: "./tests/" # path to where snakemake outputs are expected
5
+ SNAKEMAKE_ENV: "conda-envs/snakemake/" # would not recommend changing this
6
+ SNAKEMAKE_VERSION: "8.16.0"
7
+ GALAXY_ROOT: "tempGalaxy/" # path to where temp galaxy will be installed; change this to absolute path to galaxy root
8
+
9
+
10
+
11
+ # two images will always be published: one with actual version tag and another with same digest but tagged latest :)
12
+ default:
13
+ # image: $DOCKER_IMAGE
14
+ tags:
15
+ - eDentity-runner # tag the runner to be used for this pipeline
16
+
17
+ # list pipeline stages
18
+ stages:
19
+ - prepare
20
+ - build
21
+ - linting
22
+ - test
23
+ - publish
24
+
25
+ # Default rule for all jobs
26
+ .common_rule: &common_rule
27
+ rules:
28
+ - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
29
+ when: always
30
+ - if: '$CI_COMMIT_BRANCH == "main"'
31
+ when: always
32
+ - if: $CI_COMMIT_TAG
33
+ when: always
34
+
35
+ #create snakemake conda env and preserve the environment for the next stages
36
+ install-dependencies:
37
+ stage: prepare
38
+ <<: *common_rule
39
+ script:
40
+
41
+ # create the prefix path in case conda decides not to create
42
+ - mkdir -p $CI_PROJECT_DIR/$SNAKEMAKE_ENV
43
+ - mkdir -p $CI_PROJECT_DIR/conda-envs/unit-tests/
44
+ - mkdir -p $CI_PROJECT_DIR/conda-envs/pypi-build-and-publish/
45
+
46
+ # create conda envs for running snakemake and unittests
47
+ - conda env create -f environment.yaml --prefix $CI_PROJECT_DIR/$SNAKEMAKE_ENV
48
+ - conda create --prefix $CI_PROJECT_DIR/conda-envs/unit-tests/ -c conda-forge flake8 pytest pandas -y
49
+
50
+ # create conda env for building and publishing pypackage
51
+ - conda create --prefix $CI_PROJECT_DIR/conda-envs/pypi-build-and-publish/ -c conda-forge python=3.10 pip twine setuptools setuptools_scm[toml] build wheel -y
52
+
53
+ # preserve the envs created so that next stages can call up
54
+ artifacts:
55
+ paths:
56
+ - ./conda-envs
57
+ expire_in: 1 week
58
+
59
+
60
+ # # Linting (ignoring "continuation line over-indented for visual indent" (E127) and "line too long" (E501))
61
+ # # couldn't quite fix these 2 linting errors so ignore them for now
62
+ # linting:
63
+ # stage: linting
64
+ # <<: *common_rule
65
+ # script:
66
+ # - conda run -p $CI_PROJECT_DIR/conda-envs/unit-tests/ flake8 --ignore=E127,E501,E999 edentity/workflow/scripts/
67
+
68
+
69
+ build-pypackage:
70
+ stage: build
71
+ script:
72
+ - echo "Building Python package..."
73
+ - eval "$(conda shell.bash hook)"
74
+ - conda activate $CI_PROJECT_DIR/conda-envs/pypi-build-and-publish
75
+ - python -m build
76
+ rules:
77
+ - if: $CI_COMMIT_TAG
78
+ artifacts:
79
+ paths:
80
+ - dist/
81
+ expire_in: 1 week
82
+ needs:
83
+ - install-dependencies
84
+
85
+ test-edentity-package:
86
+ stage: test
87
+ <<: *common_rule
88
+ script:
89
+ - eval "$(conda shell.bash hook)"
90
+ - conda activate edentity-test-environment # use the build env; install other requirements of edentity package in the conda env
91
+ - pip install -e . # install the package in editable mode
92
+ - edentity --raw_data_dir tests/data --forward_primer AAACTCGTGCCAGCCACC --reverse_primer GGGTATCTAATCCCAGTTTG --work_dir tests/vsearch
93
+ artifacts:
94
+ paths:
95
+ - $ARTIFACT_PATH
96
+ expire_in: 1 week
97
+ needs:
98
+ - build-pypackage
99
+
100
+ publish:
101
+ stage: publish
102
+ script:
103
+ - echo "Publishing package to PyPI..."
104
+ - eval "$(conda shell.bash hook)"
105
+ - conda activate $CI_PROJECT_DIR/conda-envs/pypi-build-and-publish
106
+ - TWINE_PASSWORD=${PYPI_API_TOKEN} TWINE_USERNAME=__token__ python -m twine upload --skip-existing dist/*
107
+ rules:
108
+ - if: $CI_COMMIT_TAG
109
+ needs:
110
+ - build-pypackage
111
+ - install-dependencies
112
+
113
+
114
+
115
+
116
+
117
+ # test galaxy pipeline THIS part is now done on nbitk galaxy client.
118
+ # this will run the galaxy pipeline and uses diff command to compare
119
+ # the output with the expected output
120
+ # Ensure the expected output is in the test-data folder
121
+ # and that it was generated with the same params as defined in the test section of the Galaxy tool XML
122
+ # test-galaxy:
123
+ # stage: test-galaxy
124
+ # <<: *common_rule
125
+ # script:
126
+ # - eval "$(conda shell.bash hook)"
127
+ # - conda activate galaxyTest
128
+ # - planemo test --test_data=test-data edentity-galaxy-pipeline.xml
129
+ # artifacts:
130
+ # paths:
131
+ # - tool_test_output.html
132
+ # - tool_test_output.json
133
+ # expire_in: 1 week
134
+ #TO ADD:
135
+ # 1. unit test the ESV table output content. Ideally compare the number of ESVs, the number of samples (columns), the names of the samples
136
+ # Could write a python script that has a predefined ESV table and compare it to the output of the pipeline
137
+ #
@@ -0,0 +1,8 @@
1
+ # Requirements for running these pre-commit hooks
2
+ # pre-commit: pip install pre-commit
3
+
4
+ repos:
5
+ - repo: https://github.com/gitleaks/gitleaks
6
+ rev: v8.18.0
7
+ hooks:
8
+ - id: gitleaks
@@ -0,0 +1,77 @@
1
+ FROM condaforge/mambaforge:24.9.2-0
2
+ LABEL io.github.snakemake.containerized="true"
3
+ LABEL io.github.snakemake.conda_env_hash="428af8958ac834f4d5e69a3faf690bf39fa900031c1f165ab52b89a72457ebf4"
4
+
5
+ # Step 2: Retrieve conda environments
6
+
7
+ # Conda environment:
8
+ # source: workflow/envs/cutadapt.yaml
9
+ # prefix: /conda-envs/edf345db7c901e7d3dc3ca55e5af56e2
10
+ # priority: strict
11
+ # name: cutadapt
12
+ # channels:
13
+ # - conda-forge
14
+ # - bioconda
15
+ # - nodefaults
16
+ # dependencies:
17
+ # - pip
18
+ # - cutadapt=4.9
19
+ # - biopython=1.84
20
+ # - pip:
21
+ # - nbitk==0.3.2
22
+ RUN mkdir -p /conda-envs/edf345db7c901e7d3dc3ca55e5af56e2
23
+ COPY workflow/envs/cutadapt.yaml /conda-envs/edf345db7c901e7d3dc3ca55e5af56e2/environment.yaml
24
+
25
+ # Conda environment:
26
+ # source: workflow/envs/fastp.yaml
27
+ # prefix: /conda-envs/b588f57bfee906402cd48ab9480dae74
28
+ # priority: strict
29
+ # name: fastp
30
+ # channels:
31
+ # - conda-forge
32
+ # - bioconda
33
+ # - nodefaults
34
+ # dependencies:
35
+ # - fastp=0.24.0
36
+ RUN mkdir -p /conda-envs/b588f57bfee906402cd48ab9480dae74
37
+ COPY workflow/envs/fastp.yaml /conda-envs/b588f57bfee906402cd48ab9480dae74/environment.yaml
38
+
39
+ # Conda environment:
40
+ # source: workflow/envs/multiqc.yaml
41
+ # prefix: /conda-envs/54be30cf1928dd479025b6fec1bd55c4
42
+ # priority: strict
43
+ # name: multiqc
44
+ # channels:
45
+ # - conda-forge
46
+ # - bioconda
47
+ # - nodefaults
48
+ # dependencies:
49
+ # - multiqc=1.27.1
50
+ RUN mkdir -p /conda-envs/54be30cf1928dd479025b6fec1bd55c4
51
+ COPY workflow/envs/multiqc.yaml /conda-envs/54be30cf1928dd479025b6fec1bd55c4/environment.yaml
52
+
53
+ # Conda environment:
54
+ # source: workflow/envs/vsearch.yaml
55
+ # prefix: /conda-envs/1575a372c32dbdce868ad7f639c291bf
56
+ # priority: strict
57
+ # name: vsearch
58
+ # channels:
59
+ # - conda-forge
60
+ # - bioconda
61
+ # - nodefaults
62
+ # dependencies:
63
+ # - vsearch=2.28.1
64
+ # - pip
65
+ # - biopython=1.84
66
+ # - pip:
67
+ # - nbitk==0.3.2
68
+ RUN mkdir -p /conda-envs/1575a372c32dbdce868ad7f639c291bf
69
+ COPY workflow/envs/vsearch.yaml /conda-envs/1575a372c32dbdce868ad7f639c291bf/environment.yaml
70
+
71
+ # Step 3: Generate conda environments
72
+
73
+ RUN conda env create --prefix /conda-envs/edf345db7c901e7d3dc3ca55e5af56e2 --file /conda-envs/edf345db7c901e7d3dc3ca55e5af56e2/environment.yaml && \
74
+ conda env create --prefix /conda-envs/b588f57bfee906402cd48ab9480dae74 --file /conda-envs/b588f57bfee906402cd48ab9480dae74/environment.yaml && \
75
+ conda env create --prefix /conda-envs/54be30cf1928dd479025b6fec1bd55c4 --file /conda-envs/54be30cf1928dd479025b6fec1bd55c4/environment.yaml && \
76
+ conda env create --prefix /conda-envs/1575a372c32dbdce868ad7f639c291bf --file /conda-envs/1575a372c32dbdce868ad7f639c291bf/environment.yaml && \
77
+ conda clean --all -y
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2024 Rutger Vos
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
@@ -0,0 +1,33 @@
1
+ # Include Python modules
2
+ recursive-include edentity *.py
3
+
4
+ # Include only necessary workflow files
5
+ include edentity/workflow/Snakefile
6
+ recursive-include edentity/workflow/rules *.smk
7
+ recursive-include edentity/workflow/scripts *.py
8
+
9
+ # Include configuration files
10
+ include pyproject.toml
11
+ include README.md
12
+ include LICENSE.md
13
+ include .gitignore
14
+
15
+ # exclude unnecessary files
16
+ prune edentity/configs
17
+ prune edentity/resources
18
+ prune edentity/workflow/profiles
19
+ prune *.egg-info
20
+
21
+ # prune git directories
22
+ prune .git
23
+
24
+ # prune pycharm directories
25
+ prune **/.idea
26
+ # prune test directories
27
+ prune tests
28
+ prune test-data
29
+
30
+ prune build
31
+ prune dist
32
+ prune __pycache__
33
+ global-exclude *.pyc
@@ -0,0 +1,159 @@
1
+ Metadata-Version: 2.4
2
+ Name: edentity
3
+ Version: 1.4.3
4
+ Summary: A Snakemake-based pipeline for amplicon processing
5
+ Author-email: Luka Lenaroto <luka.lenaroto@naturalis.nl>, Pierre-Etienne Cholley <pierreetienne.cholley@naturalis.nl>, Dick Groenenberg <dick.groenenberg@naturalis.nl>, Edwin den Haas <edwin.denhaas@naturalis.nl>, Dominika Kresa <dominika.kresa@naturalis.nl>, Rutger Vos <rutger.vos@naturalis.nl>
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: Operating System :: OS Independent
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE.md
11
+ Requires-Dist: snakemake>=8.16.0
12
+ Requires-Dist: polars>=1.30.0
13
+ Dynamic: license-file
14
+
15
+ # edentity-metabarcoding-pipeline
16
+
17
+ ![alt text](dag.png)
18
+
19
+ ## Table of Contents
20
+
21
+ - [Brief on Vsearch](#brief-on-vsearch)
22
+ - [Usage of this workflow](#usage-of-this-workflow)
23
+ - [Using Conda](#using-conda)
24
+ - [Install Requirements](#install-requirements)
25
+ - [Install conda or miniconda](#install-conda-or-miniconda)
26
+ - [Steps to run edentity-metabarcoding-pipeline](#steps-to-run-edentity-metabarcoding-pipeline)
27
+ - [Using Docker](#using-docker)
28
+ - [Install Requirements](#install-requirements-1)
29
+ - [Deploying to Galaxy](#deploying-to-galaxy)
30
+ - [Prerequisites](#prerequisites)
31
+ - [Steps to Deploy](#steps-to-deploy)
32
+
33
+ # Brief on Vsearch
34
+
35
+ Vsearch is a metabarcoding pipeline for Illumina/AVITI paired-end data. More details can be found at [vsearch github](https://github.com/torognes/vsearch).
36
+
37
+
38
+ Vsearch publication: https://doi.org/10.7717/peerj.2584
39
+
40
+ Technical implementation of this pipeline is inspired by [APSCALE](https://doi.org/10.1093/bioinformatics/btac588); please cite them if you use this pipeline.
41
+
42
+ # Usage of this workflow
43
+
44
+ This workflow can run on:
45
+
46
+ - Conda
47
+
48
+ - Docker
49
+
50
+ - Galaxy
51
+
52
+
53
+ ## Using Conda
54
+
55
+ ### Install Requirements
56
+
57
+ #### Install conda or miniconda
58
+ Ensure (mini)conda is installed on your system. Information on installing miniconda can be found [here](https://docs.anaconda.com/miniconda/)
59
+
60
+
61
+ #### Steps to run edentity-metabarcoding-pipeline
62
+
63
+ ###### 1 Clone this repo
64
+
65
+ ```
66
+ git clone https://gitlab.com/naturalis/bii/bioinformatics/edentity/pipelines/edentity-metabarcoding-pipeline.git && cd edentity-metabarcoding-pipeline/
67
+
68
+ ```
69
+
70
+ ###### 2 Install snakemake conda environment from yaml file
71
+ ```
72
+ conda env create -n snakemake -f workflow/envs/snakemake.yaml
73
+ ```
74
+
75
+
76
+ ###### 3 Activate snakemake conda environment
77
+
78
+ ```
79
+ conda activate snakemake
80
+ ```
81
+
82
+
83
+ ###### 4 Run the workflow: parameters used here are only for example; replace them with params specific to your project.
84
+
85
+ ```
86
+ snakemake -p --profile workflow/profile/ \
87
+ --config forward_primer=AAACTCGTGCCAGCCACC \
88
+ reverse_primer=GGGTATCTAATCCCAGTTTG \
89
+ raw_data_dir=/path/to/your/raw_data/ \
90
+ work_dir=/path/to/your/work_directory \
91
+ min_length=200 max_length=600
92
+ ```
93
+
94
+ Explain parameters and where more info can be found. Link to validation schema.
95
+
96
+ ## Using Docker
97
+
98
+ ### Install Requirements
99
+
100
+ ###### 1. Apptainer:
101
+ Install [apptainer](https://apptainer.org/docs/user/latest/quick_start.html#installation)
102
+
103
+
104
+
105
+ ###### 2. Run the workflow:
106
+
107
+ ```
108
+ snakemake -p --profile workflow/profile/ \
109
+ --config forward_primer=AAACTCGTGCCAGCCACC \
110
+ reverse_primer=GGGTATCTAATCCCAGTTTG \
111
+ raw_data_dir=/path/to/your/raw_data/ \
112
+ work_dir=/path/to/your/work_directory \
113
+ min_length=200 max_length=600 --use-apptainer
114
+ ```
115
+
116
+ ## Deploying to Galaxy
117
+
118
+ ### Prerequisites
119
+
120
+ Ensure you have access to a Galaxy instance where you have administrative privileges or the ability to install tools and workflows.
121
+
122
+ ### Steps to Deploy
123
+
124
+ ###### 1. Clone the Galaxy branch of this repository
125
+ Clone the galaxy branch of this pipeline into your Galaxy tools directory (for Naturalis, clone into: `/data/galaxy/local_tools/`)
126
+
127
+ ```
128
+
129
+ git clone -b galaxy git@gitlab.com:naturalis/bii/bioinformatics/edentity/pipelines/edentity-metabarcoding-pipeline.git
130
+
131
+ ```
132
+
133
+ ###### 2. Configure Galaxy tools xml
134
+ Edit your Galaxy tool configuration xml file to include the `edentity-galaxy-pipeline.xml` file located at the root directory of this repository.
135
+
136
+ For example, to add this pipeline to your Galaxy instance, open `galaxy/config/tool_config.xml` and add the lines below.
137
+
138
+ ```
139
+ <section id="metabarcoding-pipeline" name="Metabarcoding Pipelines">
140
+ <tool file="edentity-metabarcoding-pipeline/edentity-galaxy-pipeline.xml"/>
141
+ </section>
142
+
143
+ ```
144
+
145
+ NB:
146
+ - Ensure you paste the above lines within the `<toolbox> </toolbox>` section of `galaxy/config/tool_config.xml`
147
+ - Ensure paths are correctly referenced depending on where you cloned the pipeline
148
+ - Some useful tips on adding custom tools on galaxy can be found [here](https://galaxyproject.org/admin/tools/add-tool-tutorial/)
149
+
150
+
151
+ ###### 3. Restart Galaxy
152
+
153
+ Restart your Galaxy instance to load the new tool configuration.
154
+
155
+
156
+ ###### 4. Running the pipeline on Galaxy:
157
+ An example of how to run this pipeline on Galaxy is available [here](https://gitlab.com/naturalis/bii/bioinformatics/edentity/pipelines/edentity-metabarcoding-pipeline/-/wikis/eDentity-metabarcoding-pipeline)
158
+
159
+