DAJIN2 0.4.1__zip → 0.4.3__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {DAJIN2-0.4.1/src/DAJIN2.egg-info → DAJIN2-0.4.3}/PKG-INFO +41 -36
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/README.md +30 -23
- DAJIN2-0.4.3/requirements.txt +20 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/setup.py +1 -1
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/label_merger.py +20 -16
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/consensus/consensus.py +3 -2
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/consensus/name_handler.py +1 -7
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/core.py +20 -123
- DAJIN2-0.4.3/src/DAJIN2/core/preprocess/__init__.py +9 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/genome_fetcher.py +11 -3
- DAJIN2-0.4.3/src/DAJIN2/core/preprocess/input_formatter.py +109 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/mapping.py +4 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/midsv_caller.py +3 -4
- DAJIN2-0.4.3/src/DAJIN2/core/report/__init__.py +3 -0
- DAJIN2-0.4.1/src/DAJIN2/core/report/report_bam.py → DAJIN2-0.4.3/src/DAJIN2/core/report/bam_exporter.py +64 -50
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/main.py +1 -1
- DAJIN2-0.4.3/src/DAJIN2/utils/fastx_handler.py +94 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/utils/input_validator.py +32 -21
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/utils/io.py +6 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/utils/sam_handler.py +1 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3/src/DAJIN2.egg-info}/PKG-INFO +41 -36
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2.egg-info/SOURCES.txt +5 -5
- DAJIN2-0.4.3/src/DAJIN2.egg-info/requires.txt +16 -0
- DAJIN2-0.4.1/requirements.txt +0 -23
- DAJIN2-0.4.1/src/DAJIN2/core/preprocess/__init__.py +0 -12
- DAJIN2-0.4.1/src/DAJIN2/core/preprocess/fastx_parser.py +0 -59
- DAJIN2-0.4.1/src/DAJIN2/core/report/__init__.py +0 -3
- DAJIN2-0.4.1/src/DAJIN2/utils/fastx_handler.py +0 -42
- DAJIN2-0.4.1/src/DAJIN2.egg-info/requires.txt +0 -18
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/LICENSE +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/MANIFEST.in +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/setup.cfg +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/__init__.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/__init__.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/classification/__init__.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/classification/allele_merger.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/classification/classifier.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/__init__.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/appender.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/clustering.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/kmer_generator.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/label_extractor.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/label_updator.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/score_handler.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/strand_bias_handler.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/consensus/__init__.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/consensus/clust_formatter.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/consensus/mutation_extractor.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/consensus/similarity_searcher.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/cache_checker.py +0 -0
- /DAJIN2-0.4.1/src/DAJIN2/core/preprocess/directories.py → /DAJIN2-0.4.3/src/DAJIN2/core/preprocess/directory_manager.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/homopolymer_handler.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/insertions_to_fasta.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/knockin_handler.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/mutation_extractor.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/core/report/insertion_reflector.py +0 -0
- /DAJIN2-0.4.1/src/DAJIN2/core/report/report_mutation.py → /DAJIN2-0.4.3/src/DAJIN2/core/report/mutation_exporter.py +0 -0
- /DAJIN2-0.4.1/src/DAJIN2/core/report/report_files.py → /DAJIN2-0.4.3/src/DAJIN2/core/report/sequence_exporter.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/gui.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/static/css/style.css +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/template_igvjs.html +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/templates/index.html +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/utils/config.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/utils/cssplits_handler.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/utils/dna_handler.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/utils/multiprocess.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/utils/report_generator.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2/view.py +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2.egg-info/dependency_links.txt +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2.egg-info/entry_points.txt +0 -0
- {DAJIN2-0.4.1 → DAJIN2-0.4.3}/src/DAJIN2.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: DAJIN2
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: One-step genotyping tools for targeted long-read sequencing
|
|
5
5
|
Home-page: https://github.com/akikuno/DAJIN2
|
|
6
6
|
Author: Akihiro Kuno
|
|
@@ -14,24 +14,22 @@ Classifier: Intended Audience :: Science/Research
|
|
|
14
14
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
15
15
|
Description-Content-Type: text/markdown
|
|
16
16
|
License-File: LICENSE
|
|
17
|
-
Requires-Dist: numpy>=1.
|
|
18
|
-
Requires-Dist: scipy>=1.
|
|
17
|
+
Requires-Dist: numpy>=1.24.0
|
|
18
|
+
Requires-Dist: scipy>=1.10.0
|
|
19
19
|
Requires-Dist: pandas>=1.0.0
|
|
20
|
-
Requires-Dist: openpyxl>=3.
|
|
21
|
-
Requires-Dist: rapidfuzz>=3.
|
|
22
|
-
Requires-Dist:
|
|
23
|
-
Requires-Dist: scikit-learn>=1.0.0
|
|
24
|
-
Requires-Dist: openpyxl>=3.0.0
|
|
20
|
+
Requires-Dist: openpyxl>=3.1.0
|
|
21
|
+
Requires-Dist: rapidfuzz>=3.6.0
|
|
22
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
25
23
|
Requires-Dist: mappy>=2.24
|
|
26
|
-
Requires-Dist: pysam>=0.
|
|
24
|
+
Requires-Dist: pysam>=0.21.0
|
|
27
25
|
Requires-Dist: Flask>=2.2.0
|
|
28
26
|
Requires-Dist: waitress>=2.1.0
|
|
29
27
|
Requires-Dist: Jinja2>=3.1.0
|
|
30
|
-
Requires-Dist: plotly>=5.
|
|
28
|
+
Requires-Dist: plotly>=5.19.0
|
|
31
29
|
Requires-Dist: kaleido>=0.2.0
|
|
32
|
-
Requires-Dist: cstag>=0.
|
|
33
|
-
Requires-Dist: midsv>=0.
|
|
34
|
-
Requires-Dist: wslPath>=0.
|
|
30
|
+
Requires-Dist: cstag>=1.0.0
|
|
31
|
+
Requires-Dist: midsv>=0.11.0
|
|
32
|
+
Requires-Dist: wslPath>=0.4.1
|
|
35
33
|
|
|
36
34
|
[](https://choosealicense.com/licenses/mit/)
|
|
37
35
|
[](https://github.com/akikuno/dajin2/actions)
|
|
@@ -56,14 +54,14 @@ The name DAJIN is derived from the phrase 一網**打尽** (Ichimou **DAJIN** or
|
|
|
56
54
|
+ **Comprehensive Mutation Detection**: Equipped with the capability to detect genome editing events over a wide range, it can identify a broad spectrum of mutations, from small changes to large structural variations.
|
|
57
55
|
+ DAJIN2 can also detect complex mutations characteristic of genome editing, such as "insertions occurring in regions where deletions have occurred."
|
|
58
56
|
+ **Intuitive Visualization**: The outcomes of genome editing are visualized intuitively, allowing for the rapid and easy identification and analysis of mutations.
|
|
59
|
-
+ **Multi-Sample Compatibility**:
|
|
57
|
+
+ **Multi-Sample Compatibility**: Enabling parallel processing of multiple samples. This facilitates efficient progression of large-scale experiments and comparative studies.
|
|
60
58
|
|
|
61
59
|
|
|
62
60
|
## 🛠 Installation
|
|
63
61
|
|
|
64
62
|
### Prerequisites
|
|
65
63
|
|
|
66
|
-
- Python 3.
|
|
64
|
+
- Python 3.8 or later
|
|
67
65
|
- Unix-like environment (Linux, macOS, WSL2, etc.)
|
|
68
66
|
|
|
69
67
|
### From [Bioconda](https://anaconda.org/bioconda/DAJIN2) (Recommended)
|
|
@@ -80,6 +78,7 @@ conda activate env-dajin2
|
|
|
80
78
|
> CONDA_SUBDIR=osx-64 conda create -n env-dajin2 -c conda-forge -c bioconda python=3.10 DAJIN2 -y
|
|
81
79
|
> conda activate env-dajin2
|
|
82
80
|
> conda config --env --set subdir osx-64
|
|
81
|
+
> python -c "import platform; print(platform.machine())" # Confirm that the output is 'x86_64', not 'arm64'
|
|
83
82
|
> ```
|
|
84
83
|
|
|
85
84
|
### From [PyPI](https://pypi.org/project/DAJIN2/)
|
|
@@ -92,7 +91,7 @@ pip install DAJIN2
|
|
|
92
91
|
> If you encounter any issues during the installation, please refer to the [Troubleshooting Guide](https://github.com/akikuno/DAJIN2/blob/main/docs/TROUBLESHOOTING.md)
|
|
93
92
|
|
|
94
93
|
|
|
95
|
-
##
|
|
94
|
+
## 💻 Usage
|
|
96
95
|
|
|
97
96
|
### Required Files
|
|
98
97
|
|
|
@@ -126,11 +125,11 @@ Assuming barcode01 as the control and barcode02 as the sample, specify each dire
|
|
|
126
125
|
The FASTA file should contain descriptions of the alleles anticipated as a result of genome editing.
|
|
127
126
|
|
|
128
127
|
> [!IMPORTANT]
|
|
129
|
-
>
|
|
128
|
+
> **A header name >control and its sequence are mandatory.**
|
|
130
129
|
|
|
131
130
|
If there are anticipated alleles (e.g., knock-ins or knock-outs), include their sequences in the FASTA file too. These anticipated alleles can be named arbitrarily.
|
|
132
131
|
|
|
133
|
-
Below is
|
|
132
|
+
Below is an example of a FASTA file:
|
|
134
133
|
|
|
135
134
|
```text
|
|
136
135
|
>control
|
|
@@ -166,12 +165,17 @@ Options:
|
|
|
166
165
|
#### Example
|
|
167
166
|
|
|
168
167
|
```bash
|
|
168
|
+
# Download example dataset
|
|
169
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_single.tar.gz
|
|
170
|
+
tar -xf example_single.tar.gz
|
|
171
|
+
|
|
172
|
+
# Run DAJIN2
|
|
169
173
|
DAJIN2 \
|
|
170
|
-
--control
|
|
171
|
-
--sample
|
|
172
|
-
--allele
|
|
173
|
-
--name
|
|
174
|
-
--genome
|
|
174
|
+
--control example_single/control \
|
|
175
|
+
--sample example_single/sample \
|
|
176
|
+
--allele example_single/stx2_deletion.fa \
|
|
177
|
+
--name stx2_deletion \
|
|
178
|
+
--genome mm39 \
|
|
175
179
|
--threads 4
|
|
176
180
|
```
|
|
177
181
|
|
|
@@ -208,7 +212,6 @@ DAJIN2 \
|
|
|
208
212
|
|
|
209
213
|
By using the `batch` subcommand, you can process multiple FASTQ files simultaneously.
|
|
210
214
|
For this purpose, a CSV or Excel file consolidating the sample information is required.
|
|
211
|
-
<!-- For a specific example, please refer to [this link](https://github.com/akikuno/DAJIN2/blob/main/examples/example-batch/batch.csv). -->
|
|
212
215
|
|
|
213
216
|
> [!NOTE]
|
|
214
217
|
> For guidance on how to compile sample information, please refer to [this document](https://docs.google.com/presentation/d/e/2PACX-1vSMEmXJPG2TNjfT66XZJRzqJd82aAqO5gJrdEzyhn15YBBr_Li-j5puOgVChYf3jA/embed?start=false&loop=false&delayms=3000).
|
|
@@ -226,13 +229,18 @@ options:
|
|
|
226
229
|
#### Example
|
|
227
230
|
|
|
228
231
|
```bash
|
|
229
|
-
|
|
232
|
+
# Download the example dataset
|
|
233
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_batch.tar.gz
|
|
234
|
+
tar -xf example_batch.tar.gz
|
|
235
|
+
|
|
236
|
+
# Run DAJIN2
|
|
237
|
+
DAJIN2 batch --file example_batch/batch.csv --threads 4
|
|
230
238
|
```
|
|
231
239
|
|
|
232
240
|
<!-- ```bash
|
|
233
241
|
# Download the example dataset
|
|
234
|
-
wget https://github.com/akikuno/DAJIN2/raw/main/examples/
|
|
235
|
-
tar -xf
|
|
242
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_batch.tar.gz
|
|
243
|
+
tar -xf example_batch.tar.gz
|
|
236
244
|
|
|
237
245
|
# Run DAJIN2
|
|
238
246
|
DAJIN2 batch --file example-batch/batch.csv --threads 3
|
|
@@ -313,16 +321,17 @@ For example, Tyr point mutation is highlighted in **green**.
|
|
|
313
321
|
### 3. MUTATION_INFO
|
|
314
322
|
|
|
315
323
|
The MUTATION_INFO directory saves tables depicting mutation sites for each allele.
|
|
316
|
-
An example of a Tyr point mutation is described by its position on the chromosome and the type of mutation.
|
|
324
|
+
An example of a *Tyr* point mutation is described by its position on the chromosome and the type of mutation.
|
|
317
325
|
|
|
318
326
|
<img src="https://user-images.githubusercontent.com/15861316/274519342-a613490d-5dbb-4a27-a2cf-bca0686b30f0.png" width="75%">
|
|
319
327
|
|
|
320
|
-
### 4. read_plot.html and read_plot.pdf
|
|
328
|
+
### 4. read_summary.xlsx, read_plot.html and read_plot.pdf
|
|
321
329
|
|
|
330
|
+
read_summary.xlsx describes the number of reads and presence proportion for each allele.
|
|
322
331
|
Both read_plot.html and read_plot.pdf illustrate the proportions of each allele.
|
|
323
|
-
The chart's **Allele type** indicates the type of allele, and **Percent of reads** shows the proportion of reads for
|
|
332
|
+
The chart's **Allele type** indicates the type of allele, and **Percent of reads** shows the proportion of reads for each allele.
|
|
324
333
|
|
|
325
|
-
|
|
334
|
+
The **Allele type** includes:
|
|
326
335
|
- **Intact**: Alleles that perfectly match the input FASTA allele.
|
|
327
336
|
- **Indels**: Substitutions, deletions, insertions, or inversions within 50 bases.
|
|
328
337
|
- **SV**: Substitutions, deletions, insertions, or inversions beyond 50 bases.
|
|
@@ -333,14 +342,10 @@ Additionally, the types of **Allele type** include:
|
|
|
333
342
|
> In PCR amplicon sequencing, the % of reads might not match the actual allele proportions due to amplification bias.
|
|
334
343
|
> Especially when large deletions are present, the deletion alleles might be significantly amplified, potentially not reflecting the actual allele proportions.
|
|
335
344
|
|
|
336
|
-
### 5. read_summary.xlsx
|
|
337
|
-
|
|
338
|
-
- read_summary.xlsx: Describes the number of reads and presence proportion for each allele.
|
|
339
|
-
|
|
340
345
|
## 📣Feedback and Support
|
|
341
346
|
|
|
342
347
|
For questions, bug reports, or other forms of feedback, we'd love to hear from you!
|
|
343
|
-
Please use [GitHub Issues](https://github.com/akikuno/DAJIN2/issues) for all reporting purposes.
|
|
348
|
+
Please use [GitHub Issues](https://github.com/akikuno/DAJIN2/issues/new/choose) for all reporting purposes.
|
|
344
349
|
|
|
345
350
|
Please refer to [CONTRIBUTING](https://github.com/akikuno/DAJIN2/blob/main/docs/CONTRIBUTING.md) for how to contribute and how to verify your contributions.
|
|
346
351
|
|
|
@@ -21,14 +21,14 @@ The name DAJIN is derived from the phrase 一網**打尽** (Ichimou **DAJIN** or
|
|
|
21
21
|
+ **Comprehensive Mutation Detection**: Equipped with the capability to detect genome editing events over a wide range, it can identify a broad spectrum of mutations, from small changes to large structural variations.
|
|
22
22
|
+ DAJIN2 can also detect complex mutations characteristic of genome editing, such as "insertions occurring in regions where deletions have occurred."
|
|
23
23
|
+ **Intuitive Visualization**: The outcomes of genome editing are visualized intuitively, allowing for the rapid and easy identification and analysis of mutations.
|
|
24
|
-
+ **Multi-Sample Compatibility**:
|
|
24
|
+
+ **Multi-Sample Compatibility**: Enabling parallel processing of multiple samples. This facilitates efficient progression of large-scale experiments and comparative studies.
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
## 🛠 Installation
|
|
28
28
|
|
|
29
29
|
### Prerequisites
|
|
30
30
|
|
|
31
|
-
- Python 3.
|
|
31
|
+
- Python 3.8 or later
|
|
32
32
|
- Unix-like environment (Linux, macOS, WSL2, etc.)
|
|
33
33
|
|
|
34
34
|
### From [Bioconda](https://anaconda.org/bioconda/DAJIN2) (Recommended)
|
|
@@ -45,6 +45,7 @@ conda activate env-dajin2
|
|
|
45
45
|
> CONDA_SUBDIR=osx-64 conda create -n env-dajin2 -c conda-forge -c bioconda python=3.10 DAJIN2 -y
|
|
46
46
|
> conda activate env-dajin2
|
|
47
47
|
> conda config --env --set subdir osx-64
|
|
48
|
+
> python -c "import platform; print(platform.machine())" # Confirm that the output is 'x86_64', not 'arm64'
|
|
48
49
|
> ```
|
|
49
50
|
|
|
50
51
|
### From [PyPI](https://pypi.org/project/DAJIN2/)
|
|
@@ -57,7 +58,7 @@ pip install DAJIN2
|
|
|
57
58
|
> If you encounter any issues during the installation, please refer to the [Troubleshooting Guide](https://github.com/akikuno/DAJIN2/blob/main/docs/TROUBLESHOOTING.md)
|
|
58
59
|
|
|
59
60
|
|
|
60
|
-
##
|
|
61
|
+
## 💻 Usage
|
|
61
62
|
|
|
62
63
|
### Required Files
|
|
63
64
|
|
|
@@ -91,11 +92,11 @@ Assuming barcode01 as the control and barcode02 as the sample, specify each dire
|
|
|
91
92
|
The FASTA file should contain descriptions of the alleles anticipated as a result of genome editing.
|
|
92
93
|
|
|
93
94
|
> [!IMPORTANT]
|
|
94
|
-
>
|
|
95
|
+
> **A header name >control and its sequence are mandatory.**
|
|
95
96
|
|
|
96
97
|
If there are anticipated alleles (e.g., knock-ins or knock-outs), include their sequences in the FASTA file too. These anticipated alleles can be named arbitrarily.
|
|
97
98
|
|
|
98
|
-
Below is
|
|
99
|
+
Below is an example of a FASTA file:
|
|
99
100
|
|
|
100
101
|
```text
|
|
101
102
|
>control
|
|
@@ -131,12 +132,17 @@ Options:
|
|
|
131
132
|
#### Example
|
|
132
133
|
|
|
133
134
|
```bash
|
|
135
|
+
# Download example dataset
|
|
136
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_single.tar.gz
|
|
137
|
+
tar -xf example_single.tar.gz
|
|
138
|
+
|
|
139
|
+
# Run DAJIN2
|
|
134
140
|
DAJIN2 \
|
|
135
|
-
--control
|
|
136
|
-
--sample
|
|
137
|
-
--allele
|
|
138
|
-
--name
|
|
139
|
-
--genome
|
|
141
|
+
--control example_single/control \
|
|
142
|
+
--sample example_single/sample \
|
|
143
|
+
--allele example_single/stx2_deletion.fa \
|
|
144
|
+
--name stx2_deletion \
|
|
145
|
+
--genome mm39 \
|
|
140
146
|
--threads 4
|
|
141
147
|
```
|
|
142
148
|
|
|
@@ -173,7 +179,6 @@ DAJIN2 \
|
|
|
173
179
|
|
|
174
180
|
By using the `batch` subcommand, you can process multiple FASTQ files simultaneously.
|
|
175
181
|
For this purpose, a CSV or Excel file consolidating the sample information is required.
|
|
176
|
-
<!-- For a specific example, please refer to [this link](https://github.com/akikuno/DAJIN2/blob/main/examples/example-batch/batch.csv). -->
|
|
177
182
|
|
|
178
183
|
> [!NOTE]
|
|
179
184
|
> For guidance on how to compile sample information, please refer to [this document](https://docs.google.com/presentation/d/e/2PACX-1vSMEmXJPG2TNjfT66XZJRzqJd82aAqO5gJrdEzyhn15YBBr_Li-j5puOgVChYf3jA/embed?start=false&loop=false&delayms=3000).
|
|
@@ -191,13 +196,18 @@ options:
|
|
|
191
196
|
#### Example
|
|
192
197
|
|
|
193
198
|
```bash
|
|
194
|
-
|
|
199
|
+
# Download the example dataset
|
|
200
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_batch.tar.gz
|
|
201
|
+
tar -xf example_batch.tar.gz
|
|
202
|
+
|
|
203
|
+
# Run DAJIN2
|
|
204
|
+
DAJIN2 batch --file example_batch/batch.csv --threads 4
|
|
195
205
|
```
|
|
196
206
|
|
|
197
207
|
<!-- ```bash
|
|
198
208
|
# Download the example dataset
|
|
199
|
-
wget https://github.com/akikuno/DAJIN2/raw/main/examples/
|
|
200
|
-
tar -xf
|
|
209
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_batch.tar.gz
|
|
210
|
+
tar -xf example_batch.tar.gz
|
|
201
211
|
|
|
202
212
|
# Run DAJIN2
|
|
203
213
|
DAJIN2 batch --file example-batch/batch.csv --threads 3
|
|
@@ -278,16 +288,17 @@ For example, Tyr point mutation is highlighted in **green**.
|
|
|
278
288
|
### 3. MUTATION_INFO
|
|
279
289
|
|
|
280
290
|
The MUTATION_INFO directory saves tables depicting mutation sites for each allele.
|
|
281
|
-
An example of a Tyr point mutation is described by its position on the chromosome and the type of mutation.
|
|
291
|
+
An example of a *Tyr* point mutation is described by its position on the chromosome and the type of mutation.
|
|
282
292
|
|
|
283
293
|
<img src="https://user-images.githubusercontent.com/15861316/274519342-a613490d-5dbb-4a27-a2cf-bca0686b30f0.png" width="75%">
|
|
284
294
|
|
|
285
|
-
### 4. read_plot.html and read_plot.pdf
|
|
295
|
+
### 4. read_summary.xlsx, read_plot.html and read_plot.pdf
|
|
286
296
|
|
|
297
|
+
read_summary.xlsx describes the number of reads and presence proportion for each allele.
|
|
287
298
|
Both read_plot.html and read_plot.pdf illustrate the proportions of each allele.
|
|
288
|
-
The chart's **Allele type** indicates the type of allele, and **Percent of reads** shows the proportion of reads for
|
|
299
|
+
The chart's **Allele type** indicates the type of allele, and **Percent of reads** shows the proportion of reads for each allele.
|
|
289
300
|
|
|
290
|
-
|
|
301
|
+
The **Allele type** includes:
|
|
291
302
|
- **Intact**: Alleles that perfectly match the input FASTA allele.
|
|
292
303
|
- **Indels**: Substitutions, deletions, insertions, or inversions within 50 bases.
|
|
293
304
|
- **SV**: Substitutions, deletions, insertions, or inversions beyond 50 bases.
|
|
@@ -298,14 +309,10 @@ Additionally, the types of **Allele type** include:
|
|
|
298
309
|
> In PCR amplicon sequencing, the % of reads might not match the actual allele proportions due to amplification bias.
|
|
299
310
|
> Especially when large deletions are present, the deletion alleles might be significantly amplified, potentially not reflecting the actual allele proportions.
|
|
300
311
|
|
|
301
|
-
### 5. read_summary.xlsx
|
|
302
|
-
|
|
303
|
-
- read_summary.xlsx: Describes the number of reads and presence proportion for each allele.
|
|
304
|
-
|
|
305
312
|
## 📣Feedback and Support
|
|
306
313
|
|
|
307
314
|
For questions, bug reports, or other forms of feedback, we'd love to hear from you!
|
|
308
|
-
Please use [GitHub Issues](https://github.com/akikuno/DAJIN2/issues) for all reporting purposes.
|
|
315
|
+
Please use [GitHub Issues](https://github.com/akikuno/DAJIN2/issues/new/choose) for all reporting purposes.
|
|
309
316
|
|
|
310
317
|
Please refer to [CONTRIBUTING](https://github.com/akikuno/DAJIN2/blob/main/docs/CONTRIBUTING.md) for how to contribute and how to verify your contributions.
|
|
311
318
|
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
numpy >= 1.24.0
|
|
2
|
+
scipy >= 1.10.0
|
|
3
|
+
pandas >= 1.0.0
|
|
4
|
+
openpyxl >= 3.1.0
|
|
5
|
+
rapidfuzz >=3.6.0
|
|
6
|
+
scikit-learn >= 1.3.0
|
|
7
|
+
|
|
8
|
+
mappy >= 2.24
|
|
9
|
+
pysam >= 0.21.0
|
|
10
|
+
|
|
11
|
+
Flask >= 2.2.0
|
|
12
|
+
waitress >= 2.1.0
|
|
13
|
+
Jinja2 >= 3.1.0
|
|
14
|
+
|
|
15
|
+
plotly >= 5.19.0
|
|
16
|
+
kaleido >= 0.2.0
|
|
17
|
+
|
|
18
|
+
cstag >= 1.0.0
|
|
19
|
+
midsv >= 0.11.0
|
|
20
|
+
wslPath >=0.4.1
|
|
@@ -9,7 +9,7 @@ with open("requirements.txt") as requirements_file:
|
|
|
9
9
|
|
|
10
10
|
setuptools.setup(
|
|
11
11
|
name="DAJIN2",
|
|
12
|
-
version="0.4.1",
|
|
12
|
+
version="0.4.3",
|
|
13
13
|
author="Akihiro Kuno",
|
|
14
14
|
author_email="akuno@md.tsukuba.ac.jp",
|
|
15
15
|
description="One-step genotyping tools for targeted long-read sequencing",
|
|
@@ -11,20 +11,6 @@ def calculate_label_percentages(labels: list[int]) -> dict[int, float]:
|
|
|
11
11
|
return {label: (count / total_labels * 100) for label, count in label_counts.items()}
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def merge_mixed_cluster(labels_control: list[int], labels_sample: list[int], threshold: float = 0.5) -> list[int]:
|
|
15
|
-
"""Merge labels in sample if they appear more than 'threshold' percentage in control."""
|
|
16
|
-
labels_merged = labels_sample.copy()
|
|
17
|
-
label_percentages_control = calculate_label_percentages(labels_control)
|
|
18
|
-
mixed_labels = {label for label, percent in label_percentages_control.items() if percent > threshold}
|
|
19
|
-
|
|
20
|
-
new_label = max(labels_merged) + 1
|
|
21
|
-
for i, label in enumerate(labels_sample):
|
|
22
|
-
if label in mixed_labels:
|
|
23
|
-
labels_merged[i] = new_label
|
|
24
|
-
|
|
25
|
-
return labels_merged
|
|
26
|
-
|
|
27
|
-
|
|
28
14
|
def map_clusters_to_previous(labels_sample: list[int], labels_previous: list[int]) -> dict[int, int]:
|
|
29
15
|
"""
|
|
30
16
|
Determine which cluster in labels_previous corresponds to each cluster in labels_sample.
|
|
@@ -63,6 +49,8 @@ def merge_minor_cluster(
|
|
|
63
49
|
minor_labels_percentage = {label for label, percent in label_percentages.items() if percent < threshold_percentage}
|
|
64
50
|
minor_labels_readnumber = {label for label, num in Counter(labels_sample).items() if num <= threshold_readnumber}
|
|
65
51
|
minor_labels = minor_labels_percentage | minor_labels_readnumber
|
|
52
|
+
if minor_labels == set():
|
|
53
|
+
return labels_sample
|
|
66
54
|
|
|
67
55
|
correspondence = map_clusters_to_previous(labels_sample, labels_previous)
|
|
68
56
|
update_required_labels = get_update_required_labels(correspondence)
|
|
@@ -70,7 +58,23 @@ def merge_minor_cluster(
|
|
|
70
58
|
labels_merged = labels_sample.copy()
|
|
71
59
|
for m in minor_labels:
|
|
72
60
|
new_label = max(labels_merged) + 1
|
|
73
|
-
labels_merged = [
|
|
61
|
+
labels_merged = [
|
|
62
|
+
new_label if label in update_required_labels[correspondence[m]] else label for label in labels_merged
|
|
63
|
+
]
|
|
64
|
+
|
|
65
|
+
return labels_merged
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def merge_mixed_cluster(labels_control: list[int], labels_sample: list[int], threshold: float = 0.5) -> list[int]:
|
|
69
|
+
"""Merge labels in sample if they appear more than 'threshold' percentage in control."""
|
|
70
|
+
labels_merged = labels_sample.copy()
|
|
71
|
+
label_percentages_control = calculate_label_percentages(labels_control)
|
|
72
|
+
mixed_labels = {label for label, percent in label_percentages_control.items() if percent > threshold}
|
|
73
|
+
|
|
74
|
+
new_label = max(labels_merged) + 1
|
|
75
|
+
for i, label in enumerate(labels_sample):
|
|
76
|
+
if label in mixed_labels:
|
|
77
|
+
labels_merged[i] = new_label
|
|
74
78
|
|
|
75
79
|
return labels_merged
|
|
76
80
|
|
|
@@ -82,7 +86,7 @@ def merge_minor_cluster(
|
|
|
82
86
|
|
|
83
87
|
def merge_labels(labels_control: list[int], labels_sample: list[int], labels_previous: list[int]) -> list[int]:
|
|
84
88
|
labels_merged = merge_minor_cluster(
|
|
85
|
-
labels_sample, labels_previous, threshold_percentage=0.5, threshold_readnumber=
|
|
89
|
+
labels_sample, labels_previous, threshold_percentage=0.5, threshold_readnumber=5
|
|
86
90
|
)
|
|
87
91
|
labels_merged = merge_mixed_cluster(labels_control, labels_merged)
|
|
88
92
|
return labels_merged
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from
|
|
4
|
+
from dataclasses import dataclass
|
|
5
5
|
from itertools import groupby
|
|
6
6
|
from collections import defaultdict
|
|
7
7
|
|
|
@@ -90,7 +90,8 @@ def call_percentage(cssplits: list[list[str]], mutation_loci: list[set[str]]) ->
|
|
|
90
90
|
###########################################################
|
|
91
91
|
|
|
92
92
|
|
|
93
|
-
|
|
93
|
+
@dataclass(frozen=True)
|
|
94
|
+
class ConsensusKey:
|
|
94
95
|
allele: str
|
|
95
96
|
label: int
|
|
96
97
|
percent: float
|
|
@@ -1,13 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
|
-
from
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class ConsensusKey(NamedTuple):
|
|
8
|
-
allele: str
|
|
9
|
-
label: int
|
|
10
|
-
percent: float
|
|
4
|
+
from DAJIN2.core.consensus.consensus import ConsensusKey
|
|
11
5
|
|
|
12
6
|
|
|
13
7
|
def _detect_sv(cons_percentages: dict[ConsensusKey, list], threshold: int = 50) -> list[bool]:
|
|
@@ -2,119 +2,16 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import shutil
|
|
4
4
|
import logging
|
|
5
|
-
import uuid
|
|
6
5
|
|
|
7
6
|
from pathlib import Path
|
|
8
|
-
from typing import NamedTuple
|
|
9
|
-
from collections import defaultdict
|
|
10
7
|
|
|
11
|
-
from DAJIN2.utils import io,
|
|
8
|
+
from DAJIN2.utils import io, fastx_handler
|
|
12
9
|
from DAJIN2.core import classification, clustering, consensus, preprocess, report
|
|
10
|
+
from DAJIN2.core.preprocess.input_formatter import FormattedInputs
|
|
13
11
|
|
|
14
12
|
logger = logging.getLogger(__name__)
|
|
15
13
|
|
|
16
14
|
|
|
17
|
-
def parse_arguments(arguments: dict) -> tuple:
|
|
18
|
-
genome_urls = defaultdict(str)
|
|
19
|
-
if arguments.get("genome"):
|
|
20
|
-
genome_urls.update(
|
|
21
|
-
{"genome": arguments["genome"], "blat": arguments["blat"], "goldenpath": arguments["goldenpath"]}
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
return (
|
|
25
|
-
arguments["sample"],
|
|
26
|
-
arguments["control"],
|
|
27
|
-
arguments["allele"],
|
|
28
|
-
arguments["name"],
|
|
29
|
-
arguments["threads"],
|
|
30
|
-
genome_urls,
|
|
31
|
-
uuid.uuid4().hex,
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def convert_input_paths_to_posix(sample: str, control: str, allele: str) -> tuple:
|
|
36
|
-
sample = io.convert_to_posix(sample)
|
|
37
|
-
control = io.convert_to_posix(control)
|
|
38
|
-
allele = io.convert_to_posix(allele)
|
|
39
|
-
|
|
40
|
-
return sample, control, allele
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def create_temporal_directory(name: str, control_name: str) -> Path:
|
|
44
|
-
tempdir = Path(config.TEMP_ROOT_DIR, name)
|
|
45
|
-
Path(tempdir, "cache", ".igvjs", control_name).mkdir(parents=True, exist_ok=True)
|
|
46
|
-
|
|
47
|
-
return tempdir
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def check_caches(tempdir: Path, path_allele: str, genome_url: str) -> bool:
|
|
51
|
-
is_cache_hash = preprocess.cache_checker.exists_cached_hash(tempdir=tempdir, path=path_allele)
|
|
52
|
-
is_cache_genome = preprocess.cache_checker.exists_cached_genome(tempdir=tempdir, genome=genome_url)
|
|
53
|
-
|
|
54
|
-
return is_cache_hash and is_cache_genome
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def get_genome_coordinates(genome_urls: dict, fasta_alleles: dict, is_cache_genome: bool, tempdir: Path) -> dict:
|
|
58
|
-
genome_coordinates = {
|
|
59
|
-
"genome": genome_urls["genome"],
|
|
60
|
-
"chrom_size": 0,
|
|
61
|
-
"chrom": "control",
|
|
62
|
-
"start": 0,
|
|
63
|
-
"end": len(fasta_alleles["control"]) - 1,
|
|
64
|
-
"strand": "+",
|
|
65
|
-
}
|
|
66
|
-
if genome_urls["genome"]:
|
|
67
|
-
if is_cache_genome:
|
|
68
|
-
genome_coordinates = next(io.read_jsonl(Path(tempdir, "cache", "genome_coordinates.jsonl")))
|
|
69
|
-
else:
|
|
70
|
-
genome_coordinates = preprocess.genome_fetcher.fetch_coordinates(
|
|
71
|
-
genome_coordinates, genome_urls, fasta_alleles["control"]
|
|
72
|
-
)
|
|
73
|
-
genome_coordinates["chrom_size"] = preprocess.genome_fetcher.fetch_chromosome_size(
|
|
74
|
-
genome_coordinates, genome_urls
|
|
75
|
-
)
|
|
76
|
-
io.write_jsonl([genome_coordinates], Path(tempdir, "cache", "genome_coordinates.jsonl"))
|
|
77
|
-
|
|
78
|
-
return genome_coordinates
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
class FormattedInputs(NamedTuple):
|
|
82
|
-
path_sample: str
|
|
83
|
-
path_control: str
|
|
84
|
-
path_allele: str
|
|
85
|
-
sample_name: str
|
|
86
|
-
control_name: str
|
|
87
|
-
fasta_alleles: dict[str, str]
|
|
88
|
-
tempdir: Path
|
|
89
|
-
genome_coordinates: dict[str, str]
|
|
90
|
-
threads: int
|
|
91
|
-
uuid: str
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def format_inputs(arguments: dict) -> FormattedInputs:
|
|
95
|
-
path_sample, path_control, path_allele, name, threads, genome_urls, uuid = parse_arguments(arguments)
|
|
96
|
-
path_sample, path_control, path_allele = convert_input_paths_to_posix(path_sample, path_control, path_allele)
|
|
97
|
-
sample_name = preprocess.fastx_parser.extract_basename(path_sample)
|
|
98
|
-
control_name = preprocess.fastx_parser.extract_basename(path_control)
|
|
99
|
-
fasta_alleles = preprocess.fastx_parser.dictionize_allele(path_allele)
|
|
100
|
-
tempdir = create_temporal_directory(name, control_name)
|
|
101
|
-
is_cache_genome = check_caches(tempdir, path_allele, genome_urls["genome"])
|
|
102
|
-
genome_coordinates = get_genome_coordinates(genome_urls, fasta_alleles, is_cache_genome, tempdir)
|
|
103
|
-
|
|
104
|
-
return FormattedInputs(
|
|
105
|
-
path_sample,
|
|
106
|
-
path_control,
|
|
107
|
-
path_allele,
|
|
108
|
-
sample_name,
|
|
109
|
-
control_name,
|
|
110
|
-
fasta_alleles,
|
|
111
|
-
tempdir,
|
|
112
|
-
genome_coordinates,
|
|
113
|
-
threads,
|
|
114
|
-
uuid,
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
|
|
118
15
|
###########################################################
|
|
119
16
|
# main
|
|
120
17
|
###########################################################
|
|
@@ -126,9 +23,9 @@ def execute_control(arguments: dict):
|
|
|
126
23
|
###########################################################
|
|
127
24
|
# Preprocess
|
|
128
25
|
###########################################################
|
|
129
|
-
ARGS = format_inputs(arguments)
|
|
130
|
-
preprocess.
|
|
131
|
-
preprocess.
|
|
26
|
+
ARGS: FormattedInputs = preprocess.format_inputs(arguments)
|
|
27
|
+
preprocess.create_temporal_directories(ARGS.tempdir, ARGS.control_name, is_control=True)
|
|
28
|
+
preprocess.create_report_directories(ARGS.tempdir, ARGS.control_name, is_control=True)
|
|
132
29
|
io.cache_control_hash(ARGS.tempdir, ARGS.path_allele)
|
|
133
30
|
|
|
134
31
|
###########################################################
|
|
@@ -151,7 +48,7 @@ def execute_control(arguments: dict):
|
|
|
151
48
|
# ============================================================
|
|
152
49
|
# Export fasta files as single-FASTA format
|
|
153
50
|
# ============================================================
|
|
154
|
-
|
|
51
|
+
fastx_handler.export_fasta_files(ARGS.tempdir, ARGS.fasta_alleles, ARGS.control_name)
|
|
155
52
|
|
|
156
53
|
# ============================================================
|
|
157
54
|
# Mapping using mappy
|
|
@@ -173,8 +70,8 @@ def execute_control(arguments: dict):
|
|
|
173
70
|
# Output BAM files
|
|
174
71
|
###########################################################
|
|
175
72
|
logger.info(f"Output BAM files of {arguments['control']}...")
|
|
176
|
-
report.
|
|
177
|
-
ARGS.tempdir, ARGS.control_name, ARGS.genome_coordinates, ARGS.threads, is_control=True
|
|
73
|
+
report.bam_exporter.export_to_bam(
|
|
74
|
+
ARGS.tempdir, ARGS.control_name, ARGS.genome_coordinates, ARGS.threads, ARGS.uuid, is_control=True
|
|
178
75
|
)
|
|
179
76
|
###########################################################
|
|
180
77
|
# Finish call
|
|
@@ -189,9 +86,9 @@ def execute_sample(arguments: dict):
|
|
|
189
86
|
# Preprocess
|
|
190
87
|
###########################################################
|
|
191
88
|
|
|
192
|
-
ARGS = format_inputs(arguments)
|
|
193
|
-
preprocess.
|
|
194
|
-
preprocess.
|
|
89
|
+
ARGS: FormattedInputs = preprocess.format_inputs(arguments)
|
|
90
|
+
preprocess.create_temporal_directories(ARGS.tempdir, ARGS.sample_name, is_control=False)
|
|
91
|
+
preprocess.create_report_directories(ARGS.tempdir, ARGS.sample_name, is_control=False)
|
|
195
92
|
|
|
196
93
|
logger.info(f"Preprocess {arguments['sample']}...")
|
|
197
94
|
|
|
@@ -209,7 +106,7 @@ def execute_sample(arguments: dict):
|
|
|
209
106
|
shutil.copy(path_fasta, Path(ARGS.tempdir, ARGS.sample_name, "fasta"))
|
|
210
107
|
|
|
211
108
|
paths_fasta = Path(ARGS.tempdir, ARGS.sample_name, "fasta").glob("*.fasta")
|
|
212
|
-
preprocess.
|
|
109
|
+
preprocess.generate_sam(ARGS, paths_fasta, is_control=False, is_insertion=False)
|
|
213
110
|
|
|
214
111
|
# ============================================================
|
|
215
112
|
# MIDSV conversion
|
|
@@ -234,8 +131,8 @@ def execute_sample(arguments: dict):
|
|
|
234
131
|
|
|
235
132
|
if paths_insertion_fasta:
|
|
236
133
|
# mapping to insertion alleles
|
|
237
|
-
preprocess.
|
|
238
|
-
preprocess.
|
|
134
|
+
preprocess.generate_sam(ARGS, paths_insertion_fasta, is_control=True, is_insertion=True)
|
|
135
|
+
preprocess.generate_sam(ARGS, paths_insertion_fasta, is_control=False, is_insertion=True)
|
|
239
136
|
# add insertions to ARGS.fasta_alleles
|
|
240
137
|
for path_fasta in paths_insertion_fasta:
|
|
241
138
|
allele, seq = Path(path_fasta).read_text().strip().split("\n")
|
|
@@ -307,15 +204,15 @@ def execute_sample(arguments: dict):
|
|
|
307
204
|
# RESULT
|
|
308
205
|
io.write_jsonl(RESULT_SAMPLE, Path(ARGS.tempdir, "result", f"{ARGS.sample_name}.jsonl"))
|
|
309
206
|
# FASTA
|
|
310
|
-
report.
|
|
311
|
-
report.
|
|
207
|
+
report.sequence_exporter.export_to_fasta(ARGS.tempdir, ARGS.sample_name, cons_sequence)
|
|
208
|
+
report.sequence_exporter.export_reference_to_fasta(ARGS.tempdir, ARGS.sample_name)
|
|
312
209
|
# HTML
|
|
313
|
-
report.
|
|
210
|
+
report.sequence_exporter.export_to_html(ARGS.tempdir, ARGS.sample_name, cons_percentage)
|
|
314
211
|
# CSV (Allele Info)
|
|
315
|
-
report.
|
|
212
|
+
report.mutation_exporter.export_to_csv(ARGS.tempdir, ARGS.sample_name, ARGS.genome_coordinates, cons_percentage)
|
|
316
213
|
# BAM
|
|
317
|
-
report.
|
|
318
|
-
ARGS.tempdir, ARGS.sample_name, ARGS.genome_coordinates, ARGS.threads, RESULT_SAMPLE
|
|
214
|
+
report.bam_exporter.export_to_bam(
|
|
215
|
+
ARGS.tempdir, ARGS.sample_name, ARGS.genome_coordinates, ARGS.threads, ARGS.uuid, RESULT_SAMPLE
|
|
319
216
|
)
|
|
320
217
|
for path_bam_igvjs in Path(ARGS.tempdir, "cache", ".igvjs").glob(f"{ARGS.control_name}_control.bam*"):
|
|
321
218
|
shutil.copy(path_bam_igvjs, Path(ARGS.tempdir, "report", ".igvjs", ARGS.sample_name))
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from DAJIN2.core.preprocess.cache_checker import exists_cached_hash, exists_cached_genome
|
|
2
|
+
from DAJIN2.core.preprocess.genome_fetcher import fetch_coordinates, fetch_chromosome_size
|
|
3
|
+
from DAJIN2.core.preprocess.mapping import generate_sam
|
|
4
|
+
from DAJIN2.core.preprocess.directory_manager import create_temporal_directories, create_report_directories
|
|
5
|
+
from DAJIN2.core.preprocess.input_formatter import format_inputs
|
|
6
|
+
from DAJIN2.core.preprocess.midsv_caller import generate_midsv
|
|
7
|
+
from DAJIN2.core.preprocess.knockin_handler import extract_knockin_loci
|
|
8
|
+
from DAJIN2.core.preprocess.mutation_extractor import cache_mutation_loci
|
|
9
|
+
from DAJIN2.core.preprocess.insertions_to_fasta import generate_insertion_fasta
|