DAJIN2 0.4.2__zip → 0.4.3__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {DAJIN2-0.4.2/src/DAJIN2.egg-info → DAJIN2-0.4.3}/PKG-INFO +29 -19
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/README.md +19 -9
- DAJIN2-0.4.3/requirements.txt +20 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/setup.py +1 -1
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/label_merger.py +20 -16
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/core.py +8 -8
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/genome_fetcher.py +11 -3
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/midsv_caller.py +3 -4
- DAJIN2-0.4.3/src/DAJIN2/core/report/__init__.py +3 -0
- DAJIN2-0.4.2/src/DAJIN2/core/report/report_bam.py → DAJIN2-0.4.3/src/DAJIN2/core/report/bam_exporter.py +64 -50
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/main.py +1 -1
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/utils/io.py +6 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/utils/sam_handler.py +0 -13
- {DAJIN2-0.4.2 → DAJIN2-0.4.3/src/DAJIN2.egg-info}/PKG-INFO +29 -19
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2.egg-info/SOURCES.txt +3 -3
- DAJIN2-0.4.3/src/DAJIN2.egg-info/requires.txt +16 -0
- DAJIN2-0.4.2/requirements.txt +0 -20
- DAJIN2-0.4.2/src/DAJIN2/core/report/__init__.py +0 -3
- DAJIN2-0.4.2/src/DAJIN2.egg-info/requires.txt +0 -16
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/LICENSE +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/MANIFEST.in +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/setup.cfg +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/__init__.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/__init__.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/classification/__init__.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/classification/allele_merger.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/classification/classifier.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/__init__.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/appender.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/clustering.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/kmer_generator.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/label_extractor.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/label_updator.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/score_handler.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/clustering/strand_bias_handler.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/consensus/__init__.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/consensus/clust_formatter.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/consensus/consensus.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/consensus/mutation_extractor.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/consensus/name_handler.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/consensus/similarity_searcher.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/__init__.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/cache_checker.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/directory_manager.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/homopolymer_handler.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/input_formatter.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/insertions_to_fasta.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/knockin_handler.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/mapping.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/preprocess/mutation_extractor.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/core/report/insertion_reflector.py +0 -0
- /DAJIN2-0.4.2/src/DAJIN2/core/report/report_mutation.py → /DAJIN2-0.4.3/src/DAJIN2/core/report/mutation_exporter.py +0 -0
- /DAJIN2-0.4.2/src/DAJIN2/core/report/report_files.py → /DAJIN2-0.4.3/src/DAJIN2/core/report/sequence_exporter.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/gui.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/static/css/style.css +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/template_igvjs.html +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/templates/index.html +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/utils/config.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/utils/cssplits_handler.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/utils/dna_handler.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/utils/fastx_handler.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/utils/input_validator.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/utils/multiprocess.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/utils/report_generator.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2/view.py +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2.egg-info/dependency_links.txt +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2.egg-info/entry_points.txt +0 -0
- {DAJIN2-0.4.2 → DAJIN2-0.4.3}/src/DAJIN2.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: DAJIN2
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: One-step genotyping tools for targeted long-read sequencing
|
|
5
5
|
Home-page: https://github.com/akikuno/DAJIN2
|
|
6
6
|
Author: Akihiro Kuno
|
|
@@ -14,22 +14,22 @@ Classifier: Intended Audience :: Science/Research
|
|
|
14
14
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
15
15
|
Description-Content-Type: text/markdown
|
|
16
16
|
License-File: LICENSE
|
|
17
|
-
Requires-Dist: numpy>=1.
|
|
18
|
-
Requires-Dist: scipy>=1.
|
|
17
|
+
Requires-Dist: numpy>=1.24.0
|
|
18
|
+
Requires-Dist: scipy>=1.10.0
|
|
19
19
|
Requires-Dist: pandas>=1.0.0
|
|
20
|
-
Requires-Dist: openpyxl>=3.
|
|
21
|
-
Requires-Dist: rapidfuzz>=3.
|
|
22
|
-
Requires-Dist: scikit-learn>=1.
|
|
20
|
+
Requires-Dist: openpyxl>=3.1.0
|
|
21
|
+
Requires-Dist: rapidfuzz>=3.6.0
|
|
22
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
23
23
|
Requires-Dist: mappy>=2.24
|
|
24
|
-
Requires-Dist: pysam>=0.
|
|
24
|
+
Requires-Dist: pysam>=0.21.0
|
|
25
25
|
Requires-Dist: Flask>=2.2.0
|
|
26
26
|
Requires-Dist: waitress>=2.1.0
|
|
27
27
|
Requires-Dist: Jinja2>=3.1.0
|
|
28
|
-
Requires-Dist: plotly>=5.
|
|
28
|
+
Requires-Dist: plotly>=5.19.0
|
|
29
29
|
Requires-Dist: kaleido>=0.2.0
|
|
30
30
|
Requires-Dist: cstag>=1.0.0
|
|
31
|
-
Requires-Dist: midsv>=0.
|
|
32
|
-
Requires-Dist: wslPath>=0.
|
|
31
|
+
Requires-Dist: midsv>=0.11.0
|
|
32
|
+
Requires-Dist: wslPath>=0.4.1
|
|
33
33
|
|
|
34
34
|
[](https://choosealicense.com/licenses/mit/)
|
|
35
35
|
[](https://github.com/akikuno/dajin2/actions)
|
|
@@ -78,6 +78,7 @@ conda activate env-dajin2
|
|
|
78
78
|
> CONDA_SUBDIR=osx-64 conda create -n env-dajin2 -c conda-forge -c bioconda python=3.10 DAJIN2 -y
|
|
79
79
|
> conda activate env-dajin2
|
|
80
80
|
> conda config --env --set subdir osx-64
|
|
81
|
+
> python -c "import platform; print(platform.machine())" # Confirm that the output is 'x86_64', not 'arm64'
|
|
81
82
|
> ```
|
|
82
83
|
|
|
83
84
|
### From [PyPI](https://pypi.org/project/DAJIN2/)
|
|
@@ -164,12 +165,17 @@ Options:
|
|
|
164
165
|
#### Example
|
|
165
166
|
|
|
166
167
|
```bash
|
|
168
|
+
# Download example dataset
|
|
169
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_single.tar.gz
|
|
170
|
+
tar -xf example_single.tar.gz
|
|
171
|
+
|
|
172
|
+
# Run DAJIN2
|
|
167
173
|
DAJIN2 \
|
|
168
|
-
--control
|
|
169
|
-
--sample
|
|
170
|
-
--allele
|
|
171
|
-
--name
|
|
172
|
-
--genome
|
|
174
|
+
--control example_single/control \
|
|
175
|
+
--sample example_single/sample \
|
|
176
|
+
--allele example_single/stx2_deletion.fa \
|
|
177
|
+
--name stx2_deletion \
|
|
178
|
+
--genome mm39 \
|
|
173
179
|
--threads 4
|
|
174
180
|
```
|
|
175
181
|
|
|
@@ -206,7 +212,6 @@ DAJIN2 \
|
|
|
206
212
|
|
|
207
213
|
By using the `batch` subcommand, you can process multiple FASTQ files simultaneously.
|
|
208
214
|
For this purpose, a CSV or Excel file consolidating the sample information is required.
|
|
209
|
-
<!-- For a specific example, please refer to [this link](https://github.com/akikuno/DAJIN2/blob/main/examples/example-batch/batch.csv). -->
|
|
210
215
|
|
|
211
216
|
> [!NOTE]
|
|
212
217
|
> For guidance on how to compile sample information, please refer to [this document](https://docs.google.com/presentation/d/e/2PACX-1vSMEmXJPG2TNjfT66XZJRzqJd82aAqO5gJrdEzyhn15YBBr_Li-j5puOgVChYf3jA/embed?start=false&loop=false&delayms=3000).
|
|
@@ -224,13 +229,18 @@ options:
|
|
|
224
229
|
#### Example
|
|
225
230
|
|
|
226
231
|
```bash
|
|
227
|
-
|
|
232
|
+
# Download the example dataset
|
|
233
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_batch.tar.gz
|
|
234
|
+
tar -xf example_batch.tar.gz
|
|
235
|
+
|
|
236
|
+
# Run DAJIN2
|
|
237
|
+
DAJIN2 batch --file example_batch/batch.csv --threads 4
|
|
228
238
|
```
|
|
229
239
|
|
|
230
240
|
<!-- ```bash
|
|
231
241
|
# Download the example dataset
|
|
232
|
-
wget https://github.com/akikuno/DAJIN2/raw/main/examples/
|
|
233
|
-
tar -xf
|
|
242
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_batch.tar.gz
|
|
243
|
+
tar -xf example_batch.tar.gz
|
|
234
244
|
|
|
235
245
|
# Run DAJIN2
|
|
236
246
|
DAJIN2 batch --file example-batch/batch.csv --threads 3
|
|
@@ -45,6 +45,7 @@ conda activate env-dajin2
|
|
|
45
45
|
> CONDA_SUBDIR=osx-64 conda create -n env-dajin2 -c conda-forge -c bioconda python=3.10 DAJIN2 -y
|
|
46
46
|
> conda activate env-dajin2
|
|
47
47
|
> conda config --env --set subdir osx-64
|
|
48
|
+
> python -c "import platform; print(platform.machine())" # Confirm that the output is 'x86_64', not 'arm64'
|
|
48
49
|
> ```
|
|
49
50
|
|
|
50
51
|
### From [PyPI](https://pypi.org/project/DAJIN2/)
|
|
@@ -131,12 +132,17 @@ Options:
|
|
|
131
132
|
#### Example
|
|
132
133
|
|
|
133
134
|
```bash
|
|
135
|
+
# Download example dataset
|
|
136
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_single.tar.gz
|
|
137
|
+
tar -xf example_single.tar.gz
|
|
138
|
+
|
|
139
|
+
# Run DAJIN2
|
|
134
140
|
DAJIN2 \
|
|
135
|
-
--control
|
|
136
|
-
--sample
|
|
137
|
-
--allele
|
|
138
|
-
--name
|
|
139
|
-
--genome
|
|
141
|
+
--control example_single/control \
|
|
142
|
+
--sample example_single/sample \
|
|
143
|
+
--allele example_single/stx2_deletion.fa \
|
|
144
|
+
--name stx2_deletion \
|
|
145
|
+
--genome mm39 \
|
|
140
146
|
--threads 4
|
|
141
147
|
```
|
|
142
148
|
|
|
@@ -173,7 +179,6 @@ DAJIN2 \
|
|
|
173
179
|
|
|
174
180
|
By using the `batch` subcommand, you can process multiple FASTQ files simultaneously.
|
|
175
181
|
For this purpose, a CSV or Excel file consolidating the sample information is required.
|
|
176
|
-
<!-- For a specific example, please refer to [this link](https://github.com/akikuno/DAJIN2/blob/main/examples/example-batch/batch.csv). -->
|
|
177
182
|
|
|
178
183
|
> [!NOTE]
|
|
179
184
|
> For guidance on how to compile sample information, please refer to [this document](https://docs.google.com/presentation/d/e/2PACX-1vSMEmXJPG2TNjfT66XZJRzqJd82aAqO5gJrdEzyhn15YBBr_Li-j5puOgVChYf3jA/embed?start=false&loop=false&delayms=3000).
|
|
@@ -191,13 +196,18 @@ options:
|
|
|
191
196
|
#### Example
|
|
192
197
|
|
|
193
198
|
```bash
|
|
194
|
-
|
|
199
|
+
# Download the example dataset
|
|
200
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_batch.tar.gz
|
|
201
|
+
tar -xf example_batch.tar.gz
|
|
202
|
+
|
|
203
|
+
# Run DAJIN2
|
|
204
|
+
DAJIN2 batch --file example_batch/batch.csv --threads 4
|
|
195
205
|
```
|
|
196
206
|
|
|
197
207
|
<!-- ```bash
|
|
198
208
|
# Download the example dataset
|
|
199
|
-
wget https://github.com/akikuno/DAJIN2/raw/main/examples/
|
|
200
|
-
tar -xf
|
|
209
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_batch.tar.gz
|
|
210
|
+
tar -xf example_batch.tar.gz
|
|
201
211
|
|
|
202
212
|
# Run DAJIN2
|
|
203
213
|
DAJIN2 batch --file example-batch/batch.csv --threads 3
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
numpy >= 1.24.0
|
|
2
|
+
scipy >= 1.10.0
|
|
3
|
+
pandas >= 1.0.0
|
|
4
|
+
openpyxl >= 3.1.0
|
|
5
|
+
rapidfuzz >=3.6.0
|
|
6
|
+
scikit-learn >= 1.3.0
|
|
7
|
+
|
|
8
|
+
mappy >= 2.24
|
|
9
|
+
pysam >= 0.21.0
|
|
10
|
+
|
|
11
|
+
Flask >= 2.2.0
|
|
12
|
+
waitress >= 2.1.0
|
|
13
|
+
Jinja2 >= 3.1.0
|
|
14
|
+
|
|
15
|
+
plotly >= 5.19.0
|
|
16
|
+
kaleido >= 0.2.0
|
|
17
|
+
|
|
18
|
+
cstag >= 1.0.0
|
|
19
|
+
midsv >= 0.11.0
|
|
20
|
+
wslPath >=0.4.1
|
|
@@ -9,7 +9,7 @@ with open("requirements.txt") as requirements_file:
|
|
|
9
9
|
|
|
10
10
|
setuptools.setup(
|
|
11
11
|
name="DAJIN2",
|
|
12
|
-
version="0.4.
|
|
12
|
+
version="0.4.3",
|
|
13
13
|
author="Akihiro Kuno",
|
|
14
14
|
author_email="akuno@md.tsukuba.ac.jp",
|
|
15
15
|
description="One-step genotyping tools for targeted long-read sequencing",
|
|
@@ -11,20 +11,6 @@ def calculate_label_percentages(labels: list[int]) -> dict[int, float]:
|
|
|
11
11
|
return {label: (count / total_labels * 100) for label, count in label_counts.items()}
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def merge_mixed_cluster(labels_control: list[int], labels_sample: list[int], threshold: float = 0.5) -> list[int]:
|
|
15
|
-
"""Merge labels in sample if they appear more than 'threshold' percentage in control."""
|
|
16
|
-
labels_merged = labels_sample.copy()
|
|
17
|
-
label_percentages_control = calculate_label_percentages(labels_control)
|
|
18
|
-
mixed_labels = {label for label, percent in label_percentages_control.items() if percent > threshold}
|
|
19
|
-
|
|
20
|
-
new_label = max(labels_merged) + 1
|
|
21
|
-
for i, label in enumerate(labels_sample):
|
|
22
|
-
if label in mixed_labels:
|
|
23
|
-
labels_merged[i] = new_label
|
|
24
|
-
|
|
25
|
-
return labels_merged
|
|
26
|
-
|
|
27
|
-
|
|
28
14
|
def map_clusters_to_previous(labels_sample: list[int], labels_previous: list[int]) -> dict[int, int]:
|
|
29
15
|
"""
|
|
30
16
|
Determine which cluster in labels_previous corresponds to each cluster in labels_sample.
|
|
@@ -63,6 +49,8 @@ def merge_minor_cluster(
|
|
|
63
49
|
minor_labels_percentage = {label for label, percent in label_percentages.items() if percent < threshold_percentage}
|
|
64
50
|
minor_labels_readnumber = {label for label, num in Counter(labels_sample).items() if num <= threshold_readnumber}
|
|
65
51
|
minor_labels = minor_labels_percentage | minor_labels_readnumber
|
|
52
|
+
if minor_labels == set():
|
|
53
|
+
return labels_sample
|
|
66
54
|
|
|
67
55
|
correspondence = map_clusters_to_previous(labels_sample, labels_previous)
|
|
68
56
|
update_required_labels = get_update_required_labels(correspondence)
|
|
@@ -70,7 +58,23 @@ def merge_minor_cluster(
|
|
|
70
58
|
labels_merged = labels_sample.copy()
|
|
71
59
|
for m in minor_labels:
|
|
72
60
|
new_label = max(labels_merged) + 1
|
|
73
|
-
labels_merged = [
|
|
61
|
+
labels_merged = [
|
|
62
|
+
new_label if label in update_required_labels[correspondence[m]] else label for label in labels_merged
|
|
63
|
+
]
|
|
64
|
+
|
|
65
|
+
return labels_merged
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def merge_mixed_cluster(labels_control: list[int], labels_sample: list[int], threshold: float = 0.5) -> list[int]:
|
|
69
|
+
"""Merge labels in sample if they appear more than 'threshold' percentage in control."""
|
|
70
|
+
labels_merged = labels_sample.copy()
|
|
71
|
+
label_percentages_control = calculate_label_percentages(labels_control)
|
|
72
|
+
mixed_labels = {label for label, percent in label_percentages_control.items() if percent > threshold}
|
|
73
|
+
|
|
74
|
+
new_label = max(labels_merged) + 1
|
|
75
|
+
for i, label in enumerate(labels_sample):
|
|
76
|
+
if label in mixed_labels:
|
|
77
|
+
labels_merged[i] = new_label
|
|
74
78
|
|
|
75
79
|
return labels_merged
|
|
76
80
|
|
|
@@ -82,7 +86,7 @@ def merge_minor_cluster(
|
|
|
82
86
|
|
|
83
87
|
def merge_labels(labels_control: list[int], labels_sample: list[int], labels_previous: list[int]) -> list[int]:
|
|
84
88
|
labels_merged = merge_minor_cluster(
|
|
85
|
-
labels_sample, labels_previous, threshold_percentage=0.5, threshold_readnumber=
|
|
89
|
+
labels_sample, labels_previous, threshold_percentage=0.5, threshold_readnumber=5
|
|
86
90
|
)
|
|
87
91
|
labels_merged = merge_mixed_cluster(labels_control, labels_merged)
|
|
88
92
|
return labels_merged
|
|
@@ -70,8 +70,8 @@ def execute_control(arguments: dict):
|
|
|
70
70
|
# Output BAM files
|
|
71
71
|
###########################################################
|
|
72
72
|
logger.info(f"Output BAM files of {arguments['control']}...")
|
|
73
|
-
report.
|
|
74
|
-
ARGS.tempdir, ARGS.control_name, ARGS.genome_coordinates, ARGS.threads, is_control=True
|
|
73
|
+
report.bam_exporter.export_to_bam(
|
|
74
|
+
ARGS.tempdir, ARGS.control_name, ARGS.genome_coordinates, ARGS.threads, ARGS.uuid, is_control=True
|
|
75
75
|
)
|
|
76
76
|
###########################################################
|
|
77
77
|
# Finish call
|
|
@@ -204,15 +204,15 @@ def execute_sample(arguments: dict):
|
|
|
204
204
|
# RESULT
|
|
205
205
|
io.write_jsonl(RESULT_SAMPLE, Path(ARGS.tempdir, "result", f"{ARGS.sample_name}.jsonl"))
|
|
206
206
|
# FASTA
|
|
207
|
-
report.
|
|
208
|
-
report.
|
|
207
|
+
report.sequence_exporter.export_to_fasta(ARGS.tempdir, ARGS.sample_name, cons_sequence)
|
|
208
|
+
report.sequence_exporter.export_reference_to_fasta(ARGS.tempdir, ARGS.sample_name)
|
|
209
209
|
# HTML
|
|
210
|
-
report.
|
|
210
|
+
report.sequence_exporter.export_to_html(ARGS.tempdir, ARGS.sample_name, cons_percentage)
|
|
211
211
|
# CSV (Allele Info)
|
|
212
|
-
report.
|
|
212
|
+
report.mutation_exporter.export_to_csv(ARGS.tempdir, ARGS.sample_name, ARGS.genome_coordinates, cons_percentage)
|
|
213
213
|
# BAM
|
|
214
|
-
report.
|
|
215
|
-
ARGS.tempdir, ARGS.sample_name, ARGS.genome_coordinates, ARGS.threads, RESULT_SAMPLE
|
|
214
|
+
report.bam_exporter.export_to_bam(
|
|
215
|
+
ARGS.tempdir, ARGS.sample_name, ARGS.genome_coordinates, ARGS.threads, ARGS.uuid, RESULT_SAMPLE
|
|
216
216
|
)
|
|
217
217
|
for path_bam_igvjs in Path(ARGS.tempdir, "cache", ".igvjs").glob(f"{ARGS.control_name}_control.bam*"):
|
|
218
218
|
shutil.copy(path_bam_igvjs, Path(ARGS.tempdir, "report", ".igvjs", ARGS.sample_name))
|
|
@@ -5,11 +5,19 @@ from urllib.request import urlopen
|
|
|
5
5
|
|
|
6
6
|
def fetch_seq_coordinates(genome: str, blat_url: str, seq: str) -> dict:
|
|
7
7
|
url = f"{blat_url}?db={genome}&type=BLAT&userSeq={seq}"
|
|
8
|
-
|
|
9
|
-
matches = [
|
|
8
|
+
records = urlopen(url).read().decode("utf8").split("\n")
|
|
9
|
+
matches = []
|
|
10
|
+
for record in records:
|
|
11
|
+
if "100.0%" not in record:
|
|
12
|
+
continue
|
|
13
|
+
record_trim = [r for r in record.split(" ") if r]
|
|
14
|
+
if record_trim[-1] == str(len(seq)):
|
|
15
|
+
matches = record_trim
|
|
16
|
+
|
|
10
17
|
if not matches:
|
|
11
18
|
raise ValueError(f"{seq[:60]}... is not found in {genome}")
|
|
12
|
-
|
|
19
|
+
|
|
20
|
+
chrom, strand, start, end, _ = matches[-5:]
|
|
13
21
|
return {"chrom": chrom, "strand": strand, "start": int(start), "end": int(end)}
|
|
14
22
|
|
|
15
23
|
|
|
@@ -8,8 +8,7 @@ from itertools import chain, groupby
|
|
|
8
8
|
|
|
9
9
|
from collections import Counter
|
|
10
10
|
|
|
11
|
-
from DAJIN2.utils import sam_handler
|
|
12
|
-
from DAJIN2.utils import cssplits_handler
|
|
11
|
+
from DAJIN2.utils import io, sam_handler, cssplits_handler
|
|
13
12
|
|
|
14
13
|
|
|
15
14
|
def has_inversion_in_splice(CIGAR: str) -> bool:
|
|
@@ -215,8 +214,8 @@ def generate_midsv(ARGS, is_control: bool = False, is_insertion: bool = False) -
|
|
|
215
214
|
path_splice = Path(ARGS.tempdir, name, "sam", f"splice_{allele}.sam")
|
|
216
215
|
path_output_midsv = Path(ARGS.tempdir, name, "midsv", f"{allele}.json")
|
|
217
216
|
|
|
218
|
-
sam_ont = sam_handler.remove_overlapped_reads(list(
|
|
219
|
-
sam_splice = sam_handler.remove_overlapped_reads(list(
|
|
217
|
+
sam_ont = sam_handler.remove_overlapped_reads(list(io.read_sam(path_ont)))
|
|
218
|
+
sam_splice = sam_handler.remove_overlapped_reads(list(io.read_sam(path_splice)))
|
|
220
219
|
qname_of_map_ont = extract_qname_of_map_ont(sam_ont, sam_splice)
|
|
221
220
|
sam_of_map_ont = filter_sam_by_preset(sam_ont, qname_of_map_ont, preset="map-ont")
|
|
222
221
|
sam_of_splice = filter_sam_by_preset(sam_splice, qname_of_map_ont, preset="splice")
|
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import random
|
|
4
3
|
from collections import defaultdict
|
|
5
4
|
from itertools import groupby
|
|
6
5
|
from pathlib import Path
|
|
7
6
|
|
|
8
|
-
import midsv
|
|
9
7
|
import pysam
|
|
10
8
|
|
|
11
|
-
from DAJIN2.utils import sam_handler
|
|
9
|
+
from DAJIN2.utils import io, sam_handler
|
|
12
10
|
|
|
13
11
|
|
|
14
|
-
def
|
|
12
|
+
def recalculate_sam_coodinates_to_reference(sam: list[list[str]], GENOME_COODINATES: dict) -> list[str]:
|
|
13
|
+
"""Recalculate SAM genomic coordinates with the reference genome, not with the FASTA_ALLELE"""
|
|
15
14
|
sam_headers = [s for s in sam if s[0].startswith("@")]
|
|
16
15
|
sam_contents = [s for s in sam if not s[0].startswith("@")]
|
|
17
16
|
for s in sam_headers:
|
|
@@ -29,31 +28,44 @@ def realign(sam: list[list[str]], GENOME_COODINATES: dict) -> list[str]:
|
|
|
29
28
|
return sam_headers + sam_contents
|
|
30
29
|
|
|
31
30
|
|
|
31
|
+
def convert_pos_to_one_indexed(sam_lines: list[list[str]]) -> list[list[str]]:
|
|
32
|
+
"""Convert SAM POS from 0-indexed to 1-indexed"""
|
|
33
|
+
|
|
34
|
+
def convert_line(line: list[str]) -> list[str]:
|
|
35
|
+
if not line[0].startswith("@") and line[3] == "0":
|
|
36
|
+
line[3] = "1"
|
|
37
|
+
return line
|
|
38
|
+
|
|
39
|
+
return [convert_line(line) for line in sam_lines]
|
|
40
|
+
|
|
41
|
+
|
|
32
42
|
def group_by_name(sam_contents: list[str], clust_sample: list[dict]) -> dict[list]:
|
|
43
|
+
"""Group alignments in map-ont.sam by allele name (NAME)"""
|
|
33
44
|
sam_contents.sort()
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
for
|
|
37
|
-
|
|
38
|
-
clust_sample_qname_set.add(qname)
|
|
45
|
+
clust_sample_sorted = sorted(clust_sample, key=lambda x: x["QNAME"])
|
|
46
|
+
|
|
47
|
+
qnames: set[str] = {c["QNAME"] for c in clust_sample_sorted}
|
|
48
|
+
|
|
39
49
|
sam_groups = defaultdict(list)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
while
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
if
|
|
48
|
-
|
|
50
|
+
idx_sam_contents = 0
|
|
51
|
+
idx_clust_sample = 0
|
|
52
|
+
while idx_sam_contents < len(sam_contents) and idx_clust_sample < len(clust_sample_sorted):
|
|
53
|
+
alignments_sam = sam_contents[idx_sam_contents][:-1] # Discard CS tags to reduce file size
|
|
54
|
+
alignments_clsut_sample = clust_sample_sorted[idx_clust_sample]
|
|
55
|
+
qname_sam = alignments_sam[0]
|
|
56
|
+
|
|
57
|
+
if qname_sam not in qnames:
|
|
58
|
+
idx_sam_contents += 1
|
|
49
59
|
continue
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
60
|
+
|
|
61
|
+
if qname_sam == alignments_clsut_sample["QNAME"]:
|
|
62
|
+
key = alignments_clsut_sample["NAME"]
|
|
63
|
+
sam_groups[key].append(alignments_sam)
|
|
64
|
+
idx_sam_contents += 1
|
|
54
65
|
else:
|
|
55
|
-
|
|
56
|
-
|
|
66
|
+
idx_clust_sample += 1
|
|
67
|
+
|
|
68
|
+
return dict(sam_groups)
|
|
57
69
|
|
|
58
70
|
|
|
59
71
|
###############################################################################
|
|
@@ -67,13 +79,11 @@ def subset_qnames(RESULT_SAMPLE, readnum: int = 100) -> dict[set[str]]:
|
|
|
67
79
|
group = list(group)
|
|
68
80
|
qnames = [res["QNAME"] for res in group[:readnum]]
|
|
69
81
|
qnames_by_name[name] = set(qnames)
|
|
70
|
-
return qnames_by_name
|
|
82
|
+
return dict(qnames_by_name)
|
|
71
83
|
|
|
72
84
|
|
|
73
|
-
def subset_reads(
|
|
74
|
-
|
|
75
|
-
sam_subset = [sam for sam in sam_content if sam[0] in qnames]
|
|
76
|
-
return sam_subset
|
|
85
|
+
def subset_reads(sam_content: list[str], qnames: set[str]) -> list[str]:
|
|
86
|
+
return [sam for sam in sam_content if sam[0] in qnames]
|
|
77
87
|
|
|
78
88
|
|
|
79
89
|
###############################################################################
|
|
@@ -89,31 +99,34 @@ def write_sam_to_bam(sam: list[list[str]], path_sam: str | Path, path_bam: str |
|
|
|
89
99
|
|
|
90
100
|
|
|
91
101
|
def update_sam(sam: list, GENOME_COODINATES: dict = {}) -> list:
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
if "genome"
|
|
96
|
-
|
|
97
|
-
|
|
102
|
+
sam_records = sam.copy()
|
|
103
|
+
sam_records = sam_handler.remove_overlapped_reads(sam_records)
|
|
104
|
+
sam_records = sam_handler.remove_microhomology(sam_records)
|
|
105
|
+
if GENOME_COODINATES["genome"]:
|
|
106
|
+
return recalculate_sam_coodinates_to_reference(sam_records, GENOME_COODINATES)
|
|
107
|
+
else:
|
|
108
|
+
return convert_pos_to_one_indexed(sam_records)
|
|
98
109
|
|
|
99
110
|
|
|
100
|
-
def export_to_bam(TEMPDIR, NAME, GENOME_COODINATES, THREADS, RESULT_SAMPLE=None, is_control=False) -> None:
|
|
101
|
-
randomnum = random.randint(100_000, 999_999)
|
|
111
|
+
def export_to_bam(TEMPDIR, NAME, GENOME_COODINATES, THREADS, UUID, RESULT_SAMPLE=None, is_control=False) -> None:
|
|
102
112
|
path_sam_input = Path(TEMPDIR, NAME, "sam", "map-ont_control.sam")
|
|
103
|
-
|
|
113
|
+
sam_records = list(io.read_sam(path_sam_input))
|
|
114
|
+
|
|
104
115
|
# Update sam
|
|
105
|
-
|
|
116
|
+
sam_updated = update_sam(sam_records, GENOME_COODINATES)
|
|
117
|
+
|
|
106
118
|
# Output SAM and BAM
|
|
107
|
-
path_sam_output = Path(TEMPDIR, "report", "BAM", f"
|
|
119
|
+
path_sam_output = Path(TEMPDIR, "report", "BAM", f"temp_{UUID}_{NAME}_control.sam")
|
|
108
120
|
path_bam_output = Path(TEMPDIR, "report", "BAM", NAME, f"{NAME}.bam")
|
|
109
|
-
write_sam_to_bam(
|
|
121
|
+
write_sam_to_bam(sam_updated, path_sam_output, path_bam_output, THREADS)
|
|
122
|
+
|
|
110
123
|
# Prepare SAM headers and contents
|
|
111
|
-
sam_headers = [s for s in
|
|
112
|
-
sam_contents = [s for s in
|
|
124
|
+
sam_headers = [s for s in sam_updated if s[0].startswith("@")]
|
|
125
|
+
sam_contents = [s for s in sam_updated if not s[0].startswith("@")]
|
|
113
126
|
if is_control:
|
|
114
|
-
qnames = set(list(set(s[0] for s in sam_contents[:10000]))[:100])
|
|
115
|
-
sam_subset = [s for s in
|
|
116
|
-
path_sam_output = Path(TEMPDIR, "report", "BAM", f"
|
|
127
|
+
qnames: set[str] = set(list(set(s[0] for s in sam_contents[:10000]))[:100])
|
|
128
|
+
sam_subset = [s for s in sam_updated if s[0] in qnames]
|
|
129
|
+
path_sam_output = Path(TEMPDIR, "report", "BAM", f"temp_{UUID}_{NAME}_control_cache.sam")
|
|
117
130
|
path_bam_output = Path(TEMPDIR, "cache", ".igvjs", NAME, "control.bam")
|
|
118
131
|
write_sam_to_bam(sam_headers + sam_subset, path_sam_output, path_bam_output, THREADS)
|
|
119
132
|
else:
|
|
@@ -122,14 +135,15 @@ def export_to_bam(TEMPDIR, NAME, GENOME_COODINATES, THREADS, RESULT_SAMPLE=None,
|
|
|
122
135
|
# Output SAM and BAM
|
|
123
136
|
for name, sam_content in sam_groups.items():
|
|
124
137
|
# BAM
|
|
125
|
-
path_sam_output = Path(TEMPDIR, "report", "
|
|
138
|
+
path_sam_output = Path(TEMPDIR, "report", "BAM", f"temp_{UUID}_{name}.sam")
|
|
126
139
|
path_bam_output = Path(TEMPDIR, "report", "BAM", NAME, f"{NAME}_{name}.bam")
|
|
127
140
|
write_sam_to_bam(sam_headers + sam_content, path_sam_output, path_bam_output, THREADS)
|
|
128
141
|
# igvjs
|
|
129
|
-
sam_subset = subset_reads(
|
|
130
|
-
path_sam_output = Path(TEMPDIR, "report", "
|
|
142
|
+
sam_subset = subset_reads(sam_content, qnames_by_name[name])
|
|
143
|
+
path_sam_output = Path(TEMPDIR, "report", "BAM", f"temp_{UUID}_{name}_subset.sam")
|
|
131
144
|
path_bam_output = Path(TEMPDIR, "report", ".igvjs", NAME, f"{name}.bam")
|
|
132
145
|
write_sam_to_bam(sam_headers + sam_subset, path_sam_output, path_bam_output, THREADS)
|
|
146
|
+
|
|
133
147
|
# Remove temporary files
|
|
134
|
-
sam_temp = Path(TEMPDIR, "report", "BAM").glob(f"
|
|
148
|
+
sam_temp = Path(TEMPDIR, "report", "BAM").glob(f"temp_{UUID}*.sam")
|
|
135
149
|
[s.unlink() for s in sam_temp]
|
|
@@ -19,6 +19,12 @@ from openpyxl import load_workbook, Workbook
|
|
|
19
19
|
###########################################################
|
|
20
20
|
|
|
21
21
|
|
|
22
|
+
def read_sam(path_of_sam: str | Path) -> Generator[list]:
|
|
23
|
+
with open(path_of_sam) as f:
|
|
24
|
+
for line in f:
|
|
25
|
+
yield line.strip().split("\t")
|
|
26
|
+
|
|
27
|
+
|
|
22
28
|
def load_pickle(file_path: Path):
|
|
23
29
|
with open(file_path, "rb") as f:
|
|
24
30
|
return pickle.load(f)
|
|
@@ -2,8 +2,6 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
4
|
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from typing import Generator
|
|
7
5
|
from itertools import groupby
|
|
8
6
|
from DAJIN2.utils.dna_handler import revcomp
|
|
9
7
|
|
|
@@ -25,17 +23,6 @@ def is_mapped(s: list[str]) -> bool:
|
|
|
25
23
|
return not s[0].startswith("@") and s[9] != "*"
|
|
26
24
|
|
|
27
25
|
|
|
28
|
-
###########################################################
|
|
29
|
-
# Read sam
|
|
30
|
-
###########################################################
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def read_sam(path_of_sam: str | Path) -> Generator[list]:
|
|
34
|
-
with open(path_of_sam) as f:
|
|
35
|
-
for line in f:
|
|
36
|
-
yield line.strip().split("\t")
|
|
37
|
-
|
|
38
|
-
|
|
39
26
|
###########################################################
|
|
40
27
|
# remove_overlapped_reads
|
|
41
28
|
###########################################################
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: DAJIN2
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: One-step genotyping tools for targeted long-read sequencing
|
|
5
5
|
Home-page: https://github.com/akikuno/DAJIN2
|
|
6
6
|
Author: Akihiro Kuno
|
|
@@ -14,22 +14,22 @@ Classifier: Intended Audience :: Science/Research
|
|
|
14
14
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
15
15
|
Description-Content-Type: text/markdown
|
|
16
16
|
License-File: LICENSE
|
|
17
|
-
Requires-Dist: numpy>=1.
|
|
18
|
-
Requires-Dist: scipy>=1.
|
|
17
|
+
Requires-Dist: numpy>=1.24.0
|
|
18
|
+
Requires-Dist: scipy>=1.10.0
|
|
19
19
|
Requires-Dist: pandas>=1.0.0
|
|
20
|
-
Requires-Dist: openpyxl>=3.
|
|
21
|
-
Requires-Dist: rapidfuzz>=3.
|
|
22
|
-
Requires-Dist: scikit-learn>=1.
|
|
20
|
+
Requires-Dist: openpyxl>=3.1.0
|
|
21
|
+
Requires-Dist: rapidfuzz>=3.6.0
|
|
22
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
23
23
|
Requires-Dist: mappy>=2.24
|
|
24
|
-
Requires-Dist: pysam>=0.
|
|
24
|
+
Requires-Dist: pysam>=0.21.0
|
|
25
25
|
Requires-Dist: Flask>=2.2.0
|
|
26
26
|
Requires-Dist: waitress>=2.1.0
|
|
27
27
|
Requires-Dist: Jinja2>=3.1.0
|
|
28
|
-
Requires-Dist: plotly>=5.
|
|
28
|
+
Requires-Dist: plotly>=5.19.0
|
|
29
29
|
Requires-Dist: kaleido>=0.2.0
|
|
30
30
|
Requires-Dist: cstag>=1.0.0
|
|
31
|
-
Requires-Dist: midsv>=0.
|
|
32
|
-
Requires-Dist: wslPath>=0.
|
|
31
|
+
Requires-Dist: midsv>=0.11.0
|
|
32
|
+
Requires-Dist: wslPath>=0.4.1
|
|
33
33
|
|
|
34
34
|
[](https://choosealicense.com/licenses/mit/)
|
|
35
35
|
[](https://github.com/akikuno/dajin2/actions)
|
|
@@ -78,6 +78,7 @@ conda activate env-dajin2
|
|
|
78
78
|
> CONDA_SUBDIR=osx-64 conda create -n env-dajin2 -c conda-forge -c bioconda python=3.10 DAJIN2 -y
|
|
79
79
|
> conda activate env-dajin2
|
|
80
80
|
> conda config --env --set subdir osx-64
|
|
81
|
+
> python -c "import platform; print(platform.machine())" # Confirm that the output is 'x86_64', not 'arm64'
|
|
81
82
|
> ```
|
|
82
83
|
|
|
83
84
|
### From [PyPI](https://pypi.org/project/DAJIN2/)
|
|
@@ -164,12 +165,17 @@ Options:
|
|
|
164
165
|
#### Example
|
|
165
166
|
|
|
166
167
|
```bash
|
|
168
|
+
# Download example dataset
|
|
169
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_single.tar.gz
|
|
170
|
+
tar -xf example_single.tar.gz
|
|
171
|
+
|
|
172
|
+
# Run DAJIN2
|
|
167
173
|
DAJIN2 \
|
|
168
|
-
--control
|
|
169
|
-
--sample
|
|
170
|
-
--allele
|
|
171
|
-
--name
|
|
172
|
-
--genome
|
|
174
|
+
--control example_single/control \
|
|
175
|
+
--sample example_single/sample \
|
|
176
|
+
--allele example_single/stx2_deletion.fa \
|
|
177
|
+
--name stx2_deletion \
|
|
178
|
+
--genome mm39 \
|
|
173
179
|
--threads 4
|
|
174
180
|
```
|
|
175
181
|
|
|
@@ -206,7 +212,6 @@ DAJIN2 \
|
|
|
206
212
|
|
|
207
213
|
By using the `batch` subcommand, you can process multiple FASTQ files simultaneously.
|
|
208
214
|
For this purpose, a CSV or Excel file consolidating the sample information is required.
|
|
209
|
-
<!-- For a specific example, please refer to [this link](https://github.com/akikuno/DAJIN2/blob/main/examples/example-batch/batch.csv). -->
|
|
210
215
|
|
|
211
216
|
> [!NOTE]
|
|
212
217
|
> For guidance on how to compile sample information, please refer to [this document](https://docs.google.com/presentation/d/e/2PACX-1vSMEmXJPG2TNjfT66XZJRzqJd82aAqO5gJrdEzyhn15YBBr_Li-j5puOgVChYf3jA/embed?start=false&loop=false&delayms=3000).
|
|
@@ -224,13 +229,18 @@ options:
|
|
|
224
229
|
#### Example
|
|
225
230
|
|
|
226
231
|
```bash
|
|
227
|
-
|
|
232
|
+
# Download the example dataset
|
|
233
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_batch.tar.gz
|
|
234
|
+
tar -xf example_batch.tar.gz
|
|
235
|
+
|
|
236
|
+
# Run DAJIN2
|
|
237
|
+
DAJIN2 batch --file example_batch/batch.csv --threads 4
|
|
228
238
|
```
|
|
229
239
|
|
|
230
240
|
<!-- ```bash
|
|
231
241
|
# Download the example dataset
|
|
232
|
-
wget https://github.com/akikuno/DAJIN2/raw/main/examples/
|
|
233
|
-
tar -xf
|
|
242
|
+
wget https://github.com/akikuno/DAJIN2/raw/main/examples/example_batch.tar.gz
|
|
243
|
+
tar -xf example_batch.tar.gz
|
|
234
244
|
|
|
235
245
|
# Run DAJIN2
|
|
236
246
|
DAJIN2 batch --file example_batch/batch.csv --threads 3
|
|
@@ -46,10 +46,10 @@ src/DAJIN2/core/preprocess/mapping.py
|
|
|
46
46
|
src/DAJIN2/core/preprocess/midsv_caller.py
|
|
47
47
|
src/DAJIN2/core/preprocess/mutation_extractor.py
|
|
48
48
|
src/DAJIN2/core/report/__init__.py
|
|
49
|
+
src/DAJIN2/core/report/bam_exporter.py
|
|
49
50
|
src/DAJIN2/core/report/insertion_reflector.py
|
|
50
|
-
src/DAJIN2/core/report/
|
|
51
|
-
src/DAJIN2/core/report/
|
|
52
|
-
src/DAJIN2/core/report/report_mutation.py
|
|
51
|
+
src/DAJIN2/core/report/mutation_exporter.py
|
|
52
|
+
src/DAJIN2/core/report/sequence_exporter.py
|
|
53
53
|
src/DAJIN2/static/css/style.css
|
|
54
54
|
src/DAJIN2/templates/index.html
|
|
55
55
|
src/DAJIN2/utils/config.py
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
numpy>=1.24.0
|
|
2
|
+
scipy>=1.10.0
|
|
3
|
+
pandas>=1.0.0
|
|
4
|
+
openpyxl>=3.1.0
|
|
5
|
+
rapidfuzz>=3.6.0
|
|
6
|
+
scikit-learn>=1.3.0
|
|
7
|
+
mappy>=2.24
|
|
8
|
+
pysam>=0.21.0
|
|
9
|
+
Flask>=2.2.0
|
|
10
|
+
waitress>=2.1.0
|
|
11
|
+
Jinja2>=3.1.0
|
|
12
|
+
plotly>=5.19.0
|
|
13
|
+
kaleido>=0.2.0
|
|
14
|
+
cstag>=1.0.0
|
|
15
|
+
midsv>=0.11.0
|
|
16
|
+
wslPath>=0.4.1
|
DAJIN2-0.4.2/requirements.txt
DELETED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
numpy >= 1.20.0
|
|
2
|
-
scipy >= 1.6.0
|
|
3
|
-
pandas >= 1.0.0
|
|
4
|
-
openpyxl >= 3.0.0
|
|
5
|
-
rapidfuzz >=3.0.0
|
|
6
|
-
scikit-learn >= 1.0.0
|
|
7
|
-
|
|
8
|
-
mappy >= 2.24
|
|
9
|
-
pysam >= 0.19.0
|
|
10
|
-
|
|
11
|
-
Flask >= 2.2.0
|
|
12
|
-
waitress >= 2.1.0
|
|
13
|
-
Jinja2 >= 3.1.0
|
|
14
|
-
|
|
15
|
-
plotly >= 5.0.0
|
|
16
|
-
kaleido >= 0.2.0
|
|
17
|
-
|
|
18
|
-
cstag >= 1.0.0
|
|
19
|
-
midsv >= 0.10.1
|
|
20
|
-
wslPath >=0.3.0
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
numpy>=1.20.0
|
|
2
|
-
scipy>=1.6.0
|
|
3
|
-
pandas>=1.0.0
|
|
4
|
-
openpyxl>=3.0.0
|
|
5
|
-
rapidfuzz>=3.0.0
|
|
6
|
-
scikit-learn>=1.0.0
|
|
7
|
-
mappy>=2.24
|
|
8
|
-
pysam>=0.19.0
|
|
9
|
-
Flask>=2.2.0
|
|
10
|
-
waitress>=2.1.0
|
|
11
|
-
Jinja2>=3.1.0
|
|
12
|
-
plotly>=5.0.0
|
|
13
|
-
kaleido>=0.2.0
|
|
14
|
-
cstag>=1.0.0
|
|
15
|
-
midsv>=0.10.1
|
|
16
|
-
wslPath>=0.3.0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|