miga-base 1.2.18.2 → 1.3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/doctor/base.rb +2 -1
- data/lib/miga/cli/action/init.rb +1 -1
- data/lib/miga/dataset/result/add.rb +3 -2
- data/lib/miga/version.rb +2 -2
- data/scripts/essential_genes.bash +4 -8
- data/utils/FastAAI/LICENSE +8 -0
- data/utils/FastAAI/README.md +151 -40
- data/utils/FastAAI/__init__.py +1 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962915_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962925_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962935_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962945_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962995_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000963025_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000963055_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000963065_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/_Pseudomonas__cissicola_GCA_002019225_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/_Pseudomonas__cissicola_GCA_008801575_1.fna.gz +0 -0
- data/utils/FastAAI/fastaai/__init__.py +1 -0
- data/utils/FastAAI/fastaai/fastaai +4805 -0
- data/utils/FastAAI/fastaai/fastaai.py +4805 -0
- data/utils/FastAAI/fastaai/fastaai_miga_crystals_to_db.py +297 -0
- data/utils/FastAAI/fastaai/fastaai_miga_preproc.py +931 -0
- data/utils/FastAAI/metadata/Accession_names_and_IDs.txt +122 -0
- data/utils/distance/commands.rb +51 -23
- metadata +23 -6
- data/utils/FastAAI/FastAAI +0 -3659
- /data/utils/FastAAI/{00.Libraries → fastaai/00.Libraries}/01.SCG_HMMs/Archaea_SCG.hmm +0 -0
- /data/utils/FastAAI/{00.Libraries → fastaai/00.Libraries}/01.SCG_HMMs/Bacteria_SCG.hmm +0 -0
- /data/utils/FastAAI/{00.Libraries → fastaai/00.Libraries}/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -0
@@ -0,0 +1,122 @@
|
|
1
|
+
PF01780_19 0
|
2
|
+
PF03948_14 1
|
3
|
+
PF17144_4 2
|
4
|
+
PF00830_19 3
|
5
|
+
PF00347_23 4
|
6
|
+
PF16906_5 5
|
7
|
+
PF13393_6 6
|
8
|
+
PF02565_15 7
|
9
|
+
PF01991_18 8
|
10
|
+
PF01984_20 9
|
11
|
+
PF00861_22 10
|
12
|
+
PF13656_6 11
|
13
|
+
PF00368_18 12
|
14
|
+
PF01142_18 13
|
15
|
+
PF00312_22 14
|
16
|
+
PF02367_17 15
|
17
|
+
PF01951_16 16
|
18
|
+
PF00749_21 17
|
19
|
+
PF01655_18 18
|
20
|
+
PF00318_20 19
|
21
|
+
PF01813_17 20
|
22
|
+
PF01649_18 21
|
23
|
+
PF01025_19 22
|
24
|
+
PF00380_19 23
|
25
|
+
PF01282_19 24
|
26
|
+
PF01864_17 25
|
27
|
+
PF01783_23 26
|
28
|
+
PF01808_18 27
|
29
|
+
PF01982_16 28
|
30
|
+
PF01715_17 29
|
31
|
+
PF00213_18 30
|
32
|
+
PF00119_20 31
|
33
|
+
PF00573_22 32
|
34
|
+
PF01981_16 33
|
35
|
+
PF00281_19 34
|
36
|
+
PF00584_20 35
|
37
|
+
PF00825_18 36
|
38
|
+
PF00406_22 37
|
39
|
+
PF00177_21 38
|
40
|
+
PF01192_22 39
|
41
|
+
PF05833_11 40
|
42
|
+
PF02699_15 41
|
43
|
+
PF01016_19 42
|
44
|
+
PF01765_19 43
|
45
|
+
PF00453_18 44
|
46
|
+
PF01193_24 45
|
47
|
+
PF05221_17 46
|
48
|
+
PF00231_19 47
|
49
|
+
PF00416_22 48
|
50
|
+
PF02033_18 49
|
51
|
+
PF01668_18 50
|
52
|
+
PF00886_19 51
|
53
|
+
PF00252_18 52
|
54
|
+
PF00572_18 53
|
55
|
+
PF00366_20 54
|
56
|
+
PF04104_14 55
|
57
|
+
PF04919_12 56
|
58
|
+
PF01912_18 57
|
59
|
+
PF00276_20 58
|
60
|
+
PF00203_21 59
|
61
|
+
PF00889_19 60
|
62
|
+
PF02996_17 61
|
63
|
+
PF00121_18 62
|
64
|
+
PF01990_17 63
|
65
|
+
PF00344_20 64
|
66
|
+
PF00297_22 65
|
67
|
+
PF01196_19 66
|
68
|
+
PF01194_17 67
|
69
|
+
PF01725_16 68
|
70
|
+
PF00750_19 69
|
71
|
+
PF00338_22 70
|
72
|
+
PF00238_19 71
|
73
|
+
PF01200_18 72
|
74
|
+
PF00162_19 73
|
75
|
+
PF00181_23 74
|
76
|
+
PF01866_17 75
|
77
|
+
PF00709_21 76
|
78
|
+
PF02006_16 77
|
79
|
+
PF00164_25 78
|
80
|
+
PF00237_19 79
|
81
|
+
PF01139_17 80
|
82
|
+
PF01351_18 81
|
83
|
+
PF04010_13 82
|
84
|
+
PF06093_13 83
|
85
|
+
PF00828_19 84
|
86
|
+
PF02410_15 85
|
87
|
+
PF01176_19 86
|
88
|
+
PF02130_17 87
|
89
|
+
PF01948_18 88
|
90
|
+
PF01195_19 89
|
91
|
+
PF01746_21 90
|
92
|
+
PF01667_17 91
|
93
|
+
PF03874_16 92
|
94
|
+
PF01090_19 93
|
95
|
+
PF01198_19 94
|
96
|
+
PF01250_17 95
|
97
|
+
PF17136_4 96
|
98
|
+
PF06026_14 97
|
99
|
+
PF03652_15 98
|
100
|
+
PF04019_12 99
|
101
|
+
PF01201_22 100
|
102
|
+
PF00832_20 101
|
103
|
+
PF01264_21 102
|
104
|
+
PF03840_14 103
|
105
|
+
PF00831_23 104
|
106
|
+
PF00189_20 105
|
107
|
+
PF02601_15 106
|
108
|
+
PF01496_19 107
|
109
|
+
PF00411_19 108
|
110
|
+
PF00334_19 109
|
111
|
+
PF00687_21 110
|
112
|
+
PF01157_18 111
|
113
|
+
PF01245_20 112
|
114
|
+
PF01994_16 113
|
115
|
+
PF01632_19 114
|
116
|
+
PF00827_17 115
|
117
|
+
PF01015_18 116
|
118
|
+
PF00829_21 117
|
119
|
+
PF00410_19 118
|
120
|
+
PF00833_18 119
|
121
|
+
PF00935_19 120
|
122
|
+
PF01992_16 121
|
data/utils/distance/commands.rb
CHANGED
@@ -146,56 +146,84 @@ module MiGA::DistanceRunner::Commands
|
|
146
146
|
##
|
147
147
|
# Execute a FastAAI command
|
148
148
|
def fastaai_cmd(targets)
|
149
|
-
qry_idx = dataset.result(:essential_genes).file_path(:
|
149
|
+
qry_idx = dataset.result(:essential_genes).file_path(:fastaai_crystal)
|
150
150
|
return nil unless qry_idx
|
151
151
|
|
152
152
|
# Merge databases
|
153
153
|
donors = []
|
154
154
|
targets.each do |target|
|
155
|
-
tgt_idx = target&.result(:essential_genes)&.file_path(:
|
155
|
+
tgt_idx = target&.result(:essential_genes)&.file_path(:fastaai_crystal)
|
156
156
|
donors << tgt_idx if tgt_idx
|
157
157
|
end
|
158
158
|
return nil if donors.empty?
|
159
159
|
|
160
160
|
# Build target database
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
161
|
+
fastaai_dir = File.join(MiGA::MiGA.root_path, 'utils', 'FastAAI', 'fastaai')
|
162
|
+
t_db = tmp_file # Target database (from crystals)
|
163
|
+
q_db = tmp_file # Query database (from crystal)
|
164
|
+
File.open(crystals = tmp_file, 'w') { |fh| donors.each { |i| fh.puts i } }
|
165
|
+
script = File.join(fastaai_dir, 'fastaai_miga_crystals_to_db.py')
|
166
|
+
run_cmd(
|
167
|
+
<<~CMD
|
168
|
+
python3 "#{script}" \
|
169
|
+
--crystal_list "#{crystals}" --database_path "#{t_db}" --overwrite
|
170
|
+
CMD
|
171
|
+
)
|
172
|
+
raise "Cannot merge databases into: #{t_db}" unless File.size?(t_db)
|
173
|
+
run_cmd(
|
174
|
+
<<~CMD
|
175
|
+
echo "#{qry_idx}" | \
|
176
|
+
python3 "#{script}" \
|
177
|
+
--crystal_list /dev/stdin --database_path "#{q_db}" --overwrite
|
178
|
+
CMD
|
179
|
+
)
|
180
|
+
raise "Cannot merge databases into: #{q_db}" unless File.size?(q_db)
|
174
181
|
|
175
182
|
# Run FastAAI
|
183
|
+
script = File.join(fastaai_dir, 'fastaai')
|
176
184
|
run_cmd(
|
177
185
|
<<~CMD
|
178
|
-
|
179
|
-
--
|
180
|
-
--
|
186
|
+
python3 "#{script}" db_query \
|
187
|
+
--query "#{q_db}" --target "#{t_db}" \
|
188
|
+
--output "#{out_dir = tmp_file}" \
|
189
|
+
--threads 1 --do_stdev
|
181
190
|
CMD
|
182
191
|
)
|
183
|
-
|
192
|
+
#run_cmd(
|
193
|
+
# <<~CMD
|
194
|
+
# python3 "#{script}" db_query --query "#{q_db}" --target "#{t_db}" \
|
195
|
+
# --output "#{out_dir = tmp_file}" --threads #{opts[:thr]} \
|
196
|
+
# --do_stdev
|
197
|
+
# CMD
|
198
|
+
#)
|
199
|
+
raise "Cannot find FastAAI output: #{out_dir}" unless Dir.exist?(out_dir)
|
184
200
|
|
185
201
|
# Save values in the databases
|
186
202
|
haai_data = {}
|
187
203
|
aai_data = {}
|
188
204
|
# Ugly workaround to the insistence of FastAAI not to provide the files
|
189
205
|
# I ask for ;-)
|
190
|
-
qry_results = File.basename(qry_idx, '.
|
191
|
-
out_file = File.join(
|
206
|
+
# qry_results = File.basename(qry_idx, '.crystal') + '_results.txt'
|
207
|
+
# out_file = File.join(out_dir, 'results', qry_results)
|
208
|
+
out_file = Dir["#{out_dir}/results/*_results.txt"].first
|
209
|
+
unless out_file && File.exist?(out_file)
|
210
|
+
raise "Cannot find FastAAI results: #{Dir["#{out_dir}/**/*"]}"
|
211
|
+
end
|
192
212
|
File.open(out_file, 'r') do |fh|
|
193
213
|
fh.each do |ln|
|
194
214
|
out = ln.chomp.split("\t")
|
195
215
|
haai_data[out[1]] = [
|
196
216
|
out[2].to_f * 100, out[3].to_f * 100, out[4].to_i, out[5].to_i
|
197
217
|
]
|
198
|
-
|
218
|
+
if out[6] =~ /^>/
|
219
|
+
# J-bar = 0.843 <=> AAI-hat = 90%
|
220
|
+
# This approximation is not in the original FastAAI paper, but it
|
221
|
+
# allows to maintain monotonicity at AAI-hat ≥ 90%, which solves
|
222
|
+
# indexing issues the ML-estimate of "AAI ~ 95%"
|
223
|
+
out[6] = Math.sqrt(out[2].to_f) * 100
|
224
|
+
else
|
225
|
+
out[6] = out[6].to_f
|
226
|
+
end
|
199
227
|
aai_data[out[1]] = [out[6], 0, 0, 0]
|
200
228
|
end
|
201
229
|
end
|
@@ -204,7 +232,7 @@ module MiGA::DistanceRunner::Commands
|
|
204
232
|
batch_data_to_db(:aai, aai_data)
|
205
233
|
|
206
234
|
# Cleanup
|
207
|
-
[
|
235
|
+
FileUtils.rm_rf([crystals, t_db, q_db, out_dir])
|
208
236
|
end
|
209
237
|
|
210
238
|
##
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-03-
|
11
|
+
date: 2023-03-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -287,13 +287,30 @@ files:
|
|
287
287
|
- test/test_helper.rb
|
288
288
|
- test/with_daemon_test.rb
|
289
289
|
- test/with_option_test.rb
|
290
|
-
- utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm
|
291
|
-
- utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm
|
292
|
-
- utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm
|
293
|
-
- utils/FastAAI/FastAAI
|
294
290
|
- utils/FastAAI/FastAAI-legacy/FastAAI
|
295
291
|
- utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py
|
292
|
+
- utils/FastAAI/LICENSE
|
296
293
|
- utils/FastAAI/README.md
|
294
|
+
- utils/FastAAI/__init__.py
|
295
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962915_1.fna.gz
|
296
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962925_1.fna.gz
|
297
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962935_1.fna.gz
|
298
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962945_1.fna.gz
|
299
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962995_1.fna.gz
|
300
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000963025_1.fna.gz
|
301
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000963055_1.fna.gz
|
302
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000963065_1.fna.gz
|
303
|
+
- utils/FastAAI/example_genomes/_Pseudomonas__cissicola_GCA_002019225_1.fna.gz
|
304
|
+
- utils/FastAAI/example_genomes/_Pseudomonas__cissicola_GCA_008801575_1.fna.gz
|
305
|
+
- utils/FastAAI/fastaai/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm
|
306
|
+
- utils/FastAAI/fastaai/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm
|
307
|
+
- utils/FastAAI/fastaai/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm
|
308
|
+
- utils/FastAAI/fastaai/__init__.py
|
309
|
+
- utils/FastAAI/fastaai/fastaai
|
310
|
+
- utils/FastAAI/fastaai/fastaai.py
|
311
|
+
- utils/FastAAI/fastaai/fastaai_miga_crystals_to_db.py
|
312
|
+
- utils/FastAAI/fastaai/fastaai_miga_preproc.py
|
313
|
+
- utils/FastAAI/metadata/Accession_names_and_IDs.txt
|
297
314
|
- utils/adapters.fa
|
298
315
|
- utils/cleanup-databases.rb
|
299
316
|
- utils/core-pan-plot.R
|