miga-base 1.2.18.1 → 1.3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/doctor/base.rb +2 -1
- data/lib/miga/cli/action/init.rb +1 -1
- data/lib/miga/dataset/result/add.rb +3 -2
- data/lib/miga/lair.rb +9 -3
- data/lib/miga/version.rb +2 -2
- data/scripts/essential_genes.bash +4 -8
- data/utils/FastAAI/LICENSE +8 -0
- data/utils/FastAAI/README.md +151 -40
- data/utils/FastAAI/__init__.py +1 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962915_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962925_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962935_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962945_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962995_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000963025_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000963055_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000963065_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/_Pseudomonas__cissicola_GCA_002019225_1.fna.gz +0 -0
- data/utils/FastAAI/example_genomes/_Pseudomonas__cissicola_GCA_008801575_1.fna.gz +0 -0
- data/utils/FastAAI/fastaai/__init__.py +1 -0
- data/utils/FastAAI/fastaai/fastaai +4805 -0
- data/utils/FastAAI/fastaai/fastaai.py +4805 -0
- data/utils/FastAAI/fastaai/fastaai_miga_crystals_to_db.py +297 -0
- data/utils/FastAAI/fastaai/fastaai_miga_preproc.py +931 -0
- data/utils/FastAAI/metadata/Accession_names_and_IDs.txt +122 -0
- data/utils/distance/commands.rb +51 -23
- metadata +23 -6
- data/utils/FastAAI/FastAAI +0 -3659
- /data/utils/FastAAI/{00.Libraries → fastaai/00.Libraries}/01.SCG_HMMs/Archaea_SCG.hmm +0 -0
- /data/utils/FastAAI/{00.Libraries → fastaai/00.Libraries}/01.SCG_HMMs/Bacteria_SCG.hmm +0 -0
- /data/utils/FastAAI/{00.Libraries → fastaai/00.Libraries}/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -0
@@ -0,0 +1,122 @@
|
|
1
|
+
PF01780_19 0
|
2
|
+
PF03948_14 1
|
3
|
+
PF17144_4 2
|
4
|
+
PF00830_19 3
|
5
|
+
PF00347_23 4
|
6
|
+
PF16906_5 5
|
7
|
+
PF13393_6 6
|
8
|
+
PF02565_15 7
|
9
|
+
PF01991_18 8
|
10
|
+
PF01984_20 9
|
11
|
+
PF00861_22 10
|
12
|
+
PF13656_6 11
|
13
|
+
PF00368_18 12
|
14
|
+
PF01142_18 13
|
15
|
+
PF00312_22 14
|
16
|
+
PF02367_17 15
|
17
|
+
PF01951_16 16
|
18
|
+
PF00749_21 17
|
19
|
+
PF01655_18 18
|
20
|
+
PF00318_20 19
|
21
|
+
PF01813_17 20
|
22
|
+
PF01649_18 21
|
23
|
+
PF01025_19 22
|
24
|
+
PF00380_19 23
|
25
|
+
PF01282_19 24
|
26
|
+
PF01864_17 25
|
27
|
+
PF01783_23 26
|
28
|
+
PF01808_18 27
|
29
|
+
PF01982_16 28
|
30
|
+
PF01715_17 29
|
31
|
+
PF00213_18 30
|
32
|
+
PF00119_20 31
|
33
|
+
PF00573_22 32
|
34
|
+
PF01981_16 33
|
35
|
+
PF00281_19 34
|
36
|
+
PF00584_20 35
|
37
|
+
PF00825_18 36
|
38
|
+
PF00406_22 37
|
39
|
+
PF00177_21 38
|
40
|
+
PF01192_22 39
|
41
|
+
PF05833_11 40
|
42
|
+
PF02699_15 41
|
43
|
+
PF01016_19 42
|
44
|
+
PF01765_19 43
|
45
|
+
PF00453_18 44
|
46
|
+
PF01193_24 45
|
47
|
+
PF05221_17 46
|
48
|
+
PF00231_19 47
|
49
|
+
PF00416_22 48
|
50
|
+
PF02033_18 49
|
51
|
+
PF01668_18 50
|
52
|
+
PF00886_19 51
|
53
|
+
PF00252_18 52
|
54
|
+
PF00572_18 53
|
55
|
+
PF00366_20 54
|
56
|
+
PF04104_14 55
|
57
|
+
PF04919_12 56
|
58
|
+
PF01912_18 57
|
59
|
+
PF00276_20 58
|
60
|
+
PF00203_21 59
|
61
|
+
PF00889_19 60
|
62
|
+
PF02996_17 61
|
63
|
+
PF00121_18 62
|
64
|
+
PF01990_17 63
|
65
|
+
PF00344_20 64
|
66
|
+
PF00297_22 65
|
67
|
+
PF01196_19 66
|
68
|
+
PF01194_17 67
|
69
|
+
PF01725_16 68
|
70
|
+
PF00750_19 69
|
71
|
+
PF00338_22 70
|
72
|
+
PF00238_19 71
|
73
|
+
PF01200_18 72
|
74
|
+
PF00162_19 73
|
75
|
+
PF00181_23 74
|
76
|
+
PF01866_17 75
|
77
|
+
PF00709_21 76
|
78
|
+
PF02006_16 77
|
79
|
+
PF00164_25 78
|
80
|
+
PF00237_19 79
|
81
|
+
PF01139_17 80
|
82
|
+
PF01351_18 81
|
83
|
+
PF04010_13 82
|
84
|
+
PF06093_13 83
|
85
|
+
PF00828_19 84
|
86
|
+
PF02410_15 85
|
87
|
+
PF01176_19 86
|
88
|
+
PF02130_17 87
|
89
|
+
PF01948_18 88
|
90
|
+
PF01195_19 89
|
91
|
+
PF01746_21 90
|
92
|
+
PF01667_17 91
|
93
|
+
PF03874_16 92
|
94
|
+
PF01090_19 93
|
95
|
+
PF01198_19 94
|
96
|
+
PF01250_17 95
|
97
|
+
PF17136_4 96
|
98
|
+
PF06026_14 97
|
99
|
+
PF03652_15 98
|
100
|
+
PF04019_12 99
|
101
|
+
PF01201_22 100
|
102
|
+
PF00832_20 101
|
103
|
+
PF01264_21 102
|
104
|
+
PF03840_14 103
|
105
|
+
PF00831_23 104
|
106
|
+
PF00189_20 105
|
107
|
+
PF02601_15 106
|
108
|
+
PF01496_19 107
|
109
|
+
PF00411_19 108
|
110
|
+
PF00334_19 109
|
111
|
+
PF00687_21 110
|
112
|
+
PF01157_18 111
|
113
|
+
PF01245_20 112
|
114
|
+
PF01994_16 113
|
115
|
+
PF01632_19 114
|
116
|
+
PF00827_17 115
|
117
|
+
PF01015_18 116
|
118
|
+
PF00829_21 117
|
119
|
+
PF00410_19 118
|
120
|
+
PF00833_18 119
|
121
|
+
PF00935_19 120
|
122
|
+
PF01992_16 121
|
data/utils/distance/commands.rb
CHANGED
@@ -146,56 +146,84 @@ module MiGA::DistanceRunner::Commands
|
|
146
146
|
##
|
147
147
|
# Execute a FastAAI command
|
148
148
|
def fastaai_cmd(targets)
|
149
|
-
qry_idx = dataset.result(:essential_genes).file_path(:
|
149
|
+
qry_idx = dataset.result(:essential_genes).file_path(:fastaai_crystal)
|
150
150
|
return nil unless qry_idx
|
151
151
|
|
152
152
|
# Merge databases
|
153
153
|
donors = []
|
154
154
|
targets.each do |target|
|
155
|
-
tgt_idx = target&.result(:essential_genes)&.file_path(:
|
155
|
+
tgt_idx = target&.result(:essential_genes)&.file_path(:fastaai_crystal)
|
156
156
|
donors << tgt_idx if tgt_idx
|
157
157
|
end
|
158
158
|
return nil if donors.empty?
|
159
159
|
|
160
160
|
# Build target database
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
161
|
+
fastaai_dir = File.join(MiGA::MiGA.root_path, 'utils', 'FastAAI', 'fastaai')
|
162
|
+
t_db = tmp_file # Target database (from crystals)
|
163
|
+
q_db = tmp_file # Query database (from crystal)
|
164
|
+
File.open(crystals = tmp_file, 'w') { |fh| donors.each { |i| fh.puts i } }
|
165
|
+
script = File.join(fastaai_dir, 'fastaai_miga_crystals_to_db.py')
|
166
|
+
run_cmd(
|
167
|
+
<<~CMD
|
168
|
+
python3 "#{script}" \
|
169
|
+
--crystal_list "#{crystals}" --database_path "#{t_db}" --overwrite
|
170
|
+
CMD
|
171
|
+
)
|
172
|
+
raise "Cannot merge databases into: #{t_db}" unless File.size?(t_db)
|
173
|
+
run_cmd(
|
174
|
+
<<~CMD
|
175
|
+
echo "#{qry_idx}" | \
|
176
|
+
python3 "#{script}" \
|
177
|
+
--crystal_list /dev/stdin --database_path "#{q_db}" --overwrite
|
178
|
+
CMD
|
179
|
+
)
|
180
|
+
raise "Cannot merge databases into: #{q_db}" unless File.size?(q_db)
|
174
181
|
|
175
182
|
# Run FastAAI
|
183
|
+
script = File.join(fastaai_dir, 'fastaai')
|
176
184
|
run_cmd(
|
177
185
|
<<~CMD
|
178
|
-
|
179
|
-
--
|
180
|
-
--
|
186
|
+
python3 "#{script}" db_query \
|
187
|
+
--query "#{q_db}" --target "#{t_db}" \
|
188
|
+
--output "#{out_dir = tmp_file}" \
|
189
|
+
--threads 1 --do_stdev
|
181
190
|
CMD
|
182
191
|
)
|
183
|
-
|
192
|
+
#run_cmd(
|
193
|
+
# <<~CMD
|
194
|
+
# python3 "#{script}" db_query --query "#{q_db}" --target "#{t_db}" \
|
195
|
+
# --output "#{out_dir = tmp_file}" --threads #{opts[:thr]} \
|
196
|
+
# --do_stdev
|
197
|
+
# CMD
|
198
|
+
#)
|
199
|
+
raise "Cannot find FastAAI output: #{out_dir}" unless Dir.exist?(out_dir)
|
184
200
|
|
185
201
|
# Save values in the databases
|
186
202
|
haai_data = {}
|
187
203
|
aai_data = {}
|
188
204
|
# Ugly workaround to the insistence of FastAAI not to provide the files
|
189
205
|
# I ask for ;-)
|
190
|
-
qry_results = File.basename(qry_idx, '.
|
191
|
-
out_file = File.join(
|
206
|
+
# qry_results = File.basename(qry_idx, '.crystal') + '_results.txt'
|
207
|
+
# out_file = File.join(out_dir, 'results', qry_results)
|
208
|
+
out_file = Dir["#{out_dir}/results/*_results.txt"].first
|
209
|
+
unless out_file && File.exist?(out_file)
|
210
|
+
raise "Cannot find FastAAI results: #{Dir["#{out_dir}/**/*"]}"
|
211
|
+
end
|
192
212
|
File.open(out_file, 'r') do |fh|
|
193
213
|
fh.each do |ln|
|
194
214
|
out = ln.chomp.split("\t")
|
195
215
|
haai_data[out[1]] = [
|
196
216
|
out[2].to_f * 100, out[3].to_f * 100, out[4].to_i, out[5].to_i
|
197
217
|
]
|
198
|
-
|
218
|
+
if out[6] =~ /^>/
|
219
|
+
# J-bar = 0.843 <=> AAI-hat = 90%
|
220
|
+
# This approximation is not in the original FastAAI paper, but it
|
221
|
+
# allows to maintain monotonicity at AAI-hat ≥ 90%, which solves
|
222
|
+
# indexing issues the ML-estimate of "AAI ~ 95%"
|
223
|
+
out[6] = Math.sqrt(out[2].to_f) * 100
|
224
|
+
else
|
225
|
+
out[6] = out[6].to_f
|
226
|
+
end
|
199
227
|
aai_data[out[1]] = [out[6], 0, 0, 0]
|
200
228
|
end
|
201
229
|
end
|
@@ -204,7 +232,7 @@ module MiGA::DistanceRunner::Commands
|
|
204
232
|
batch_data_to_db(:aai, aai_data)
|
205
233
|
|
206
234
|
# Cleanup
|
207
|
-
[
|
235
|
+
FileUtils.rm_rf([crystals, t_db, q_db, out_dir])
|
208
236
|
end
|
209
237
|
|
210
238
|
##
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-03-
|
11
|
+
date: 2023-03-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -287,13 +287,30 @@ files:
|
|
287
287
|
- test/test_helper.rb
|
288
288
|
- test/with_daemon_test.rb
|
289
289
|
- test/with_option_test.rb
|
290
|
-
- utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm
|
291
|
-
- utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm
|
292
|
-
- utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm
|
293
|
-
- utils/FastAAI/FastAAI
|
294
290
|
- utils/FastAAI/FastAAI-legacy/FastAAI
|
295
291
|
- utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py
|
292
|
+
- utils/FastAAI/LICENSE
|
296
293
|
- utils/FastAAI/README.md
|
294
|
+
- utils/FastAAI/__init__.py
|
295
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962915_1.fna.gz
|
296
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962925_1.fna.gz
|
297
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962935_1.fna.gz
|
298
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962945_1.fna.gz
|
299
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000962995_1.fna.gz
|
300
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000963025_1.fna.gz
|
301
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000963055_1.fna.gz
|
302
|
+
- utils/FastAAI/example_genomes/Xanthomonas_albilineans_GCA_000963065_1.fna.gz
|
303
|
+
- utils/FastAAI/example_genomes/_Pseudomonas__cissicola_GCA_002019225_1.fna.gz
|
304
|
+
- utils/FastAAI/example_genomes/_Pseudomonas__cissicola_GCA_008801575_1.fna.gz
|
305
|
+
- utils/FastAAI/fastaai/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm
|
306
|
+
- utils/FastAAI/fastaai/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm
|
307
|
+
- utils/FastAAI/fastaai/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm
|
308
|
+
- utils/FastAAI/fastaai/__init__.py
|
309
|
+
- utils/FastAAI/fastaai/fastaai
|
310
|
+
- utils/FastAAI/fastaai/fastaai.py
|
311
|
+
- utils/FastAAI/fastaai/fastaai_miga_crystals_to_db.py
|
312
|
+
- utils/FastAAI/fastaai/fastaai_miga_preproc.py
|
313
|
+
- utils/FastAAI/metadata/Accession_names_and_IDs.txt
|
297
314
|
- utils/adapters.fa
|
298
315
|
- utils/cleanup-databases.rb
|
299
316
|
- utils/core-pan-plot.R
|