miga-base 1.1.3.0 → 1.1.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/doctor/base.rb +6 -5
- data/lib/miga/version.rb +2 -2
- data/utils/FastAAI/FastAAI +18 -6
- data/utils/distance/commands.rb +25 -11
- data/utils/distance/database.rb +2 -0
- data/utils/subclade/pipeline.rb +9 -8
- data/utils/subclades.R +22 -18
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 648be93307cb7996121ae4bac560078d6e9647f104efc4f0ab4d3b43751cd97d
|
4
|
+
data.tar.gz: a84201cfcce7903702a62f6c8e73c2b0d02c364f680f93ef5ac27470c7ad3e1a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 93c58af91739aea6202085c0f4690cbd344fe873fb212e01780ac5fcdd240cdbd6a0b52047c4ea2582f56a0205976e3a46994646c2dd9b28d230fa655380e4e6
|
7
|
+
data.tar.gz: 0f5d9082d800fb54aff6b47413da9135b170d683b400a5641612936a541f238dd85e7ab768877ee67247be7c5ef5916f5b794fad9fb372a10184ca083ce98eb6
|
@@ -59,13 +59,14 @@ module MiGA::Cli::Action::Doctor::Base
|
|
59
59
|
next if (lineno += 1) == 1
|
60
60
|
|
61
61
|
r = ln.split("\t")
|
62
|
-
|
62
|
+
names = [r[0], r[1]]
|
63
|
+
next unless names.any? { |i| p.dataset(i).nil? }
|
63
64
|
|
64
|
-
|
65
|
-
if p.dataset(
|
66
|
-
notok[
|
65
|
+
names.each do |i|
|
66
|
+
if p.dataset(i).nil? || !p.dataset(i).active?
|
67
|
+
notok[i] = true
|
67
68
|
else
|
68
|
-
fix[
|
69
|
+
fix[i] = true
|
69
70
|
end
|
70
71
|
end
|
71
72
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.1, 3,
|
15
|
+
VERSION = [1.1, 3, 4].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2021, 11,
|
23
|
+
VERSION_DATE = Date.new(2021, 11, 30)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/utils/FastAAI/FastAAI
CHANGED
@@ -2702,6 +2702,8 @@ def merge_db_opts():
|
|
2702
2702
|
|
2703
2703
|
parser.add_argument('-d', '--donors', dest = 'donors', default = None, help = 'Comma-separated string of paths to one or more donor databases. The genomes FROM the donors will be added TO the recipient and the donors will be unaltered')
|
2704
2704
|
|
2705
|
+
parser.add_argument('--donor_file', dest = 'donor_file', default = None, help = 'Alternative way to supply donors. A file containing paths to the donor databases, 1 per line')
|
2706
|
+
|
2705
2707
|
parser.add_argument('-r', '--recipient', dest = 'recipient', default = None, help = 'Path to the recipient database. Any genomes FROM the donor database not already in the recipient will be added to this database.')
|
2706
2708
|
|
2707
2709
|
parser.add_argument('--verbose', dest = 'verbose', action='store_true', help = 'Print minor updates to console. Major updates are printed regardless.')
|
@@ -2720,16 +2722,23 @@ def merge_db_thread_starter(rev_index, per_db_accs):
|
|
2720
2722
|
|
2721
2723
|
|
2722
2724
|
|
2723
|
-
def merge_db(recipient, donors, verbose, threads):
|
2725
|
+
def merge_db(recipient, donors, donor_file, verbose, threads):
|
2724
2726
|
#Prettier on the CLI
|
2725
|
-
|
2727
|
+
|
2728
|
+
if donor_file is not None:
|
2729
|
+
fh = agnostic_reader(donor_file)
|
2730
|
+
donors = [line.strip() for line in fh]
|
2731
|
+
fh.close()
|
2732
|
+
|
2726
2733
|
if donors is None or recipient is None:
|
2727
2734
|
print("Either donor or target not given. FastAAI is exiting.")
|
2728
2735
|
return None
|
2729
2736
|
|
2730
2737
|
print("")
|
2731
2738
|
|
2732
|
-
|
2739
|
+
if donor_file is None:
|
2740
|
+
donors = donors.split(",")
|
2741
|
+
|
2733
2742
|
valid_donors = []
|
2734
2743
|
for d in donors:
|
2735
2744
|
if os.path.exists(d):
|
@@ -2945,10 +2954,12 @@ def merge_db(recipient, donors, verbose, threads):
|
|
2945
2954
|
pass
|
2946
2955
|
except:
|
2947
2956
|
#Error
|
2948
|
-
|
2957
|
+
if os.path.exists(temp_dir):
|
2958
|
+
shutil.rmtree(temp_dir)
|
2949
2959
|
finally:
|
2950
2960
|
#Success
|
2951
|
-
|
2961
|
+
if os.path.exists(temp_dir):
|
2962
|
+
shutil.rmtree(temp_dir)
|
2952
2963
|
|
2953
2964
|
print("\nDatabases merged!")
|
2954
2965
|
|
@@ -3454,10 +3465,11 @@ def main():
|
|
3454
3465
|
|
3455
3466
|
recipient = opts.recipient
|
3456
3467
|
donors = opts.donors
|
3468
|
+
donor_file = opts.donor_file
|
3457
3469
|
verbose = opts.verbose
|
3458
3470
|
threads = opts.threads
|
3459
3471
|
|
3460
|
-
merge_db(recipient, donors, verbose, threads)
|
3472
|
+
merge_db(recipient, donors, donor_file, verbose, threads)
|
3461
3473
|
|
3462
3474
|
#################### Query files vs DB ########################
|
3463
3475
|
|
data/utils/distance/commands.rb
CHANGED
@@ -151,22 +151,36 @@ module MiGA::DistanceRunner::Commands
|
|
151
151
|
donors << tgt_idx if tgt_idx
|
152
152
|
end
|
153
153
|
return nil if donors.empty?
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
154
|
+
|
155
|
+
# Build target database
|
156
|
+
f1 = tmp_file
|
157
|
+
if donors.size == 1
|
158
|
+
File.copy(donors.first, f1)
|
159
|
+
else
|
160
|
+
File.open(f0 = tmp_file, 'w') { |fh| donors.each { |i| fh.puts i } }
|
161
|
+
run_cmd(
|
162
|
+
<<~CMD
|
163
|
+
FastAAI merge_db --threads #{opts[:thr]} \
|
164
|
+
--donor_file "#{f0}" --recipient "#{f1}"
|
165
|
+
CMD
|
166
|
+
)
|
167
|
+
raise "Cannot merge databases into: #{f1}" unless File.size?(f1)
|
168
|
+
end
|
158
169
|
|
159
170
|
# Run FastAAI
|
160
|
-
run_cmd
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
171
|
+
run_cmd(
|
172
|
+
<<~CMD
|
173
|
+
FastAAI db_query --query "#{qry_idx}" --target "#{f1}" \
|
174
|
+
--output "#{f2 = tmp_file}" --threads #{opts[:thr]} \
|
175
|
+
--do_stdev
|
176
|
+
CMD
|
177
|
+
)
|
178
|
+
raise "Cannot find FastAAI output directory: #{f2}" unless Dir.exist?(f2)
|
165
179
|
|
166
180
|
# Save values in the databases
|
167
181
|
haai_data = {}
|
168
182
|
aai_data = {}
|
169
|
-
# Ugly workaround to the insistence of FastAAI to
|
183
|
+
# Ugly workaround to the insistence of FastAAI not to provide the files
|
170
184
|
# I ask for ;-)
|
171
185
|
qry_results = File.basename(qry_idx, '.faix') + '_results.txt'
|
172
186
|
out_file = File.join(f2, 'results', qry_results)
|
@@ -214,6 +228,6 @@ module MiGA::DistanceRunner::Commands
|
|
214
228
|
|
215
229
|
def run_cmd(cmd)
|
216
230
|
puts "CMD: #{cmd}"
|
217
|
-
`#{cmd}`
|
231
|
+
puts `#{cmd} 2>&1`
|
218
232
|
end
|
219
233
|
end
|
data/utils/distance/database.rb
CHANGED
@@ -127,6 +127,7 @@ module MiGA::DistanceRunner::Database
|
|
127
127
|
db = tmp_dbs[metric]
|
128
128
|
table = metric == :haai ? :aai : metric
|
129
129
|
SQLite3::Database.new(db) do |conn|
|
130
|
+
conn.execute('BEGIN TRANSACTION')
|
130
131
|
data.each do |k, v|
|
131
132
|
sql = <<~SQL
|
132
133
|
insert into #{table} (
|
@@ -135,6 +136,7 @@ module MiGA::DistanceRunner::Database
|
|
135
136
|
SQL
|
136
137
|
conn.execute(sql, [dataset.name, k] + v)
|
137
138
|
end
|
139
|
+
conn.execute('COMMIT')
|
138
140
|
end
|
139
141
|
checkpoint(metric)
|
140
142
|
end
|
data/utils/subclade/pipeline.rb
CHANGED
@@ -7,9 +7,12 @@ module MiGA::SubcladeRunner::Pipeline
|
|
7
7
|
aai90: [:aai_distances, opts[:gsp_aai], :aai]
|
8
8
|
}
|
9
9
|
tasks.each do |k, par|
|
10
|
+
# Run only the requested metric
|
11
|
+
next unless par[2].to_s == opts[:gsp_metric]
|
12
|
+
|
10
13
|
# Final output
|
11
14
|
ogs_file = "miga-project.#{k}-clades"
|
12
|
-
next if File.size?
|
15
|
+
next if File.size?(ogs_file)
|
13
16
|
|
14
17
|
# Build ABC files
|
15
18
|
abc_path = tmp_file("#{k}.abc")
|
@@ -20,7 +23,7 @@ module MiGA::SubcladeRunner::Pipeline
|
|
20
23
|
next if ln =~ /^a\tb\tvalue\t/
|
21
24
|
|
22
25
|
r = ln.chomp.split("\t")
|
23
|
-
ofh.puts
|
26
|
+
ofh.puts("G>#{r[0]}\tG>#{r[1]}\t#{r[2]}") if r[2].to_f >= par[1]
|
24
27
|
end
|
25
28
|
end
|
26
29
|
ofh.close
|
@@ -29,16 +32,14 @@ module MiGA::SubcladeRunner::Pipeline
|
|
29
32
|
`ogs.mcl.rb -o '#{ogs_file}.tmp' --abc '#{abc_path}' -t '#{opts[:thr]}'`
|
30
33
|
File.open(ogs_file, 'w') do |fh|
|
31
34
|
File.foreach("#{ogs_file}.tmp").with_index do |ln, lno|
|
32
|
-
fh.puts
|
35
|
+
fh.puts(ln) if lno > 0
|
33
36
|
end
|
34
37
|
end
|
35
38
|
File.unlink "#{ogs_file}.tmp"
|
36
39
|
else
|
37
|
-
FileUtils.touch
|
38
|
-
end
|
39
|
-
if par[2].to_s == opts[:gsp_metric]
|
40
|
-
FileUtils.cp(ogs_file, "miga-project.gsp-clades")
|
40
|
+
FileUtils.touch(ogs_file)
|
41
41
|
end
|
42
|
+
FileUtils.cp(ogs_file, 'miga-project.gsp-clades')
|
42
43
|
end
|
43
44
|
|
44
45
|
# Find genomospecies medoids
|
@@ -67,7 +68,7 @@ module MiGA::SubcladeRunner::Pipeline
|
|
67
68
|
metric_res = project.result(step) or raise "Incomplete step #{step}"
|
68
69
|
matrix = metric_res.file_path(:matrix)
|
69
70
|
`Rscript '#{src}' '#{matrix}' miga-project '#{opts[:thr]}' \
|
70
|
-
miga-project.
|
71
|
+
miga-project.gsp-medoids '#{opts[:run_clades] ? 'cluster' : 'empty'}'`
|
71
72
|
if File.exist? 'miga-project.nwk'
|
72
73
|
File.rename('miga-project.nwk', "miga-project.#{metric}.nwk")
|
73
74
|
end
|
data/utils/subclades.R
CHANGED
@@ -26,14 +26,18 @@ subclades <- function(ani_file, out_base, thr = 1, ani.d = dist(0), sel = NA) {
|
|
26
26
|
# Normalize input matrix
|
27
27
|
dist_rds <- paste(out_base, "dist.rds", sep = ".")
|
28
28
|
if (!missing(ani_file)) {
|
29
|
-
if(length(ani.d) == 0
|
30
|
-
|
31
|
-
|
32
|
-
if (is.null(ani.d)) {
|
33
|
-
generate_empty_files(out_base)
|
34
|
-
return(NULL)
|
29
|
+
if (length(ani.d) == 0) {
|
30
|
+
if (file.exists(dist_rds)) {
|
31
|
+
ani.d <- readRDS(dist_rds)
|
35
32
|
} else {
|
36
|
-
|
33
|
+
# Read from ani_file
|
34
|
+
ani.d <- ani_distance(ani_file, sel)
|
35
|
+
if (is.null(ani.d)) {
|
36
|
+
generate_empty_files(out_base)
|
37
|
+
return(NULL)
|
38
|
+
} else {
|
39
|
+
saveRDS(ani.d, dist_rds)
|
40
|
+
}
|
37
41
|
}
|
38
42
|
}
|
39
43
|
}
|
@@ -104,17 +108,6 @@ subclade_clustering <- function (out_base, thr, ani.d, dist_rds) {
|
|
104
108
|
}
|
105
109
|
if (length(labels(ani.d)) <= 8L) return(list())
|
106
110
|
|
107
|
-
# Build tree
|
108
|
-
say("Tree")
|
109
|
-
ani.ph <- bionj(ani.d)
|
110
|
-
say("- Write")
|
111
|
-
express.ori <- options("expressions")$expressions
|
112
|
-
if(express.ori < ani.ph$Nnode * 4){
|
113
|
-
options(expressions=min(c(5e7, ani.ph$Nnode * 4)))
|
114
|
-
}
|
115
|
-
write.tree(ani.ph, paste(out_base, ".nwk", sep = ""))
|
116
|
-
options(expressions=express.ori)
|
117
|
-
|
118
111
|
# Silhouette
|
119
112
|
say("Silhouette")
|
120
113
|
nn <- length(labels(ani.d))
|
@@ -146,6 +139,17 @@ subclade_clustering <- function (out_base, thr, ani.d, dist_rds) {
|
|
146
139
|
ani.types <- ani.cl$clustering
|
147
140
|
ani.medoids <- ani.cl$medoids
|
148
141
|
|
142
|
+
# Build tree
|
143
|
+
say("Tree")
|
144
|
+
ani.ph <- bionj(ani.d)
|
145
|
+
say("- Write")
|
146
|
+
express.ori <- options("expressions")$expressions
|
147
|
+
if(express.ori < ani.ph$Nnode * 4){
|
148
|
+
options(expressions=min(c(5e7, ani.ph$Nnode * 4)))
|
149
|
+
}
|
150
|
+
write.tree(ani.ph, paste(out_base, ".nwk", sep = ""))
|
151
|
+
options(expressions=express.ori)
|
152
|
+
|
149
153
|
# Generate graphic report
|
150
154
|
say("Graphic report")
|
151
155
|
pdf(paste(out_base, ".pdf", sep = ""), 7, 12)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.3.
|
4
|
+
version: 1.1.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-11-
|
11
|
+
date: 2021-11-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|