miga-base 1.1.3.0 → 1.1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/doctor/base.rb +6 -5
- data/lib/miga/version.rb +2 -2
- data/utils/FastAAI/FastAAI +18 -6
- data/utils/distance/commands.rb +25 -11
- data/utils/distance/database.rb +2 -0
- data/utils/subclade/pipeline.rb +9 -8
- data/utils/subclades.R +22 -18
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 648be93307cb7996121ae4bac560078d6e9647f104efc4f0ab4d3b43751cd97d
|
4
|
+
data.tar.gz: a84201cfcce7903702a62f6c8e73c2b0d02c364f680f93ef5ac27470c7ad3e1a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 93c58af91739aea6202085c0f4690cbd344fe873fb212e01780ac5fcdd240cdbd6a0b52047c4ea2582f56a0205976e3a46994646c2dd9b28d230fa655380e4e6
|
7
|
+
data.tar.gz: 0f5d9082d800fb54aff6b47413da9135b170d683b400a5641612936a541f238dd85e7ab768877ee67247be7c5ef5916f5b794fad9fb372a10184ca083ce98eb6
|
@@ -59,13 +59,14 @@ module MiGA::Cli::Action::Doctor::Base
|
|
59
59
|
next if (lineno += 1) == 1
|
60
60
|
|
61
61
|
r = ln.split("\t")
|
62
|
-
|
62
|
+
names = [r[0], r[1]]
|
63
|
+
next unless names.any? { |i| p.dataset(i).nil? }
|
63
64
|
|
64
|
-
|
65
|
-
if p.dataset(
|
66
|
-
notok[
|
65
|
+
names.each do |i|
|
66
|
+
if p.dataset(i).nil? || !p.dataset(i).active?
|
67
|
+
notok[i] = true
|
67
68
|
else
|
68
|
-
fix[
|
69
|
+
fix[i] = true
|
69
70
|
end
|
70
71
|
end
|
71
72
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.1, 3, 0].freeze
|
15
|
+
VERSION = [1.1, 3, 4].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2021, 11,
|
23
|
+
VERSION_DATE = Date.new(2021, 11, 30)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/utils/FastAAI/FastAAI
CHANGED
@@ -2702,6 +2702,8 @@ def merge_db_opts():
|
|
2702
2702
|
|
2703
2703
|
parser.add_argument('-d', '--donors', dest = 'donors', default = None, help = 'Comma-separated string of paths to one or more donor databases. The genomes FROM the donors will be added TO the recipient and the donors will be unaltered')
|
2704
2704
|
|
2705
|
+
parser.add_argument('--donor_file', dest = 'donor_file', default = None, help = 'Alternative way to supply donors. A file containing paths to the donor databases, 1 per line')
|
2706
|
+
|
2705
2707
|
parser.add_argument('-r', '--recipient', dest = 'recipient', default = None, help = 'Path to the recipient database. Any genomes FROM the donor database not already in the recipient will be added to this database.')
|
2706
2708
|
|
2707
2709
|
parser.add_argument('--verbose', dest = 'verbose', action='store_true', help = 'Print minor updates to console. Major updates are printed regardless.')
|
@@ -2720,16 +2722,23 @@ def merge_db_thread_starter(rev_index, per_db_accs):
|
|
2720
2722
|
|
2721
2723
|
|
2722
2724
|
|
2723
|
-
def merge_db(recipient, donors, verbose, threads):
|
2725
|
+
def merge_db(recipient, donors, donor_file, verbose, threads):
|
2724
2726
|
#Prettier on the CLI
|
2725
|
-
|
2727
|
+
|
2728
|
+
if donor_file is not None:
|
2729
|
+
fh = agnostic_reader(donor_file)
|
2730
|
+
donors = [line.strip() for line in fh]
|
2731
|
+
fh.close()
|
2732
|
+
|
2726
2733
|
if donors is None or recipient is None:
|
2727
2734
|
print("Either donor or target not given. FastAAI is exiting.")
|
2728
2735
|
return None
|
2729
2736
|
|
2730
2737
|
print("")
|
2731
2738
|
|
2732
|
-
|
2739
|
+
if donor_file is None:
|
2740
|
+
donors = donors.split(",")
|
2741
|
+
|
2733
2742
|
valid_donors = []
|
2734
2743
|
for d in donors:
|
2735
2744
|
if os.path.exists(d):
|
@@ -2945,10 +2954,12 @@ def merge_db(recipient, donors, verbose, threads):
|
|
2945
2954
|
pass
|
2946
2955
|
except:
|
2947
2956
|
#Error
|
2948
|
-
|
2957
|
+
if os.path.exists(temp_dir):
|
2958
|
+
shutil.rmtree(temp_dir)
|
2949
2959
|
finally:
|
2950
2960
|
#Success
|
2951
|
-
|
2961
|
+
if os.path.exists(temp_dir):
|
2962
|
+
shutil.rmtree(temp_dir)
|
2952
2963
|
|
2953
2964
|
print("\nDatabases merged!")
|
2954
2965
|
|
@@ -3454,10 +3465,11 @@ def main():
|
|
3454
3465
|
|
3455
3466
|
recipient = opts.recipient
|
3456
3467
|
donors = opts.donors
|
3468
|
+
donor_file = opts.donor_file
|
3457
3469
|
verbose = opts.verbose
|
3458
3470
|
threads = opts.threads
|
3459
3471
|
|
3460
|
-
merge_db(recipient, donors, verbose, threads)
|
3472
|
+
merge_db(recipient, donors, donor_file, verbose, threads)
|
3461
3473
|
|
3462
3474
|
#################### Query files vs DB ########################
|
3463
3475
|
|
data/utils/distance/commands.rb
CHANGED
@@ -151,22 +151,36 @@ module MiGA::DistanceRunner::Commands
|
|
151
151
|
donors << tgt_idx if tgt_idx
|
152
152
|
end
|
153
153
|
return nil if donors.empty?
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
154
|
+
|
155
|
+
# Build target database
|
156
|
+
f1 = tmp_file
|
157
|
+
if donors.size == 1
|
158
|
+
File.copy(donors.first, f1)
|
159
|
+
else
|
160
|
+
File.open(f0 = tmp_file, 'w') { |fh| donors.each { |i| fh.puts i } }
|
161
|
+
run_cmd(
|
162
|
+
<<~CMD
|
163
|
+
FastAAI merge_db --threads #{opts[:thr]} \
|
164
|
+
--donor_file "#{f0}" --recipient "#{f1}"
|
165
|
+
CMD
|
166
|
+
)
|
167
|
+
raise "Cannot merge databases into: #{f1}" unless File.size?(f1)
|
168
|
+
end
|
158
169
|
|
159
170
|
# Run FastAAI
|
160
|
-
run_cmd
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
171
|
+
run_cmd(
|
172
|
+
<<~CMD
|
173
|
+
FastAAI db_query --query "#{qry_idx}" --target "#{f1}" \
|
174
|
+
--output "#{f2 = tmp_file}" --threads #{opts[:thr]} \
|
175
|
+
--do_stdev
|
176
|
+
CMD
|
177
|
+
)
|
178
|
+
raise "Cannot find FastAAI output directory: #{f2}" unless Dir.exist?(f2)
|
165
179
|
|
166
180
|
# Save values in the databases
|
167
181
|
haai_data = {}
|
168
182
|
aai_data = {}
|
169
|
-
# Ugly workaround to the insistence of FastAAI to
|
183
|
+
# Ugly workaround to the insistence of FastAAI not to provide the files
|
170
184
|
# I ask for ;-)
|
171
185
|
qry_results = File.basename(qry_idx, '.faix') + '_results.txt'
|
172
186
|
out_file = File.join(f2, 'results', qry_results)
|
@@ -214,6 +228,6 @@ module MiGA::DistanceRunner::Commands
|
|
214
228
|
|
215
229
|
def run_cmd(cmd)
|
216
230
|
puts "CMD: #{cmd}"
|
217
|
-
`#{cmd}`
|
231
|
+
puts `#{cmd} 2>&1`
|
218
232
|
end
|
219
233
|
end
|
data/utils/distance/database.rb
CHANGED
@@ -127,6 +127,7 @@ module MiGA::DistanceRunner::Database
|
|
127
127
|
db = tmp_dbs[metric]
|
128
128
|
table = metric == :haai ? :aai : metric
|
129
129
|
SQLite3::Database.new(db) do |conn|
|
130
|
+
conn.execute('BEGIN TRANSACTION')
|
130
131
|
data.each do |k, v|
|
131
132
|
sql = <<~SQL
|
132
133
|
insert into #{table} (
|
@@ -135,6 +136,7 @@ module MiGA::DistanceRunner::Database
|
|
135
136
|
SQL
|
136
137
|
conn.execute(sql, [dataset.name, k] + v)
|
137
138
|
end
|
139
|
+
conn.execute('COMMIT')
|
138
140
|
end
|
139
141
|
checkpoint(metric)
|
140
142
|
end
|
data/utils/subclade/pipeline.rb
CHANGED
@@ -7,9 +7,12 @@ module MiGA::SubcladeRunner::Pipeline
|
|
7
7
|
aai90: [:aai_distances, opts[:gsp_aai], :aai]
|
8
8
|
}
|
9
9
|
tasks.each do |k, par|
|
10
|
+
# Run only the requested metric
|
11
|
+
next unless par[2].to_s == opts[:gsp_metric]
|
12
|
+
|
10
13
|
# Final output
|
11
14
|
ogs_file = "miga-project.#{k}-clades"
|
12
|
-
next if File.size?
|
15
|
+
next if File.size?(ogs_file)
|
13
16
|
|
14
17
|
# Build ABC files
|
15
18
|
abc_path = tmp_file("#{k}.abc")
|
@@ -20,7 +23,7 @@ module MiGA::SubcladeRunner::Pipeline
|
|
20
23
|
next if ln =~ /^a\tb\tvalue\t/
|
21
24
|
|
22
25
|
r = ln.chomp.split("\t")
|
23
|
-
ofh.puts
|
26
|
+
ofh.puts("G>#{r[0]}\tG>#{r[1]}\t#{r[2]}") if r[2].to_f >= par[1]
|
24
27
|
end
|
25
28
|
end
|
26
29
|
ofh.close
|
@@ -29,16 +32,14 @@ module MiGA::SubcladeRunner::Pipeline
|
|
29
32
|
`ogs.mcl.rb -o '#{ogs_file}.tmp' --abc '#{abc_path}' -t '#{opts[:thr]}'`
|
30
33
|
File.open(ogs_file, 'w') do |fh|
|
31
34
|
File.foreach("#{ogs_file}.tmp").with_index do |ln, lno|
|
32
|
-
fh.puts
|
35
|
+
fh.puts(ln) if lno > 0
|
33
36
|
end
|
34
37
|
end
|
35
38
|
File.unlink "#{ogs_file}.tmp"
|
36
39
|
else
|
37
|
-
FileUtils.touch
|
38
|
-
end
|
39
|
-
if par[2].to_s == opts[:gsp_metric]
|
40
|
-
FileUtils.cp(ogs_file, "miga-project.gsp-clades")
|
40
|
+
FileUtils.touch(ogs_file)
|
41
41
|
end
|
42
|
+
FileUtils.cp(ogs_file, 'miga-project.gsp-clades')
|
42
43
|
end
|
43
44
|
|
44
45
|
# Find genomospecies medoids
|
@@ -67,7 +68,7 @@ module MiGA::SubcladeRunner::Pipeline
|
|
67
68
|
metric_res = project.result(step) or raise "Incomplete step #{step}"
|
68
69
|
matrix = metric_res.file_path(:matrix)
|
69
70
|
`Rscript '#{src}' '#{matrix}' miga-project '#{opts[:thr]}' \
|
70
|
-
miga-project.
|
71
|
+
miga-project.gsp-medoids '#{opts[:run_clades] ? 'cluster' : 'empty'}'`
|
71
72
|
if File.exist? 'miga-project.nwk'
|
72
73
|
File.rename('miga-project.nwk', "miga-project.#{metric}.nwk")
|
73
74
|
end
|
data/utils/subclades.R
CHANGED
@@ -26,14 +26,18 @@ subclades <- function(ani_file, out_base, thr = 1, ani.d = dist(0), sel = NA) {
|
|
26
26
|
# Normalize input matrix
|
27
27
|
dist_rds <- paste(out_base, "dist.rds", sep = ".")
|
28
28
|
if (!missing(ani_file)) {
|
29
|
-
if(length(ani.d) == 0
|
30
|
-
|
31
|
-
|
32
|
-
if (is.null(ani.d)) {
|
33
|
-
generate_empty_files(out_base)
|
34
|
-
return(NULL)
|
29
|
+
if (length(ani.d) == 0) {
|
30
|
+
if (file.exists(dist_rds)) {
|
31
|
+
ani.d <- readRDS(dist_rds)
|
35
32
|
} else {
|
36
|
-
|
33
|
+
# Read from ani_file
|
34
|
+
ani.d <- ani_distance(ani_file, sel)
|
35
|
+
if (is.null(ani.d)) {
|
36
|
+
generate_empty_files(out_base)
|
37
|
+
return(NULL)
|
38
|
+
} else {
|
39
|
+
saveRDS(ani.d, dist_rds)
|
40
|
+
}
|
37
41
|
}
|
38
42
|
}
|
39
43
|
}
|
@@ -104,17 +108,6 @@ subclade_clustering <- function (out_base, thr, ani.d, dist_rds) {
|
|
104
108
|
}
|
105
109
|
if (length(labels(ani.d)) <= 8L) return(list())
|
106
110
|
|
107
|
-
# Build tree
|
108
|
-
say("Tree")
|
109
|
-
ani.ph <- bionj(ani.d)
|
110
|
-
say("- Write")
|
111
|
-
express.ori <- options("expressions")$expressions
|
112
|
-
if(express.ori < ani.ph$Nnode * 4){
|
113
|
-
options(expressions=min(c(5e7, ani.ph$Nnode * 4)))
|
114
|
-
}
|
115
|
-
write.tree(ani.ph, paste(out_base, ".nwk", sep = ""))
|
116
|
-
options(expressions=express.ori)
|
117
|
-
|
118
111
|
# Silhouette
|
119
112
|
say("Silhouette")
|
120
113
|
nn <- length(labels(ani.d))
|
@@ -146,6 +139,17 @@ subclade_clustering <- function (out_base, thr, ani.d, dist_rds) {
|
|
146
139
|
ani.types <- ani.cl$clustering
|
147
140
|
ani.medoids <- ani.cl$medoids
|
148
141
|
|
142
|
+
# Build tree
|
143
|
+
say("Tree")
|
144
|
+
ani.ph <- bionj(ani.d)
|
145
|
+
say("- Write")
|
146
|
+
express.ori <- options("expressions")$expressions
|
147
|
+
if(express.ori < ani.ph$Nnode * 4){
|
148
|
+
options(expressions=min(c(5e7, ani.ph$Nnode * 4)))
|
149
|
+
}
|
150
|
+
write.tree(ani.ph, paste(out_base, ".nwk", sep = ""))
|
151
|
+
options(expressions=express.ori)
|
152
|
+
|
149
153
|
# Generate graphic report
|
150
154
|
say("Graphic report")
|
151
155
|
pdf(paste(out_base, ".pdf", sep = ""), 7, 12)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.3.0
|
4
|
+
version: 1.1.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-11-
|
11
|
+
date: 2021-11-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|