miga-base 0.7.26.2 → 0.7.26.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/classify_wf.rb +2 -2
- data/lib/miga/cli/action/derep_wf.rb +1 -1
- data/lib/miga/cli/action/doctor.rb +27 -15
- data/lib/miga/cli/action/doctor/base.rb +44 -22
- data/lib/miga/common.rb +1 -0
- data/lib/miga/json.rb +4 -2
- data/lib/miga/parallel.rb +31 -0
- data/lib/miga/sqlite.rb +3 -2
- data/lib/miga/version.rb +2 -2
- data/test/remote_dataset_test.rb +1 -1
- data/utils/distance/database.rb +0 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0f2c9f2013b77f3a3b1dff0fbb73e74dad0b836d17d4a2129110a6095ffc7d8a
|
4
|
+
data.tar.gz: 0d7a1c879970dc9ffae5bae8cbbb51f6ef7ef13e41373fcc7cd5cc93db841104
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4da1c02b538370a585efc788a71cbb9fafb5b8383c2882f0ab01d1bdec22a9dac2878bb1fa3a028f78ff19d22ecf70e50c67e20e71dc8a0308437ca19a6368db
|
7
|
+
data.tar.gz: 4eb38dafd4eb0dbaf69eed6fa734fec5c0d104d97d1f035cafdcd77365f93ecc723ac01eda643986f0676c4b4156a2504f92c1d98a02d0bd3955306e1a4924f5
|
@@ -50,7 +50,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
50
50
|
# Input data
|
51
51
|
ref_db = reference_db
|
52
52
|
norun = %w[
|
53
|
-
|
53
|
+
haai_distances aai_distances ani_distances clade_finding
|
54
54
|
]
|
55
55
|
p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
|
56
56
|
p = create_project(
|
@@ -59,7 +59,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
59
59
|
run_ssu: false, run_mytaxa_scan: false, run_distances: false
|
60
60
|
)
|
61
61
|
p.set_option(:ref_project, ref_db.path)
|
62
|
-
p.set_option(:tax_pvalue, cli[:pvalue]
|
62
|
+
p.set_option(:tax_pvalue, cli[:pvalue])
|
63
63
|
# Run
|
64
64
|
run_daemon
|
65
65
|
summarize(%w[cds assembly essential_genes]) if cli[:summaries]
|
@@ -52,7 +52,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
|
|
52
52
|
# Input data
|
53
53
|
p = create_project(
|
54
54
|
:assembly,
|
55
|
-
{
|
55
|
+
{ run_clades: false },
|
56
56
|
{ run_mytaxa_scan: false, run_ssu: false }
|
57
57
|
)
|
58
58
|
p.set_option(:gsp_metric, cli[:metric].to_s)
|
@@ -93,7 +93,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
93
93
|
k += 1
|
94
94
|
cli.advance('Datasets:', k, n, false) if i == 0
|
95
95
|
next unless k % cli[:threads] == i
|
96
|
-
each_database_file(d) do |db_file, metric, result|
|
96
|
+
each_database_file(d) do |db_file, metric, result, _rank|
|
97
97
|
check_sqlite3_database(db_file, metric) do
|
98
98
|
cli.say(
|
99
99
|
" > Removing malformed database from #{d.name}:#{result} "
|
@@ -119,24 +119,36 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
119
119
|
ref_ds = cli.load_project.each_dataset.select(&:ref?)
|
120
120
|
ref_names = ref_ds.map(&:name)
|
121
121
|
n = ref_ds.size
|
122
|
-
(0 .. cli[:threads] - 1).map do |i|
|
123
|
-
Process.fork do
|
124
|
-
k = 0
|
125
|
-
ref_ds.each do |d|
|
126
|
-
k += 1
|
127
|
-
cli.advance('Datasets:', k, n, false) if i == 0
|
128
|
-
next unless k % cli[:threads] == i
|
129
|
-
|
130
|
-
saved = saved_targets(d)
|
131
|
-
next if saved.nil?
|
132
122
|
|
133
|
-
|
134
|
-
|
135
|
-
|
123
|
+
# Read data first (threaded)
|
124
|
+
@distances = { aai: {}, ani: {} }
|
125
|
+
Dir.mktmpdir do |tmp|
|
126
|
+
MiGA::Parallel.process(cli[:threads]) do |thr|
|
127
|
+
idx = 0
|
128
|
+
ref_ds.each do |ds|
|
129
|
+
cli.advance('Reading:', idx + 1, n, false) if thr == 0
|
130
|
+
read_bidirectional(ds) if idx % cli[:threads] == thr
|
131
|
+
idx += 1
|
132
|
+
end
|
133
|
+
File.open("#{tmp}/#{thr}.json", 'w') do |fh|
|
134
|
+
fh.print JSON.fast_generate(@distances)
|
136
135
|
end
|
137
136
|
end
|
137
|
+
cli.say
|
138
|
+
|
139
|
+
cli[:threads].times do |i|
|
140
|
+
cli.advance('Merging:', i + 1, cli[:threads], false)
|
141
|
+
o = MiGA::Json.parse("#{tmp}/#{i}.json", symbolize: false)
|
142
|
+
o.each { |k, v| @distances[k.to_sym].merge!(v) }
|
143
|
+
end
|
144
|
+
cli.say
|
145
|
+
end
|
146
|
+
|
147
|
+
# Write missing values (threaded)
|
148
|
+
MiGA::Parallel.distribute(ref_ds, cli[:threads]) do |ds, idx, thr|
|
149
|
+
cli.advance('Datasets:', idx + 1, n, false) if thr == 0
|
150
|
+
save_bidirectional(ds)
|
138
151
|
end
|
139
|
-
Process.waitall
|
140
152
|
cli.say
|
141
153
|
end
|
142
154
|
|
@@ -16,22 +16,28 @@ module MiGA::Cli::Action::Doctor::Base
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def each_database_file(dataset, &blk)
|
19
|
-
ref_db = {
|
20
|
-
|
19
|
+
ref_db = {
|
20
|
+
haai: ['01.haai', :aai], aai: ['02.aai', :aai], ani: ['03.ani', :ani]
|
21
|
+
}
|
22
|
+
qry_db = {
|
23
|
+
haai: ['.haai.db', :aai], aai: ['.aai.db', :aai], ani: ['.ani.db', :ani]
|
24
|
+
}
|
21
25
|
base = File.join(dataset.project.path, 'data', '09.distances')
|
22
26
|
result = :distances
|
23
27
|
if dataset.ref?
|
24
28
|
file_db = "#{dataset.name}.db"
|
25
|
-
ref_db.each do |
|
29
|
+
ref_db.each do |rank, v|
|
30
|
+
dir, metric = *v
|
26
31
|
file = File.join(base, dir, file_db)
|
27
|
-
blk[file, metric, result] if File.exist? file
|
32
|
+
blk[file, metric, result, rank] if File.exist? file
|
28
33
|
end
|
29
34
|
base = File.join(base, '05.taxonomy')
|
30
35
|
result = :taxonomy
|
31
36
|
end
|
32
|
-
qry_db.each do |
|
37
|
+
qry_db.each do |rank, v|
|
38
|
+
ext, metric = *v
|
33
39
|
file = File.join(base, "#{dataset.name}#{ext}")
|
34
|
-
blk[file, metric, result] if File.exist? file
|
40
|
+
blk[file, metric, result, rank] if File.exist? file
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
@@ -109,25 +115,41 @@ module MiGA::Cli::Action::Doctor::Base
|
|
109
115
|
end
|
110
116
|
|
111
117
|
##
|
112
|
-
#
|
113
|
-
#
|
114
|
-
def
|
115
|
-
each_database_file(a) do |db_file, metric, result|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
).first
|
118
|
+
# Reads all the distance estimates in +a+ -> *, and saves them in memory
|
119
|
+
# in the +@distances+ variable.
|
120
|
+
def read_bidirectional(a)
|
121
|
+
each_database_file(a) do |db_file, metric, result, rank|
|
122
|
+
next if rank == :haai # No need for hAAI to be bidirectional
|
123
|
+
|
124
|
+
sql = "select seq2, #{metric}, sd, n, omega from #{metric}"
|
125
|
+
data = MiGA::SQLite.new(db_file).run(sql)
|
121
126
|
next if data.nil? || data.empty?
|
122
127
|
|
123
|
-
|
124
|
-
|
128
|
+
@distances[rank][a.name] ||= {}
|
129
|
+
data.each { |row| @distances[rank][a.name][row.shift] = row }
|
130
|
+
end
|
131
|
+
end
|
125
132
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
133
|
+
##
|
134
|
+
# Saves all the distance estimates in * -> +a+ into the +a+ databases
|
135
|
+
# (as +a+ -> *), where +a+ is a MiGA::Dataset object
|
136
|
+
def save_bidirectional(a)
|
137
|
+
each_database_file(a) do |db_file, metric, result, rank|
|
138
|
+
next if rank == :haai # No need for hAAI to be bidirectional
|
139
|
+
|
140
|
+
b2a = @distances[rank].map { |b_name, v| b_name if v[a.name] }.compact
|
141
|
+
a2b = @distances[rank][a.name].keys
|
142
|
+
SQLite3::Database.new(db_file) do |db|
|
143
|
+
sql = <<~SQL
|
144
|
+
insert into #{metric}(seq1, seq2, #{metric}, sd, n, omega) \
|
145
|
+
values(?, ?, ?, ?, ?, ?);
|
146
|
+
SQL
|
147
|
+
db.execute('BEGIN TRANSACTION;')
|
148
|
+
(b2a - a2b).each do |b_name|
|
149
|
+
db.execute(sql, [a.name, b_name] + @distances[rank][b_name][a.name])
|
150
|
+
end
|
151
|
+
db.execute('COMMIT;')
|
152
|
+
end
|
131
153
|
end
|
132
154
|
end
|
133
155
|
end
|
data/lib/miga/common.rb
CHANGED
data/lib/miga/json.rb
CHANGED
@@ -45,8 +45,10 @@ class MiGA::Json < MiGA::MiGA
|
|
45
45
|
raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
|
46
46
|
|
47
47
|
# Parse JSON
|
48
|
-
params = {
|
49
|
-
|
48
|
+
params = {
|
49
|
+
symbolize_names: opts[:symbolize],
|
50
|
+
create_additions: opts[:additions]
|
51
|
+
}
|
50
52
|
y = JSON.parse(cont, params)
|
51
53
|
|
52
54
|
# Add defaults
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
##
|
4
|
+
# Parallel execution in MiGA.
|
5
|
+
class MiGA::Parallel < MiGA::MiGA
|
6
|
+
class << self
|
7
|
+
##
|
8
|
+
# Executes the passed block with the thread number as argument (0-numbered)
|
9
|
+
# in +threads+ processes
|
10
|
+
def process(threads)
|
11
|
+
threads.times do |i|
|
12
|
+
Process.fork { yield(i) }
|
13
|
+
end
|
14
|
+
Process.waitall
|
15
|
+
end
|
16
|
+
|
17
|
+
##
|
18
|
+
# Distributes +enum+ across +threads+ and calls the passed block with args:
|
19
|
+
# 1. Unitary object from +enum+
|
20
|
+
# 2. Index of the unitary object
|
21
|
+
# 3. Index of the acting thread
|
22
|
+
def distribute(enum, threads)
|
23
|
+
process(threads) do |thr|
|
24
|
+
enum.each_with_index do |obj, idx|
|
25
|
+
yield(obj, idx, thr) if idx % threads == thr
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
data/lib/miga/sqlite.rb
CHANGED
@@ -37,8 +37,9 @@ class MiGA::SQLite < MiGA::MiGA
|
|
37
37
|
# Executes +cmd+ and returns the result
|
38
38
|
def run(*cmd)
|
39
39
|
busy_attempts ||= 0
|
40
|
-
|
41
|
-
conn.execute(*cmd)
|
40
|
+
y = nil
|
41
|
+
SQLite3::Database.new(path) { |conn| y = conn.execute(*cmd) }
|
42
|
+
y
|
42
43
|
rescue SQLite3::BusyException => e
|
43
44
|
busy_attempts += 1
|
44
45
|
raise "Database busy #{path}: #{e.message}" if busy_attempts >= 3
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.7, 26, 2].freeze
|
13
|
+
VERSION = [0.7, 26, 3].freeze
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2021, 3,
|
21
|
+
VERSION_DATE = Date.new(2021, 3, 11)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
data/test/remote_dataset_test.rb
CHANGED
@@ -47,7 +47,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def test_net_ftp
|
50
|
-
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/
|
50
|
+
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/gap/GAPJ01.fasta.gz'
|
51
51
|
n = 'Cjac_L14'
|
52
52
|
rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
|
53
53
|
assert_equal([cjac], rd.ids)
|
data/utils/distance/database.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.26.2
|
4
|
+
version: 0.7.26.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -187,6 +187,7 @@ files:
|
|
187
187
|
- lib/miga/json.rb
|
188
188
|
- lib/miga/lair.rb
|
189
189
|
- lib/miga/metadata.rb
|
190
|
+
- lib/miga/parallel.rb
|
190
191
|
- lib/miga/project.rb
|
191
192
|
- lib/miga/project/base.rb
|
192
193
|
- lib/miga/project/dataset.rb
|