miga-base 0.7.26.2 → 0.7.26.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/classify_wf.rb +2 -2
- data/lib/miga/cli/action/derep_wf.rb +1 -1
- data/lib/miga/cli/action/doctor.rb +27 -15
- data/lib/miga/cli/action/doctor/base.rb +44 -22
- data/lib/miga/common.rb +1 -0
- data/lib/miga/json.rb +4 -2
- data/lib/miga/parallel.rb +31 -0
- data/lib/miga/sqlite.rb +3 -2
- data/lib/miga/version.rb +2 -2
- data/test/remote_dataset_test.rb +1 -1
- data/utils/distance/database.rb +0 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0f2c9f2013b77f3a3b1dff0fbb73e74dad0b836d17d4a2129110a6095ffc7d8a
|
4
|
+
data.tar.gz: 0d7a1c879970dc9ffae5bae8cbbb51f6ef7ef13e41373fcc7cd5cc93db841104
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4da1c02b538370a585efc788a71cbb9fafb5b8383c2882f0ab01d1bdec22a9dac2878bb1fa3a028f78ff19d22ecf70e50c67e20e71dc8a0308437ca19a6368db
|
7
|
+
data.tar.gz: 4eb38dafd4eb0dbaf69eed6fa734fec5c0d104d97d1f035cafdcd77365f93ecc723ac01eda643986f0676c4b4156a2504f92c1d98a02d0bd3955306e1a4924f5
|
@@ -50,7 +50,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
50
50
|
# Input data
|
51
51
|
ref_db = reference_db
|
52
52
|
norun = %w[
|
53
|
-
|
53
|
+
haai_distances aai_distances ani_distances clade_finding
|
54
54
|
]
|
55
55
|
p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
|
56
56
|
p = create_project(
|
@@ -59,7 +59,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
59
59
|
run_ssu: false, run_mytaxa_scan: false, run_distances: false
|
60
60
|
)
|
61
61
|
p.set_option(:ref_project, ref_db.path)
|
62
|
-
p.set_option(:tax_pvalue, cli[:pvalue]
|
62
|
+
p.set_option(:tax_pvalue, cli[:pvalue])
|
63
63
|
# Run
|
64
64
|
run_daemon
|
65
65
|
summarize(%w[cds assembly essential_genes]) if cli[:summaries]
|
@@ -52,7 +52,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
|
|
52
52
|
# Input data
|
53
53
|
p = create_project(
|
54
54
|
:assembly,
|
55
|
-
{
|
55
|
+
{ run_clades: false },
|
56
56
|
{ run_mytaxa_scan: false, run_ssu: false }
|
57
57
|
)
|
58
58
|
p.set_option(:gsp_metric, cli[:metric].to_s)
|
@@ -93,7 +93,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
93
93
|
k += 1
|
94
94
|
cli.advance('Datasets:', k, n, false) if i == 0
|
95
95
|
next unless k % cli[:threads] == i
|
96
|
-
each_database_file(d) do |db_file, metric, result|
|
96
|
+
each_database_file(d) do |db_file, metric, result, _rank|
|
97
97
|
check_sqlite3_database(db_file, metric) do
|
98
98
|
cli.say(
|
99
99
|
" > Removing malformed database from #{d.name}:#{result} "
|
@@ -119,24 +119,36 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
119
119
|
ref_ds = cli.load_project.each_dataset.select(&:ref?)
|
120
120
|
ref_names = ref_ds.map(&:name)
|
121
121
|
n = ref_ds.size
|
122
|
-
(0 .. cli[:threads] - 1).map do |i|
|
123
|
-
Process.fork do
|
124
|
-
k = 0
|
125
|
-
ref_ds.each do |d|
|
126
|
-
k += 1
|
127
|
-
cli.advance('Datasets:', k, n, false) if i == 0
|
128
|
-
next unless k % cli[:threads] == i
|
129
|
-
|
130
|
-
saved = saved_targets(d)
|
131
|
-
next if saved.nil?
|
132
122
|
|
133
|
-
|
134
|
-
|
135
|
-
|
123
|
+
# Read data first (threaded)
|
124
|
+
@distances = { aai: {}, ani: {} }
|
125
|
+
Dir.mktmpdir do |tmp|
|
126
|
+
MiGA::Parallel.process(cli[:threads]) do |thr|
|
127
|
+
idx = 0
|
128
|
+
ref_ds.each do |ds|
|
129
|
+
cli.advance('Reading:', idx + 1, n, false) if thr == 0
|
130
|
+
read_bidirectional(ds) if idx % cli[:threads] == thr
|
131
|
+
idx += 1
|
132
|
+
end
|
133
|
+
File.open("#{tmp}/#{thr}.json", 'w') do |fh|
|
134
|
+
fh.print JSON.fast_generate(@distances)
|
136
135
|
end
|
137
136
|
end
|
137
|
+
cli.say
|
138
|
+
|
139
|
+
cli[:threads].times do |i|
|
140
|
+
cli.advance('Merging:', i + 1, cli[:threads], false)
|
141
|
+
o = MiGA::Json.parse("#{tmp}/#{i}.json", symbolize: false)
|
142
|
+
o.each { |k, v| @distances[k.to_sym].merge!(v) }
|
143
|
+
end
|
144
|
+
cli.say
|
145
|
+
end
|
146
|
+
|
147
|
+
# Write missing values (threaded)
|
148
|
+
MiGA::Parallel.distribute(ref_ds, cli[:threads]) do |ds, idx, thr|
|
149
|
+
cli.advance('Datasets:', idx + 1, n, false) if thr == 0
|
150
|
+
save_bidirectional(ds)
|
138
151
|
end
|
139
|
-
Process.waitall
|
140
152
|
cli.say
|
141
153
|
end
|
142
154
|
|
@@ -16,22 +16,28 @@ module MiGA::Cli::Action::Doctor::Base
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def each_database_file(dataset, &blk)
|
19
|
-
ref_db = {
|
20
|
-
|
19
|
+
ref_db = {
|
20
|
+
haai: ['01.haai', :aai], aai: ['02.aai', :aai], ani: ['03.ani', :ani]
|
21
|
+
}
|
22
|
+
qry_db = {
|
23
|
+
haai: ['.haai.db', :aai], aai: ['.aai.db', :aai], ani: ['.ani.db', :ani]
|
24
|
+
}
|
21
25
|
base = File.join(dataset.project.path, 'data', '09.distances')
|
22
26
|
result = :distances
|
23
27
|
if dataset.ref?
|
24
28
|
file_db = "#{dataset.name}.db"
|
25
|
-
ref_db.each do |
|
29
|
+
ref_db.each do |rank, v|
|
30
|
+
dir, metric = *v
|
26
31
|
file = File.join(base, dir, file_db)
|
27
|
-
blk[file, metric, result] if File.exist? file
|
32
|
+
blk[file, metric, result, rank] if File.exist? file
|
28
33
|
end
|
29
34
|
base = File.join(base, '05.taxonomy')
|
30
35
|
result = :taxonomy
|
31
36
|
end
|
32
|
-
qry_db.each do |
|
37
|
+
qry_db.each do |rank, v|
|
38
|
+
ext, metric = *v
|
33
39
|
file = File.join(base, "#{dataset.name}#{ext}")
|
34
|
-
blk[file, metric, result] if File.exist? file
|
40
|
+
blk[file, metric, result, rank] if File.exist? file
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
@@ -109,25 +115,41 @@ module MiGA::Cli::Action::Doctor::Base
|
|
109
115
|
end
|
110
116
|
|
111
117
|
##
|
112
|
-
#
|
113
|
-
#
|
114
|
-
def
|
115
|
-
each_database_file(a) do |db_file, metric, result|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
).first
|
118
|
+
# Reads all the distance estimates in +a+ -> *, and saves them in memory
|
119
|
+
# in the +@distances+ variable.
|
120
|
+
def read_bidirectional(a)
|
121
|
+
each_database_file(a) do |db_file, metric, result, rank|
|
122
|
+
next if rank == :haai # No need for hAAI to be bidirectional
|
123
|
+
|
124
|
+
sql = "select seq2, #{metric}, sd, n, omega from #{metric}"
|
125
|
+
data = MiGA::SQLite.new(db_file).run(sql)
|
121
126
|
next if data.nil? || data.empty?
|
122
127
|
|
123
|
-
|
124
|
-
|
128
|
+
@distances[rank][a.name] ||= {}
|
129
|
+
data.each { |row| @distances[rank][a.name][row.shift] = row }
|
130
|
+
end
|
131
|
+
end
|
125
132
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
133
|
+
##
|
134
|
+
# Saves all the distance estimates in * -> +a+ into the +a+ databases
|
135
|
+
# (as +a+ -> *), where +a+ is a MiGA::Dataset object
|
136
|
+
def save_bidirectional(a)
|
137
|
+
each_database_file(a) do |db_file, metric, result, rank|
|
138
|
+
next if rank == :haai # No need for hAAI to be bidirectional
|
139
|
+
|
140
|
+
b2a = @distances[rank].map { |b_name, v| b_name if v[a.name] }.compact
|
141
|
+
a2b = @distances[rank][a.name].keys
|
142
|
+
SQLite3::Database.new(db_file) do |db|
|
143
|
+
sql = <<~SQL
|
144
|
+
insert into #{metric}(seq1, seq2, #{metric}, sd, n, omega) \
|
145
|
+
values(?, ?, ?, ?, ?, ?);
|
146
|
+
SQL
|
147
|
+
db.execute('BEGIN TRANSACTION;')
|
148
|
+
(b2a - a2b).each do |b_name|
|
149
|
+
db.execute(sql, [a.name, b_name] + @distances[rank][b_name][a.name])
|
150
|
+
end
|
151
|
+
db.execute('COMMIT;')
|
152
|
+
end
|
131
153
|
end
|
132
154
|
end
|
133
155
|
end
|
data/lib/miga/common.rb
CHANGED
data/lib/miga/json.rb
CHANGED
@@ -45,8 +45,10 @@ class MiGA::Json < MiGA::MiGA
|
|
45
45
|
raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
|
46
46
|
|
47
47
|
# Parse JSON
|
48
|
-
params = {
|
49
|
-
|
48
|
+
params = {
|
49
|
+
symbolize_names: opts[:symbolize],
|
50
|
+
create_additions: opts[:additions]
|
51
|
+
}
|
50
52
|
y = JSON.parse(cont, params)
|
51
53
|
|
52
54
|
# Add defaults
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
##
|
4
|
+
# Parallel execution in MiGA.
|
5
|
+
class MiGA::Parallel < MiGA::MiGA
|
6
|
+
class << self
|
7
|
+
##
|
8
|
+
# Executes the passed block with the thread number as argument (0-numbered)
|
9
|
+
# in +threads+ processes
|
10
|
+
def process(threads)
|
11
|
+
threads.times do |i|
|
12
|
+
Process.fork { yield(i) }
|
13
|
+
end
|
14
|
+
Process.waitall
|
15
|
+
end
|
16
|
+
|
17
|
+
##
|
18
|
+
# Distributes +enum+ across +threads+ and calls the passed block with args:
|
19
|
+
# 1. Unitary object from +enum+
|
20
|
+
# 2. Index of the unitary object
|
21
|
+
# 3. Index of the acting thread
|
22
|
+
def distribute(enum, threads)
|
23
|
+
process(threads) do |thr|
|
24
|
+
enum.each_with_index do |obj, idx|
|
25
|
+
yield(obj, idx, thr) if idx % threads == thr
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
data/lib/miga/sqlite.rb
CHANGED
@@ -37,8 +37,9 @@ class MiGA::SQLite < MiGA::MiGA
|
|
37
37
|
# Executes +cmd+ and returns the result
|
38
38
|
def run(*cmd)
|
39
39
|
busy_attempts ||= 0
|
40
|
-
|
41
|
-
conn.execute(*cmd)
|
40
|
+
y = nil
|
41
|
+
SQLite3::Database.new(path) { |conn| y = conn.execute(*cmd) }
|
42
|
+
y
|
42
43
|
rescue SQLite3::BusyException => e
|
43
44
|
busy_attempts += 1
|
44
45
|
raise "Database busy #{path}: #{e.message}" if busy_attempts >= 3
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.7, 26,
|
13
|
+
VERSION = [0.7, 26, 3].freeze
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2021, 3,
|
21
|
+
VERSION_DATE = Date.new(2021, 3, 11)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
data/test/remote_dataset_test.rb
CHANGED
@@ -47,7 +47,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def test_net_ftp
|
50
|
-
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/
|
50
|
+
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/gap/GAPJ01.fasta.gz'
|
51
51
|
n = 'Cjac_L14'
|
52
52
|
rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
|
53
53
|
assert_equal([cjac], rd.ids)
|
data/utils/distance/database.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.26.
|
4
|
+
version: 0.7.26.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -187,6 +187,7 @@ files:
|
|
187
187
|
- lib/miga/json.rb
|
188
188
|
- lib/miga/lair.rb
|
189
189
|
- lib/miga/metadata.rb
|
190
|
+
- lib/miga/parallel.rb
|
190
191
|
- lib/miga/project.rb
|
191
192
|
- lib/miga/project/base.rb
|
192
193
|
- lib/miga/project/dataset.rb
|