miga-base 0.7.25.2 → 0.7.26.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/classify_wf.rb +2 -2
- data/lib/miga/cli/action/derep_wf.rb +1 -1
- data/lib/miga/cli/action/doctor.rb +27 -15
- data/lib/miga/cli/action/doctor/base.rb +44 -22
- data/lib/miga/cli/action/init.rb +23 -9
- data/lib/miga/common.rb +1 -0
- data/lib/miga/json.rb +4 -2
- data/lib/miga/parallel.rb +31 -0
- data/lib/miga/sqlite.rb +3 -2
- data/lib/miga/version.rb +2 -2
- data/test/remote_dataset_test.rb +1 -1
- data/utils/distance/database.rb +0 -1
- data/utils/distance/runner.rb +2 -1
- data/utils/requirements.txt +4 -4
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0f2c9f2013b77f3a3b1dff0fbb73e74dad0b836d17d4a2129110a6095ffc7d8a
|
4
|
+
data.tar.gz: 0d7a1c879970dc9ffae5bae8cbbb51f6ef7ef13e41373fcc7cd5cc93db841104
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4da1c02b538370a585efc788a71cbb9fafb5b8383c2882f0ab01d1bdec22a9dac2878bb1fa3a028f78ff19d22ecf70e50c67e20e71dc8a0308437ca19a6368db
|
7
|
+
data.tar.gz: 4eb38dafd4eb0dbaf69eed6fa734fec5c0d104d97d1f035cafdcd77365f93ecc723ac01eda643986f0676c4b4156a2504f92c1d98a02d0bd3955306e1a4924f5
|
@@ -50,7 +50,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
50
50
|
# Input data
|
51
51
|
ref_db = reference_db
|
52
52
|
norun = %w[
|
53
|
-
|
53
|
+
haai_distances aai_distances ani_distances clade_finding
|
54
54
|
]
|
55
55
|
p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
|
56
56
|
p = create_project(
|
@@ -59,7 +59,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
59
59
|
run_ssu: false, run_mytaxa_scan: false, run_distances: false
|
60
60
|
)
|
61
61
|
p.set_option(:ref_project, ref_db.path)
|
62
|
-
p.set_option(:tax_pvalue, cli[:pvalue]
|
62
|
+
p.set_option(:tax_pvalue, cli[:pvalue])
|
63
63
|
# Run
|
64
64
|
run_daemon
|
65
65
|
summarize(%w[cds assembly essential_genes]) if cli[:summaries]
|
@@ -52,7 +52,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
|
|
52
52
|
# Input data
|
53
53
|
p = create_project(
|
54
54
|
:assembly,
|
55
|
-
{
|
55
|
+
{ run_clades: false },
|
56
56
|
{ run_mytaxa_scan: false, run_ssu: false }
|
57
57
|
)
|
58
58
|
p.set_option(:gsp_metric, cli[:metric].to_s)
|
@@ -93,7 +93,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
93
93
|
k += 1
|
94
94
|
cli.advance('Datasets:', k, n, false) if i == 0
|
95
95
|
next unless k % cli[:threads] == i
|
96
|
-
each_database_file(d) do |db_file, metric, result|
|
96
|
+
each_database_file(d) do |db_file, metric, result, _rank|
|
97
97
|
check_sqlite3_database(db_file, metric) do
|
98
98
|
cli.say(
|
99
99
|
" > Removing malformed database from #{d.name}:#{result} "
|
@@ -119,24 +119,36 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
119
119
|
ref_ds = cli.load_project.each_dataset.select(&:ref?)
|
120
120
|
ref_names = ref_ds.map(&:name)
|
121
121
|
n = ref_ds.size
|
122
|
-
(0 .. cli[:threads] - 1).map do |i|
|
123
|
-
Process.fork do
|
124
|
-
k = 0
|
125
|
-
ref_ds.each do |d|
|
126
|
-
k += 1
|
127
|
-
cli.advance('Datasets:', k, n, false) if i == 0
|
128
|
-
next unless k % cli[:threads] == i
|
129
|
-
|
130
|
-
saved = saved_targets(d)
|
131
|
-
next if saved.nil?
|
132
122
|
|
133
|
-
|
134
|
-
|
135
|
-
|
123
|
+
# Read data first (threaded)
|
124
|
+
@distances = { aai: {}, ani: {} }
|
125
|
+
Dir.mktmpdir do |tmp|
|
126
|
+
MiGA::Parallel.process(cli[:threads]) do |thr|
|
127
|
+
idx = 0
|
128
|
+
ref_ds.each do |ds|
|
129
|
+
cli.advance('Reading:', idx + 1, n, false) if thr == 0
|
130
|
+
read_bidirectional(ds) if idx % cli[:threads] == thr
|
131
|
+
idx += 1
|
132
|
+
end
|
133
|
+
File.open("#{tmp}/#{thr}.json", 'w') do |fh|
|
134
|
+
fh.print JSON.fast_generate(@distances)
|
136
135
|
end
|
137
136
|
end
|
137
|
+
cli.say
|
138
|
+
|
139
|
+
cli[:threads].times do |i|
|
140
|
+
cli.advance('Merging:', i + 1, cli[:threads], false)
|
141
|
+
o = MiGA::Json.parse("#{tmp}/#{i}.json", symbolize: false)
|
142
|
+
o.each { |k, v| @distances[k.to_sym].merge!(v) }
|
143
|
+
end
|
144
|
+
cli.say
|
145
|
+
end
|
146
|
+
|
147
|
+
# Write missing values (threaded)
|
148
|
+
MiGA::Parallel.distribute(ref_ds, cli[:threads]) do |ds, idx, thr|
|
149
|
+
cli.advance('Datasets:', idx + 1, n, false) if thr == 0
|
150
|
+
save_bidirectional(ds)
|
138
151
|
end
|
139
|
-
Process.waitall
|
140
152
|
cli.say
|
141
153
|
end
|
142
154
|
|
@@ -16,22 +16,28 @@ module MiGA::Cli::Action::Doctor::Base
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def each_database_file(dataset, &blk)
|
19
|
-
ref_db = {
|
20
|
-
|
19
|
+
ref_db = {
|
20
|
+
haai: ['01.haai', :aai], aai: ['02.aai', :aai], ani: ['03.ani', :ani]
|
21
|
+
}
|
22
|
+
qry_db = {
|
23
|
+
haai: ['.haai.db', :aai], aai: ['.aai.db', :aai], ani: ['.ani.db', :ani]
|
24
|
+
}
|
21
25
|
base = File.join(dataset.project.path, 'data', '09.distances')
|
22
26
|
result = :distances
|
23
27
|
if dataset.ref?
|
24
28
|
file_db = "#{dataset.name}.db"
|
25
|
-
ref_db.each do |
|
29
|
+
ref_db.each do |rank, v|
|
30
|
+
dir, metric = *v
|
26
31
|
file = File.join(base, dir, file_db)
|
27
|
-
blk[file, metric, result] if File.exist? file
|
32
|
+
blk[file, metric, result, rank] if File.exist? file
|
28
33
|
end
|
29
34
|
base = File.join(base, '05.taxonomy')
|
30
35
|
result = :taxonomy
|
31
36
|
end
|
32
|
-
qry_db.each do |
|
37
|
+
qry_db.each do |rank, v|
|
38
|
+
ext, metric = *v
|
33
39
|
file = File.join(base, "#{dataset.name}#{ext}")
|
34
|
-
blk[file, metric, result] if File.exist? file
|
40
|
+
blk[file, metric, result, rank] if File.exist? file
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
@@ -109,25 +115,41 @@ module MiGA::Cli::Action::Doctor::Base
|
|
109
115
|
end
|
110
116
|
|
111
117
|
##
|
112
|
-
#
|
113
|
-
#
|
114
|
-
def
|
115
|
-
each_database_file(a) do |db_file, metric, result|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
).first
|
118
|
+
# Reads all the distance estimates in +a+ -> *, and saves them in memory
|
119
|
+
# in the +@distances+ variable.
|
120
|
+
def read_bidirectional(a)
|
121
|
+
each_database_file(a) do |db_file, metric, result, rank|
|
122
|
+
next if rank == :haai # No need for hAAI to be bidirectional
|
123
|
+
|
124
|
+
sql = "select seq2, #{metric}, sd, n, omega from #{metric}"
|
125
|
+
data = MiGA::SQLite.new(db_file).run(sql)
|
121
126
|
next if data.nil? || data.empty?
|
122
127
|
|
123
|
-
|
124
|
-
|
128
|
+
@distances[rank][a.name] ||= {}
|
129
|
+
data.each { |row| @distances[rank][a.name][row.shift] = row }
|
130
|
+
end
|
131
|
+
end
|
125
132
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
133
|
+
##
|
134
|
+
# Saves all the distance estimates in * -> +a+ into the +a+ databases
|
135
|
+
# (as +a+ -> *), where +a+ is a MiGA::Dataset object
|
136
|
+
def save_bidirectional(a)
|
137
|
+
each_database_file(a) do |db_file, metric, result, rank|
|
138
|
+
next if rank == :haai # No need for hAAI to be bidirectional
|
139
|
+
|
140
|
+
b2a = @distances[rank].map { |b_name, v| b_name if v[a.name] }.compact
|
141
|
+
a2b = @distances[rank][a.name].keys
|
142
|
+
SQLite3::Database.new(db_file) do |db|
|
143
|
+
sql = <<~SQL
|
144
|
+
insert into #{metric}(seq1, seq2, #{metric}, sd, n, omega) \
|
145
|
+
values(?, ?, ?, ?, ?, ?);
|
146
|
+
SQL
|
147
|
+
db.execute('BEGIN TRANSACTION;')
|
148
|
+
(b2a - a2b).each do |b_name|
|
149
|
+
db.execute(sql, [a.name, b_name] + @distances[rank][b_name][a.name])
|
150
|
+
end
|
151
|
+
db.execute('COMMIT;')
|
152
|
+
end
|
131
153
|
end
|
132
154
|
end
|
133
155
|
end
|
data/lib/miga/cli/action/init.rb
CHANGED
@@ -14,6 +14,8 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
14
14
|
cli.defaults = {
|
15
15
|
mytaxa: nil,
|
16
16
|
rdp: nil,
|
17
|
+
reads: nil,
|
18
|
+
optional: nil,
|
17
19
|
config: File.join(ENV['MIGA_HOME'], '.miga_modules'),
|
18
20
|
ask: false,
|
19
21
|
auto: false,
|
@@ -35,6 +37,16 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
35
37
|
'Should I try setting up the RDP classifier?',
|
36
38
|
'By default: interactive (true if --auto)'
|
37
39
|
) { |v| cli[:rdp] = v }
|
40
|
+
opt.on(
|
41
|
+
'--[no-]read-processing',
|
42
|
+
'Should I try setting up read processing software?',
|
43
|
+
'By default: interactive (true if --auto)'
|
44
|
+
) { |v| cli[:reads] = v }
|
45
|
+
opt.on(
|
46
|
+
'--[no-]optional',
|
47
|
+
'Should I try setting up the optional software?',
|
48
|
+
'Automatically sets answers for mytaxa, rdp, and reads'
|
49
|
+
) { |v| cli[:optional] = v }
|
38
50
|
opt.on(
|
39
51
|
'--daemon-type STRING',
|
40
52
|
'Type of daemon launcher, one of: bash, ssh, qsub, msub, slurm',
|
@@ -104,6 +116,8 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
104
116
|
rc_fh.puts "export MIGA_MYTAXA='#{cli[:mytaxa] ? 'yes' : 'no'}'"
|
105
117
|
ask_for_optional(:rdp, 'RDP classifier')
|
106
118
|
rc_fh.puts "export MIGA_RDP='#{cli[:rdp] ? 'yes' : 'no'}'"
|
119
|
+
ask_for_optional(:reads, 'read processing')
|
120
|
+
rc_fh.puts "export MIGA_READS='#{cli[:reads] ? 'yes' : 'no'}'"
|
107
121
|
paths = {}
|
108
122
|
rc_fh.puts 'MIGA_PATH=""'
|
109
123
|
req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
|
@@ -123,20 +137,20 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
123
137
|
def define_software(ln)
|
124
138
|
r = ln.chomp.split(/\t+/)
|
125
139
|
return if %w[Software --------].include?(r[0])
|
126
|
-
return if r[0] =~ /\(
|
127
|
-
return if r[0] =~ /\(rdp\)$/ && !cli[:rdp]
|
140
|
+
%i[mytaxa rdp reads].each { |i| return if r[0] =~ /\(#{i}\)$/ && !cli[i] }
|
128
141
|
|
129
142
|
r
|
130
143
|
end
|
131
144
|
|
132
145
|
def ask_for_optional(symbol, name)
|
133
|
-
if cli[symbol].nil?
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
146
|
+
cli[symbol] = cli[:optional] if !cli[:optional].nil? && cli[symbol].nil?
|
147
|
+
return cli[symbol] unless cli[symbol].nil?
|
148
|
+
|
149
|
+
cli[symbol] =
|
150
|
+
cli.ask_user(
|
151
|
+
"Should I include #{name} modules?",
|
152
|
+
'yes', %w(yes no)
|
153
|
+
) == 'yes'
|
140
154
|
end
|
141
155
|
|
142
156
|
def find_software(exec)
|
data/lib/miga/common.rb
CHANGED
data/lib/miga/json.rb
CHANGED
@@ -45,8 +45,10 @@ class MiGA::Json < MiGA::MiGA
|
|
45
45
|
raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
|
46
46
|
|
47
47
|
# Parse JSON
|
48
|
-
params = {
|
49
|
-
|
48
|
+
params = {
|
49
|
+
symbolize_names: opts[:symbolize],
|
50
|
+
create_additions: opts[:additions]
|
51
|
+
}
|
50
52
|
y = JSON.parse(cont, params)
|
51
53
|
|
52
54
|
# Add defaults
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
##
|
4
|
+
# Parallel execution in MiGA.
|
5
|
+
class MiGA::Parallel < MiGA::MiGA
|
6
|
+
class << self
|
7
|
+
##
|
8
|
+
# Executes the passed block with the thread number as argument (0-numbered)
|
9
|
+
# in +threads+ processes
|
10
|
+
def process(threads)
|
11
|
+
threads.times do |i|
|
12
|
+
Process.fork { yield(i) }
|
13
|
+
end
|
14
|
+
Process.waitall
|
15
|
+
end
|
16
|
+
|
17
|
+
##
|
18
|
+
# Distributes +enum+ across +threads+ and calls the passed block with args:
|
19
|
+
# 1. Unitary object from +enum+
|
20
|
+
# 2. Index of the unitary object
|
21
|
+
# 3. Index of the acting thread
|
22
|
+
def distribute(enum, threads)
|
23
|
+
process(threads) do |thr|
|
24
|
+
enum.each_with_index do |obj, idx|
|
25
|
+
yield(obj, idx, thr) if idx % threads == thr
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
data/lib/miga/sqlite.rb
CHANGED
@@ -37,8 +37,9 @@ class MiGA::SQLite < MiGA::MiGA
|
|
37
37
|
# Executes +cmd+ and returns the result
|
38
38
|
def run(*cmd)
|
39
39
|
busy_attempts ||= 0
|
40
|
-
|
41
|
-
conn.execute(*cmd)
|
40
|
+
y = nil
|
41
|
+
SQLite3::Database.new(path) { |conn| y = conn.execute(*cmd) }
|
42
|
+
y
|
42
43
|
rescue SQLite3::BusyException => e
|
43
44
|
busy_attempts += 1
|
44
45
|
raise "Database busy #{path}: #{e.message}" if busy_attempts >= 3
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.7,
|
13
|
+
VERSION = [0.7, 26, 3].freeze
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2021,
|
21
|
+
VERSION_DATE = Date.new(2021, 3, 11)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
data/test/remote_dataset_test.rb
CHANGED
@@ -47,7 +47,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def test_net_ftp
|
50
|
-
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/
|
50
|
+
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/gap/GAPJ01.fasta.gz'
|
51
51
|
n = 'Cjac_L14'
|
52
52
|
rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
|
53
53
|
assert_equal([cjac], rd.ids)
|
data/utils/distance/database.rb
CHANGED
data/utils/distance/runner.rb
CHANGED
@@ -102,7 +102,8 @@ class MiGA::DistanceRunner
|
|
102
102
|
cl_path = res.file_path :clades_ani95
|
103
103
|
if !cl_path.nil? && File.size?(cl_path) && tsk[0] == :clade_finding
|
104
104
|
clades = File.foreach(cl_path).map { |i| i.chomp.split(',') }
|
105
|
-
|
105
|
+
sbj_dataset_names = clades.find { |i| i.include?(closest[:ds]) }
|
106
|
+
sbj_datasets = sbj_dataset_names&.map { |i| ref_project.dataset(i) }
|
106
107
|
ani_after_aai(sbj_datasets, 80.0) if sbj_datasets
|
107
108
|
end
|
108
109
|
|
data/utils/requirements.txt
CHANGED
@@ -10,12 +10,12 @@ FastANI fastANI https://github.com/ParBLiSS/FastANI Required version: 1.1+
|
|
10
10
|
HMMer 3.0+ hmmsearch http://hmmer.janelia.org/software
|
11
11
|
Bedtools bedtools http://bedtools.readthedocs.org/en/latest/
|
12
12
|
Prodigal prodigal http://prodigal.ornl.gov
|
13
|
-
IDBA idba_ud http://i.cs.hku.hk/~alse/hkubrg/projects/idba
|
14
13
|
MCL mcl http://micans.org/mcl/
|
15
14
|
Barrnap barrnap http://www.vicbioinformatics.com/software.barrnap.shtml
|
16
|
-
|
17
|
-
|
18
|
-
|
15
|
+
IDBA (reads) idba_ud http://i.cs.hku.hk/~alse/hkubrg/projects/idba
|
16
|
+
Scythe (reads) scythe https://github.com/vsbuffalo/scythe Required version: 0.991+
|
17
|
+
FastQC (reads) fastqc http://www.bioinformatics.babraham.ac.uk/projects/fastqc
|
18
|
+
SolexaQA++ (reads) SolexaQA++ http://solexaqa.sourceforge.net Required version: v3.1.3+
|
19
19
|
OpenJDK (rdp) java https://adoptopenjdk.net/ Any Java VM would work
|
20
20
|
MyTaxa (mytaxa) MyTaxa http://enve-omics.ce.gatech.edu/mytaxa
|
21
21
|
Krona (mytaxa) ktImportText https://github.com/marbl/Krona/wiki
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.26.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-03-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -187,6 +187,7 @@ files:
|
|
187
187
|
- lib/miga/json.rb
|
188
188
|
- lib/miga/lair.rb
|
189
189
|
- lib/miga/metadata.rb
|
190
|
+
- lib/miga/parallel.rb
|
190
191
|
- lib/miga/project.rb
|
191
192
|
- lib/miga/project/base.rb
|
192
193
|
- lib/miga/project/dataset.rb
|
@@ -554,7 +555,7 @@ homepage: http://enve-omics.ce.gatech.edu/miga
|
|
554
555
|
licenses:
|
555
556
|
- Artistic-2.0
|
556
557
|
metadata: {}
|
557
|
-
post_install_message:
|
558
|
+
post_install_message:
|
558
559
|
rdoc_options:
|
559
560
|
- lib
|
560
561
|
- README.md
|
@@ -576,7 +577,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
576
577
|
version: '0'
|
577
578
|
requirements: []
|
578
579
|
rubygems_version: 3.1.4
|
579
|
-
signing_key:
|
580
|
+
signing_key:
|
580
581
|
specification_version: 4
|
581
582
|
summary: MiGA
|
582
583
|
test_files: []
|