miga-base 0.7.24.0 → 0.7.25.0
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/add.rb +9 -6
- data/lib/miga/cli/action/derep_wf.rb +1 -1
- data/lib/miga/cli/action/index_wf.rb +4 -2
- data/lib/miga/cli/action/init.rb +60 -59
- data/lib/miga/cli/action/init/files_helper.rb +2 -1
- data/lib/miga/cli/action/preproc_wf.rb +7 -5
- data/lib/miga/cli/action/wf.rb +39 -23
- data/lib/miga/cli/base.rb +16 -5
- data/lib/miga/common/with_option.rb +1 -1
- data/lib/miga/dataset/result.rb +2 -1
- data/lib/miga/project/base.rb +1 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/essential_genes.bash +17 -1
- data/scripts/miga.bash +8 -2
- data/test/lair_test.rb +1 -2
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
- data/utils/FastAAI/FastAAI/FastAAI +1336 -0
- data/utils/FastAAI/README.md +84 -0
- data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
- data/utils/distance/base.rb +9 -0
- data/utils/distance/commands.rb +183 -81
- data/utils/distance/database.rb +68 -9
- data/utils/distance/pipeline.rb +14 -18
- data/utils/distance/runner.rb +16 -30
- data/utils/distance/temporal.rb +4 -2
- data/utils/distances.rb +2 -2
- data/utils/requirements.txt +1 -1
- metadata +8 -2
data/utils/distance/runner.rb
CHANGED
@@ -1,29 +1,17 @@
|
|
1
1
|
require_relative 'base.rb'
|
2
|
-
require_relative 'temporal.rb'
|
3
|
-
require_relative 'database.rb'
|
4
|
-
require_relative 'commands.rb'
|
5
|
-
require_relative 'pipeline.rb'
|
6
2
|
|
7
3
|
class MiGA::DistanceRunner
|
8
|
-
include MiGA::DistanceRunner::Temporal
|
9
|
-
include MiGA::DistanceRunner::Database
|
10
|
-
include MiGA::DistanceRunner::Commands
|
11
|
-
include MiGA::DistanceRunner::Pipeline
|
12
|
-
|
13
4
|
attr_reader :project, :ref_project, :dataset, :opts, :home
|
14
5
|
attr_reader :tmp, :tmp_dbs, :dbs, :db_counts
|
15
6
|
|
16
7
|
def initialize(project_path, dataset_name, opts_hash = {})
|
17
8
|
@opts = opts_hash
|
18
|
-
@project = MiGA::Project.load(project_path)
|
19
|
-
|
9
|
+
@project = MiGA::Project.load(project_path)
|
10
|
+
@project or raise "No project at #{project_path}"
|
20
11
|
@dataset = project.dataset(dataset_name)
|
21
12
|
@home = File.expand_path('data/09.distances', project.path)
|
22
13
|
|
23
14
|
# Default opts
|
24
|
-
@opts[:aai_save_rbm] =
|
25
|
-
project.option(:aai_save_rbm) ? 'save-rbm' : 'no-save-rbm'
|
26
|
-
@opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
|
27
15
|
if opts[:run_taxonomy] && project.option(:ref_project)
|
28
16
|
ref_path = project.option(:ref_project)
|
29
17
|
@home = File.expand_path('05.taxonomy', @home)
|
@@ -39,7 +27,8 @@ class MiGA::DistanceRunner
|
|
39
27
|
else
|
40
28
|
@ref_project = project
|
41
29
|
end
|
42
|
-
|
30
|
+
@opts[:thr] ||= ENV.fetch('CORES') { 1 }.to_i
|
31
|
+
%i[haai_p aai_p ani_p distances_checkpoint aai_save_rbm].each do |m|
|
43
32
|
@opts[m] ||= ref_project.option(m)
|
44
33
|
end
|
45
34
|
$stderr.puts "Options: #{opts}"
|
@@ -64,12 +53,11 @@ class MiGA::DistanceRunner
|
|
64
53
|
initialize_dbs! true
|
65
54
|
|
66
55
|
# first-come-first-serve traverse
|
56
|
+
sbj = []
|
67
57
|
ref_project.each_dataset do |ds|
|
68
|
-
|
69
|
-
|
70
|
-
puts "[ #{Time.now} ] #{ds.name}"
|
71
|
-
ani_after_aai(ds)
|
58
|
+
sbj << ds if ds.ref? && !ds.multi? && ds.result(:essential_genes)
|
72
59
|
end
|
60
|
+
ani_after_aai(sbj)
|
73
61
|
|
74
62
|
# Finalize
|
75
63
|
%i[haai aai ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
@@ -97,27 +85,25 @@ class MiGA::DistanceRunner
|
|
97
85
|
par_dir = File.dirname(File.expand_path(classif, res.dir))
|
98
86
|
par = File.expand_path('miga-project.classif', par_dir)
|
99
87
|
closest = { dataset: nil, ani: 0.0 }
|
88
|
+
sbj_datasets = []
|
100
89
|
if File.size? par
|
101
90
|
File.open(par, 'r') do |fh|
|
102
91
|
fh.each_line do |ln|
|
103
92
|
r = ln.chomp.split("\t")
|
104
|
-
|
105
|
-
|
106
|
-
ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
|
107
|
-
unless ani.nil? || ani < closest[:ani]
|
108
|
-
closest = { ds: r[0], ani: ani }
|
109
|
-
end
|
93
|
+
sbj_datasets << ref_project.dataset(r[0]) if r[1].to_i == val_cls
|
110
94
|
end
|
111
95
|
end
|
96
|
+
ani = ani_after_aai(sbj_datasets, 80.0)
|
97
|
+
ani_max = ani.map(&:to_f).each_with_index.max
|
98
|
+
closest = { ds: sbj_datasets[ani_max[1]].name, ani: ani_max[0] }
|
112
99
|
end
|
113
100
|
|
114
101
|
# Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
|
115
102
|
cl_path = res.file_path :clades_ani95
|
116
|
-
if !cl_path.nil?
|
117
|
-
File.foreach(cl_path)
|
118
|
-
|
119
|
-
|
120
|
-
.each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
|
103
|
+
if !cl_path.nil? && File.size?(cl_path) && tsk[0] == :clade_finding
|
104
|
+
clades = File.foreach(cl_path).map { |i| i.chomp.split(',') }
|
105
|
+
sbj_datasets = clades.find { |i| i.include?(closest[:ds]) }
|
106
|
+
ani_after_aai(sbj_datasets, 80.0) if sbj_datasets
|
121
107
|
end
|
122
108
|
|
123
109
|
# Finalize
|
data/utils/distance/temporal.rb
CHANGED
@@ -24,8 +24,10 @@ module MiGA::DistanceRunner::Temporal
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
# Temporal file with extension +ext+
|
28
|
-
def tmp_file(ext)
|
27
|
+
# Temporal file with extension +ext+, or a unique ID if +ext+ is +nil+
|
28
|
+
def tmp_file(ext = nil)
|
29
|
+
@_tmp_count ||= 0
|
30
|
+
ext ||= "#{@_tmp_count += 1}.tmp"
|
29
31
|
File.expand_path("#{dataset.name}.#{ext}", tmp)
|
30
32
|
end
|
31
33
|
|
data/utils/distances.rb
CHANGED
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
require_relative 'distance/runner.rb'
|
4
4
|
|
5
|
-
dataset = ARGV.shift
|
6
5
|
project = ARGV.shift
|
6
|
+
dataset = ARGV.shift
|
7
7
|
opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
|
8
|
-
runner = MiGA::DistanceRunner.new(
|
8
|
+
runner = MiGA::DistanceRunner.new(project, dataset, opts)
|
9
9
|
runner.go!
|
data/utils/requirements.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Software Test exec Website Notes
|
2
2
|
-------- --------- ------- -----
|
3
3
|
Ruby ruby https://www.ruby-lang.org/ Required version: 2.3+
|
4
|
-
Python
|
4
|
+
Python python3 https://www.python.org/ Required version: 3+
|
5
5
|
R R http://www.r-project.org/
|
6
6
|
SQLite3 sqlite3 https://www.sqlite.org/
|
7
7
|
NCBI BLAST+ blastp ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.24.0
|
4
|
+
version: 0.7.25.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-02-
|
11
|
+
date: 2021-02-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -250,6 +250,12 @@ files:
|
|
250
250
|
- test/test_helper.rb
|
251
251
|
- test/with_daemon_test.rb
|
252
252
|
- test/with_option_test.rb
|
253
|
+
- utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm
|
254
|
+
- utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm
|
255
|
+
- utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm
|
256
|
+
- utils/FastAAI/FastAAI/FastAAI
|
257
|
+
- utils/FastAAI/README.md
|
258
|
+
- utils/FastAAI/kAAI_v1.0_virus.py
|
253
259
|
- utils/adapters.fa
|
254
260
|
- utils/cleanup-databases.rb
|
255
261
|
- utils/core-pan-plot.R
|