miga-base 0.7.22.0 → 0.7.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/README.md +1 -1
  4. data/Rakefile +1 -0
  5. data/lib/miga/cli/action/add.rb +10 -8
  6. data/lib/miga/cli/action/classify_wf.rb +12 -11
  7. data/lib/miga/cli/action/derep_wf.rb +3 -9
  8. data/lib/miga/cli/action/edit.rb +0 -1
  9. data/lib/miga/cli/action/find.rb +1 -1
  10. data/lib/miga/cli/action/generic.rb +1 -1
  11. data/lib/miga/cli/action/get.rb +7 -2
  12. data/lib/miga/cli/action/get_db.rb +16 -21
  13. data/lib/miga/cli/action/index_wf.rb +4 -2
  14. data/lib/miga/cli/action/init.rb +93 -144
  15. data/lib/miga/cli/action/init/daemon_helper.rb +1 -2
  16. data/lib/miga/cli/action/init/files_helper.rb +119 -0
  17. data/lib/miga/cli/action/ncbi_get.rb +1 -1
  18. data/lib/miga/cli/action/new.rb +15 -9
  19. data/lib/miga/cli/action/option.rb +44 -0
  20. data/lib/miga/cli/action/preproc_wf.rb +7 -5
  21. data/lib/miga/cli/action/quality_wf.rb +3 -3
  22. data/lib/miga/cli/action/tax_dist.rb +1 -1
  23. data/lib/miga/cli/action/tax_test.rb +1 -1
  24. data/lib/miga/cli/action/wf.rb +71 -53
  25. data/lib/miga/cli/base.rb +17 -5
  26. data/lib/miga/cli/objects_helper.rb +23 -18
  27. data/lib/miga/common.rb +4 -2
  28. data/lib/miga/common/net.rb +74 -0
  29. data/lib/miga/common/with_option.rb +83 -0
  30. data/lib/miga/common/with_result.rb +3 -2
  31. data/lib/miga/dataset/base.rb +20 -2
  32. data/lib/miga/dataset/result.rb +5 -3
  33. data/lib/miga/metadata.rb +25 -13
  34. data/lib/miga/project/base.rb +82 -2
  35. data/lib/miga/project/result.rb +4 -4
  36. data/lib/miga/remote_dataset.rb +2 -0
  37. data/lib/miga/result/stats.rb +2 -2
  38. data/lib/miga/version.rb +4 -2
  39. data/scripts/essential_genes.bash +18 -3
  40. data/scripts/miga.bash +8 -2
  41. data/scripts/mytaxa.bash +6 -5
  42. data/scripts/mytaxa_scan.bash +8 -7
  43. data/scripts/ogs.bash +2 -3
  44. data/scripts/ssu.bash +16 -2
  45. data/test/dataset_test.rb +5 -5
  46. data/test/lair_test.rb +1 -2
  47. data/test/net_test.rb +34 -0
  48. data/test/with_option_test.rb +115 -0
  49. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  50. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  51. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  52. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  53. data/utils/FastAAI/README.md +84 -0
  54. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  55. data/utils/cleanup-databases.rb +2 -3
  56. data/utils/distance/base.rb +9 -0
  57. data/utils/distance/commands.rb +183 -81
  58. data/utils/distance/database.rb +69 -10
  59. data/utils/distance/pipeline.rb +15 -21
  60. data/utils/distance/runner.rb +27 -49
  61. data/utils/distance/temporal.rb +4 -2
  62. data/utils/distances.rb +2 -2
  63. data/utils/index_metadata.rb +1 -2
  64. data/utils/requirements.txt +6 -5
  65. data/utils/subclade/runner.rb +10 -11
  66. metadata +18 -6
@@ -1,67 +1,48 @@
1
1
  require_relative 'base.rb'
2
- require_relative 'temporal.rb'
3
- require_relative 'database.rb'
4
- require_relative 'commands.rb'
5
- require_relative 'pipeline.rb'
6
2
 
7
3
  class MiGA::DistanceRunner
8
- include MiGA::DistanceRunner::Temporal
9
- include MiGA::DistanceRunner::Database
10
- include MiGA::DistanceRunner::Commands
11
- include MiGA::DistanceRunner::Pipeline
12
-
13
4
  attr_reader :project, :ref_project, :dataset, :opts, :home
14
5
  attr_reader :tmp, :tmp_dbs, :dbs, :db_counts
15
6
 
16
7
  def initialize(project_path, dataset_name, opts_hash = {})
17
8
  @opts = opts_hash
18
- @project = MiGA::Project.load(project_path) or
19
- raise "No project at #{project_path}"
9
+ @project = MiGA::Project.load(project_path)
10
+ @project or raise "No project at #{project_path}"
20
11
  @dataset = project.dataset(dataset_name)
21
12
  @home = File.expand_path('data/09.distances', project.path)
22
13
 
23
14
  # Default opts
24
- if project.metadata[:aai_save_rbm] == false
25
- @opts[:aai_save_rbm] ||= 'no-save-rbm'
26
- end
27
- @opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
28
- project.is_clade? ? 'save-rbm' : 'no-save-rbm'
29
- end
30
- @opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
31
- if opts[:run_taxonomy] and project.metadata[:ref_project]
32
- ref_path = project.metadata[:ref_project]
15
+ if opts[:run_taxonomy] && project.option(:ref_project)
16
+ ref_path = project.option(:ref_project)
33
17
  @home = File.expand_path('05.taxonomy', @home)
34
18
  @ref_project = MiGA::Project.load(ref_path)
35
19
  raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
36
- elsif !opts[:run_taxonomy] and dataset.metadata[:db_project]
37
- ref_path = dataset.metadata[:db_project]
38
- if project.metadata[:db_proj_dir]
39
- ref_path = File.expand_path(ref_path, project.metadata[:db_proj_dir])
20
+ elsif !opts[:run_taxonomy] && dataset.option(:db_project)
21
+ ref_path = dataset.option(:db_project)
22
+ if project.option(:db_proj_dir)
23
+ ref_path = File.expand_path(ref_path, project.option(:db_proj_dir))
40
24
  end
41
25
  @ref_project = MiGA::Project.load(ref_path)
42
26
  raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
43
27
  else
44
28
  @ref_project = project
45
29
  end
46
- [:haai_p, :aai_p, :ani_p, :distances_checkpoint].each do |m|
47
- @opts[m] ||= ref_project.metadata[m]
30
+ @opts[:thr] ||= ENV.fetch('CORES') { 1 }.to_i
31
+ %i[haai_p aai_p ani_p distances_checkpoint aai_save_rbm].each do |m|
32
+ @opts[m] ||= ref_project.option(m)
48
33
  end
49
- @opts[:aai_p] ||= 'blast+'
50
- @opts[:ani_p] ||= 'blast+'
51
- @opts[:distances_checkpoint] ||= 10
52
- @opts[:distances_checkpoint] = @opts[:distances_checkpoint].to_i
53
34
  $stderr.puts "Options: #{opts}"
54
35
  end
55
36
 
56
37
  # Launch the appropriate analysis
57
38
  def go!
58
39
  $stderr.puts "Launching analysis"
59
- return if dataset.is_multi?
40
+ return if dataset.multi?
60
41
 
61
42
  Dir.mktmpdir do |tmp_dir|
62
43
  @tmp = tmp_dir
63
44
  create_temporals
64
- opts[:run_taxonomy] ? go_taxonomy! : dataset.is_ref? ? go_ref! : go_query!
45
+ opts[:run_taxonomy] ? go_taxonomy! : dataset.ref? ? go_ref! : go_query!
65
46
  end
66
47
  end
67
48
 
@@ -72,12 +53,11 @@ class MiGA::DistanceRunner
72
53
  initialize_dbs! true
73
54
 
74
55
  # first-come-first-serve traverse
56
+ sbj = []
75
57
  ref_project.each_dataset do |ds|
76
- next if !ds.is_ref? or ds.is_multi? or ds.result(:essential_genes).nil?
77
-
78
- puts "[ #{Time.now} ] #{ds.name}"
79
- ani_after_aai(ds)
58
+ sbj << ds if ds.ref? && !ds.multi? && ds.result(:essential_genes)
80
59
  end
60
+ ani_after_aai(sbj)
81
61
 
82
62
  # Finalize
83
63
  %i[haai aai ani].each { |m| checkpoint! m if db_counts[m] > 0 }
@@ -88,7 +68,7 @@ class MiGA::DistanceRunner
88
68
  def go_query!
89
69
  $stderr.puts 'Launching analysis for query dataset'
90
70
  # Check if project is ready
91
- tsk = ref_project.is_clade? ? [:subclades, :ani] : [:clade_finding, :aai]
71
+ tsk = ref_project.clade? ? [:subclades, :ani] : [:clade_finding, :aai]
92
72
  res = ref_project.result(tsk[0])
93
73
  return if res.nil?
94
74
 
@@ -105,27 +85,25 @@ class MiGA::DistanceRunner
105
85
  par_dir = File.dirname(File.expand_path(classif, res.dir))
106
86
  par = File.expand_path('miga-project.classif', par_dir)
107
87
  closest = { dataset: nil, ani: 0.0 }
88
+ sbj_datasets = []
108
89
  if File.size? par
109
90
  File.open(par, 'r') do |fh|
110
91
  fh.each_line do |ln|
111
92
  r = ln.chomp.split("\t")
112
- next unless r[1].to_i == val_cls
113
-
114
- ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
115
- unless ani.nil? || ani < closest[:ani]
116
- closest = { ds: r[0], ani: ani }
117
- end
93
+ sbj_datasets << ref_project.dataset(r[0]) if r[1].to_i == val_cls
118
94
  end
119
95
  end
96
+ ani = ani_after_aai(sbj_datasets, 80.0)
97
+ ani_max = ani.map(&:to_f).each_with_index.max
98
+ closest = { ds: sbj_datasets[ani_max[1]].name, ani: ani_max[0] }
120
99
  end
121
100
 
122
101
  # Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
123
102
  cl_path = res.file_path :clades_ani95
124
- if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
125
- File.foreach(cl_path)
126
- .map { |i| i.chomp.split(',') }
127
- .find(lambda { [] }) { |i| i.include? closest[:ds] }
128
- .each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
103
+ if !cl_path.nil? && File.size?(cl_path) && tsk[0] == :clade_finding
104
+ clades = File.foreach(cl_path).map { |i| i.chomp.split(',') }
105
+ sbj_datasets = clades.find { |i| i.include?(closest[:ds]) }
106
+ ani_after_aai(sbj_datasets, 80.0) if sbj_datasets
129
107
  end
130
108
 
131
109
  # Finalize
@@ -137,7 +115,7 @@ class MiGA::DistanceRunner
137
115
  # Launch analysis for taxonomy jobs
138
116
  def go_taxonomy!
139
117
  $stderr.puts 'Launching taxonomy analysis'
140
- return unless project.metadata[:ref_project]
118
+ return unless project.option(:ref_project)
141
119
 
142
120
  go_query! # <- yeah, it's actually the same, just different ref_project
143
121
  end
@@ -24,8 +24,10 @@ module MiGA::DistanceRunner::Temporal
24
24
  end
25
25
  end
26
26
 
27
- # Temporal file with extension +ext+
28
- def tmp_file(ext)
27
+ # Temporal file with extension +ext+, or a unique ID if +ext+ is +nil+
28
+ def tmp_file(ext = nil)
29
+ @_tmp_count ||= 0
30
+ ext ||= "#{@_tmp_count += 1}.tmp"
29
31
  File.expand_path("#{dataset.name}.#{ext}", tmp)
30
32
  end
31
33
 
data/utils/distances.rb CHANGED
@@ -2,8 +2,8 @@
2
2
 
3
3
  require_relative 'distance/runner.rb'
4
4
 
5
- dataset = ARGV.shift
6
5
  project = ARGV.shift
6
+ dataset = ARGV.shift
7
7
  opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
8
- runner = MiGA::DistanceRunner.new(dataset, project, opts)
8
+ runner = MiGA::DistanceRunner.new(project, dataset, opts)
9
9
  runner.go!
@@ -17,8 +17,7 @@ def searchable(db, d, k, v)
17
17
  end
18
18
 
19
19
  p.each_dataset do |d|
20
- next unless d.is_ref?
21
- next unless d.is_active?
20
+ next unless d.ref? && d.active?
22
21
 
23
22
  searchable(db, d, :name, d.name)
24
23
  d.metadata.each do |k, v|
@@ -1,10 +1,12 @@
1
1
  Software Test exec Website Notes
2
2
  -------- --------- ------- -----
3
3
  Ruby ruby https://www.ruby-lang.org/ Required version: 2.3+
4
- Python python https://www.python.org/
4
+ Python python3 https://www.python.org/ Required version: 3+
5
5
  R R http://www.r-project.org/
6
6
  SQLite3 sqlite3 https://www.sqlite.org/
7
7
  NCBI BLAST+ blastp ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST
8
+ DIAMOND diamond http://ab.inf.uni-tuebingen.de/software/diamond Required version: v0.9.20+
9
+ FastANI fastANI https://github.com/ParBLiSS/FastANI Required version: 1.1+
8
10
  HMMer 3.0+ hmmsearch http://hmmer.janelia.org/software
9
11
  Bedtools bedtools http://bedtools.readthedocs.org/en/latest/
10
12
  Prodigal prodigal http://prodigal.ornl.gov
@@ -14,7 +16,6 @@ Barrnap barrnap http://www.vicbioinformatics.com/software.barrnap.shtml
14
16
  Scythe scythe https://github.com/vsbuffalo/scythe Required version: 0.991+
15
17
  FastQC fastqc http://www.bioinformatics.babraham.ac.uk/projects/fastqc
16
18
  SolexaQA++ SolexaQA++ http://solexaqa.sourceforge.net Required version: v3.1.3+
17
- DIAMOND (opt) diamond http://ab.inf.uni-tuebingen.de/software/diamond Required version: v0.9.20+
18
- MyTaxa (opt) MyTaxa http://enve-omics.ce.gatech.edu/mytaxa The folder must contain the db and utils dirs, and the AllGenomes.faa.dmnd database
19
- Krona (opt) ktImportText https://github.com/marbl/Krona/wiki
20
- FastANI (opt) fastANI https://github.com/ParBLiSS/FastANI Required version: 1.1+
19
+ OpenJDK (rdp) java https://adoptopenjdk.net/ Any Java VM would work
20
+ MyTaxa (mytaxa) MyTaxa http://enve-omics.ce.gatech.edu/mytaxa
21
+ Krona (mytaxa) ktImportText https://github.com/marbl/Krona/wiki
@@ -15,22 +15,21 @@ class MiGA::SubcladeRunner
15
15
  @step = step.to_sym
16
16
  @home = File.join(
17
17
  File.join(project.path, 'data', '10.clades'),
18
- @step == :clade_finding ? '01.find' : '02.ani'
18
+ @step == :clade_finding ? '01.find.running' : '02.ani.running'
19
19
  )
20
20
  @opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
21
- @opts[:run_clades] = !!@project.metadata.data.fetch(:run_clades) { true }
22
- @opts[:gsp_ani] = @project.metadata.data.fetch(:gsp_ani) { 95.0 }.to_f
23
- @opts[:gsp_aai] = @project.metadata.data.fetch(:gsp_aai) { 90.0 }.to_f
24
- @opts[:gsp_metric] =
25
- @project.metadata.data.fetch(:gsp_metric) { 'ani' }.to_s
21
+ @opts[:run_clades] = @project.option(:run_clades)
22
+ @opts[:gsp_ani] = @project.option(:gsp_ani)
23
+ @opts[:gsp_aai] = @project.option(:gsp_aai)
24
+ @opts[:gsp_metric] = @project.option(:gsp_metric)
26
25
  end
27
26
 
28
27
  # Launch the appropriate analysis
29
28
  def go!
30
29
  return if project.type == :metagenomes
31
30
 
32
- unless @project.dataset_names.any? { |i| @project.dataset(i).is_ref? }
33
- FileUtils.touch(File.expand_path('miga-project.empty', @home))
31
+ unless @project.dataset_names.any? { |i| @project.dataset(i).ref? }
32
+ FileUtils.touch(File.join(@home, 'miga-project.empty'))
34
33
  return
35
34
  end
36
35
  Dir.chdir home
@@ -44,15 +43,15 @@ class MiGA::SubcladeRunner
44
43
  # Launch analysis for clade_finding
45
44
  def go_clade_finding!
46
45
  cluster_species
47
- unless project.is_clade?
48
- subclades :aai
46
+ unless project.clade?
47
+ subclades(:aai)
49
48
  compile
50
49
  end
51
50
  end
52
51
 
53
52
  # Launch analysis for subclades
54
53
  def go_subclades!
55
- subclades :ani
54
+ subclades(:ani)
56
55
  compile
57
56
  end
58
57
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.22.0
4
+ version: 0.7.25.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-23 00:00:00.000000000 Z
11
+ date: 2021-02-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -145,12 +145,14 @@ files:
145
145
  - lib/miga/cli/action/index_wf.rb
146
146
  - lib/miga/cli/action/init.rb
147
147
  - lib/miga/cli/action/init/daemon_helper.rb
148
+ - lib/miga/cli/action/init/files_helper.rb
148
149
  - lib/miga/cli/action/lair.rb
149
150
  - lib/miga/cli/action/ln.rb
150
151
  - lib/miga/cli/action/ls.rb
151
152
  - lib/miga/cli/action/ncbi_get.rb
152
153
  - lib/miga/cli/action/new.rb
153
154
  - lib/miga/cli/action/next_step.rb
155
+ - lib/miga/cli/action/option.rb
154
156
  - lib/miga/cli/action/preproc_wf.rb
155
157
  - lib/miga/cli/action/quality_wf.rb
156
158
  - lib/miga/cli/action/rm.rb
@@ -169,9 +171,11 @@ files:
169
171
  - lib/miga/common/base.rb
170
172
  - lib/miga/common/format.rb
171
173
  - lib/miga/common/hooks.rb
174
+ - lib/miga/common/net.rb
172
175
  - lib/miga/common/path.rb
173
176
  - lib/miga/common/with_daemon.rb
174
177
  - lib/miga/common/with_daemon_class.rb
178
+ - lib/miga/common/with_option.rb
175
179
  - lib/miga/common/with_result.rb
176
180
  - lib/miga/daemon.rb
177
181
  - lib/miga/daemon/base.rb
@@ -235,6 +239,7 @@ files:
235
239
  - test/json_test.rb
236
240
  - test/lair_test.rb
237
241
  - test/metadata_test.rb
242
+ - test/net_test.rb
238
243
  - test/project_test.rb
239
244
  - test/remote_dataset_test.rb
240
245
  - test/result_stats_test.rb
@@ -244,6 +249,13 @@ files:
244
249
  - test/taxonomy_test.rb
245
250
  - test/test_helper.rb
246
251
  - test/with_daemon_test.rb
252
+ - test/with_option_test.rb
253
+ - utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm
254
+ - utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm
255
+ - utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm
256
+ - utils/FastAAI/FastAAI/FastAAI
257
+ - utils/FastAAI/README.md
258
+ - utils/FastAAI/kAAI_v1.0_virus.py
247
259
  - utils/adapters.fa
248
260
  - utils/cleanup-databases.rb
249
261
  - utils/core-pan-plot.R
@@ -542,7 +554,7 @@ homepage: http://enve-omics.ce.gatech.edu/miga
542
554
  licenses:
543
555
  - Artistic-2.0
544
556
  metadata: {}
545
- post_install_message:
557
+ post_install_message:
546
558
  rdoc_options:
547
559
  - lib
548
560
  - README.md
@@ -563,8 +575,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
563
575
  - !ruby/object:Gem::Version
564
576
  version: '0'
565
577
  requirements: []
566
- rubygems_version: 3.0.3
567
- signing_key:
578
+ rubygems_version: 3.1.4
579
+ signing_key:
568
580
  specification_version: 4
569
581
  summary: MiGA
570
582
  test_files: []