miga-base 0.7.26.3 → 1.0.0.sr1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/doctor.rb +50 -19
  7. data/lib/miga/cli/action/doctor/base.rb +20 -18
  8. data/lib/miga/cli/action/init.rb +11 -7
  9. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  10. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  11. data/lib/miga/cli/action/tax_dist.rb +2 -2
  12. data/lib/miga/cli/action/wf.rb +5 -4
  13. data/lib/miga/daemon.rb +11 -4
  14. data/lib/miga/dataset/result.rb +10 -6
  15. data/lib/miga/json.rb +1 -2
  16. data/lib/miga/metadata.rb +5 -1
  17. data/lib/miga/parallel.rb +11 -6
  18. data/lib/miga/project.rb +8 -8
  19. data/lib/miga/project/base.rb +4 -4
  20. data/lib/miga/project/result.rb +2 -2
  21. data/lib/miga/sqlite.rb +7 -0
  22. data/lib/miga/version.rb +23 -9
  23. data/scripts/aai_distances.bash +16 -18
  24. data/scripts/ani_distances.bash +16 -17
  25. data/scripts/assembly.bash +31 -16
  26. data/scripts/haai_distances.bash +3 -27
  27. data/scripts/miga.bash +6 -4
  28. data/scripts/p.bash +1 -1
  29. data/scripts/read_quality.bash +9 -18
  30. data/scripts/trimmed_fasta.bash +14 -30
  31. data/scripts/trimmed_reads.bash +36 -36
  32. data/test/parallel_test.rb +31 -0
  33. data/test/project_test.rb +2 -1
  34. data/utils/distance/commands.rb +1 -0
  35. data/utils/distance/runner.rb +2 -4
  36. data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
  37. data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
  38. data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
  39. data/utils/enveomics/Manifest/Tasks/other.json +77 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
  41. data/utils/enveomics/Manifest/categories.json +13 -4
  42. data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
  43. data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
  44. data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
  45. data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
  46. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  47. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  48. data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
  49. data/utils/enveomics/Scripts/SRA.download.bash +6 -8
  50. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  51. data/utils/enveomics/Scripts/aai.rb +3 -2
  52. data/utils/enveomics/Scripts/anir.rb +137 -0
  53. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  54. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  55. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
  56. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  57. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  58. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  59. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  60. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  61. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  62. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  63. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  64. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  65. data/utils/enveomics/Scripts/rbm.rb +87 -133
  66. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  67. data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
  68. data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
  69. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  70. data/utils/enveomics/enveomics.R/R/utils.R +30 -0
  71. data/utils/enveomics/enveomics.R/README.md +1 -0
  72. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
  73. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
  74. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
  75. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
  76. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
  77. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
  78. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
  79. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
  80. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
  81. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
  82. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  83. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
  84. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
  93. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  94. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
  95. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
  96. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
  97. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
  98. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
  99. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  100. data/utils/multitrim/README.md +67 -0
  101. data/utils/multitrim/multitrim.py +1555 -0
  102. data/utils/multitrim/multitrim.yml +13 -0
  103. data/utils/requirements.txt +4 -3
  104. metadata +33 -6
  105. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0f2c9f2013b77f3a3b1dff0fbb73e74dad0b836d17d4a2129110a6095ffc7d8a
4
- data.tar.gz: 0d7a1c879970dc9ffae5bae8cbbb51f6ef7ef13e41373fcc7cd5cc93db841104
3
+ metadata.gz: a599c1e0c51a62f7303cbd1bbf9f8568649dbbeae768b518ad67250b1c3217a4
4
+ data.tar.gz: f8300cb5c44209d8a3639338319b50cdbce01cf14362006a9f61147854625bd9
5
5
  SHA512:
6
- metadata.gz: 4da1c02b538370a585efc788a71cbb9fafb5b8383c2882f0ab01d1bdec22a9dac2878bb1fa3a028f78ff19d22ecf70e50c67e20e71dc8a0308437ca19a6368db
7
- data.tar.gz: 4eb38dafd4eb0dbaf69eed6fa734fec5c0d104d97d1f035cafdcd77365f93ecc723ac01eda643986f0676c4b4156a2504f92c1d98a02d0bd3955306e1a4924f5
6
+ metadata.gz: 4bf676ee04f650e8f2388b0f0e732e9da941ca13ceb6b02d986800d353adf2de261ab61a56dde496210f2965255ca0ff9df2cbf5927f8643ddd706736511ef07
7
+ data.tar.gz: b5cffa3afdb384db2a7fd2d86d11f062491d3df403569cae8bb2209df9119012e539179453efc492227835553bade1ff03805d8b26be4b5988890a7bc9684475
@@ -116,38 +116,69 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
116
116
  # Perform bidirectional operation with MiGA::Cli +cli+
117
117
  def check_bidir(cli)
118
118
  cli.say 'Checking if reference distances are bidirectional'
119
- ref_ds = cli.load_project.each_dataset.select(&:ref?)
119
+ project = cli.load_project
120
+ ref_ds = project.each_dataset.select(&:ref?)
120
121
  ref_names = ref_ds.map(&:name)
121
122
  n = ref_ds.size
122
123
 
123
124
  # Read data first (threaded)
124
- @distances = { aai: {}, ani: {} }
125
- Dir.mktmpdir do |tmp|
126
- MiGA::Parallel.process(cli[:threads]) do |thr|
127
- idx = 0
128
- ref_ds.each do |ds|
129
- cli.advance('Reading:', idx + 1, n, false) if thr == 0
130
- read_bidirectional(ds) if idx % cli[:threads] == thr
131
- idx += 1
132
- end
133
- File.open("#{tmp}/#{thr}.json", 'w') do |fh|
134
- fh.print JSON.fast_generate(@distances)
125
+ tmp = File.join(project.path, 'doctor-bidirectional.tmp')
126
+ FileUtils.mkdir_p(tmp)
127
+ MiGA::Parallel.process(cli[:threads]) do |thr|
128
+ file = File.join(tmp, "#{thr}.json")
129
+ fh = File.open(file, 'w')
130
+ [:aai, :ani].each do |metric|
131
+ fh.puts "# #{metric}"
132
+ ref_ds.each_with_index do |ds, idx|
133
+ if idx % cli[:threads] == thr
134
+ cli.advance('Reading:', idx + 1, n, false) if thr == 0
135
+ row = read_bidirectional(ds, metric)
136
+ fh.puts "#{ds.name} #{JSON.fast_generate(row)}" unless row.empty?
137
+ end
135
138
  end
136
139
  end
137
- cli.say
140
+ fh.puts '# end'
141
+ fh.flush # necessary for large threaded runs
142
+ fh.close
143
+ if thr == 0
144
+ cli.advance('Reading:', n, n, false)
145
+ cli.say
146
+ end
147
+ end
138
148
 
139
- cli[:threads].times do |i|
140
- cli.advance('Merging:', i + 1, cli[:threads], false)
141
- o = MiGA::Json.parse("#{tmp}/#{i}.json", symbolize: false)
142
- o.each { |k, v| @distances[k.to_sym].merge!(v) }
149
+ # Merge pieces per thread
150
+ dist = { aai: {}, ani: {} }
151
+ cli[:threads].times do |i|
152
+ cli.advance('Merging:', i + 1, cli[:threads], false)
153
+ file = File.join(tmp, "#{i}.json")
154
+ File.open(file, 'r') do |fh|
155
+ metric = nil
156
+ fh.each do |ln|
157
+ qry, row = ln.chomp.split(' ', 2)
158
+ if qry == '#'
159
+ metric = row.to_sym
160
+ else
161
+ raise "Unrecognized metric: #{metric}" unless dist[metric]
162
+ JSON.parse(row).each do |sbj, val|
163
+ dist[metric][qry] ||= {}
164
+ if dist[metric][sbj]&.include?(qry)
165
+ dist[metric][sbj].delete(qry) # Already bidirectional
166
+ else
167
+ dist[metric][qry][sbj] = val
168
+ end
169
+ end
170
+ end
171
+ end
172
+ raise "Incomplete thread dump: #{file}" unless metric == :end
143
173
  end
144
- cli.say
145
174
  end
175
+ cli.say
176
+ FileUtils.rm_rf(tmp)
146
177
 
147
178
  # Write missing values (threaded)
148
179
  MiGA::Parallel.distribute(ref_ds, cli[:threads]) do |ds, idx, thr|
149
180
  cli.advance('Datasets:', idx + 1, n, false) if thr == 0
150
- save_bidirectional(ds)
181
+ save_bidirectional(ds, dist)
151
182
  end
152
183
  cli.say
153
184
  end
@@ -115,30 +115,30 @@ module MiGA::Cli::Action::Doctor::Base
115
115
  end
116
116
 
117
117
  ##
118
- # Reads all the distance estimates in +a+ -> *, and saves them in memory
119
- # in the +@distances+ variable.
120
- def read_bidirectional(a)
121
- each_database_file(a) do |db_file, metric, result, rank|
122
- next if rank == :haai # No need for hAAI to be bidirectional
123
-
124
- sql = "select seq2, #{metric}, sd, n, omega from #{metric}"
125
- data = MiGA::SQLite.new(db_file).run(sql)
126
- next if data.nil? || data.empty?
127
-
128
- @distances[rank][a.name] ||= {}
129
- data.each { |row| @distances[rank][a.name][row.shift] = row }
130
- end
118
+ # Reads all the distance estimates in +a+ -> * for +metric+ and
119
+ # returns them as a hash +{"b_name" => [val, sd, ...], ...}+
120
+ def read_bidirectional(a, metric)
121
+ db_file = a.result(:distances)&.file_path("#{metric}_db") or return {}
122
+ sql = "select seq2, #{metric}, sd, n, omega from #{metric}"
123
+ data = MiGA::SQLite.new(db_file).run(sql) || []
124
+ Hash[
125
+ data.map do |row|
126
+ k, v = row.shift(2)
127
+ [k, row.all?(&:zero?) ? v : [v] + row]
128
+ end
129
+ ]
131
130
  end
132
131
 
133
132
  ##
134
133
  # Saves all the distance estimates in * -> +a+ into the +a+ databases
135
- # (as +a+ -> *), where +a+ is a MiGA::Dataset object
136
- def save_bidirectional(a)
134
+ # (as +a+ -> *), where +a+ is a MiGA::Dataset object, with currently
135
+ # saved values read from the hash +dist+
136
+ def save_bidirectional(a, dist)
137
137
  each_database_file(a) do |db_file, metric, result, rank|
138
138
  next if rank == :haai # No need for hAAI to be bidirectional
139
139
 
140
- b2a = @distances[rank].map { |b_name, v| b_name if v[a.name] }.compact
141
- a2b = @distances[rank][a.name].keys
140
+ b2a = dist[rank].map { |b_name, v| b_name if v[a.name] }.compact
141
+ a2b = dist[rank][a.name]&.keys || []
142
142
  SQLite3::Database.new(db_file) do |db|
143
143
  sql = <<~SQL
144
144
  insert into #{metric}(seq1, seq2, #{metric}, sd, n, omega) \
@@ -146,7 +146,9 @@ module MiGA::Cli::Action::Doctor::Base
146
146
  SQL
147
147
  db.execute('BEGIN TRANSACTION;')
148
148
  (b2a - a2b).each do |b_name|
149
- db.execute(sql, [a.name, b_name] + @distances[rank][b_name][a.name])
149
+ val = dist[rank][b_name][a.name]
150
+ val = [val, 0, 0, 0] unless val.is_a?(Array)
151
+ db.execute(sql, [a.name, b_name] + val)
150
152
  end
151
153
  db.execute('COMMIT;')
152
154
  end
@@ -112,12 +112,15 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
112
112
 
113
113
  def check_software_requirements(rc_fh)
114
114
  cli.puts 'Looking for requirements:'
115
- ask_for_optional(:mytaxa, 'MyTaxa')
116
- rc_fh.puts "export MIGA_MYTAXA='#{cli[:mytaxa] ? 'yes' : 'no'}'"
117
- ask_for_optional(:rdp, 'RDP classifier')
118
- rc_fh.puts "export MIGA_RDP='#{cli[:rdp] ? 'yes' : 'no'}'"
119
- ask_for_optional(:reads, 'read processing')
120
- rc_fh.puts "export MIGA_READS='#{cli[:reads] ? 'yes' : 'no'}'"
115
+ opt_groups = {
116
+ mytaxa: 'MyTaxa',
117
+ rdp: 'RDP classifier',
118
+ reads: 'read processing'
119
+ }
120
+ opt_groups.each do |k, v|
121
+ ask_for_optional(k, v)
122
+ rc_fh.puts "export MIGA_#{k.to_s.upcase}='#{cli[k] ? 'yes' : 'no'}'"
123
+ end
121
124
  paths = {}
122
125
  rc_fh.puts 'MIGA_PATH=""'
123
126
  req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
@@ -196,8 +199,9 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
196
199
  cli.puts 'yes'
197
200
  else
198
201
  cli.puts 'no, installing'
199
- cli.print '' + install_library(cli, paths, language, library)
202
+ out = install_library(cli, paths, language, library)
200
203
  unless test_library(cli, paths, language, library)
204
+ cli.puts out
201
205
  raise "Cannot install #{language.to_s.capitalize} library: #{library}"
202
206
  end
203
207
  end
@@ -83,6 +83,7 @@ module MiGA::Cli::Action::Init::FilesHelper
83
83
  cli.puts 'yes'
84
84
  elsif File.exist?(mt_db)
85
85
  cli.puts 'yes, sym-linking'
86
+ FileUtils.mkdir_p(miga_db)
86
87
  File.symlink(mt_db, home_db)
87
88
  else
88
89
  cli.puts 'no, downloading'
@@ -18,7 +18,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
18
18
  cli.opt_object(opt, [:project])
19
19
  opt.on(
20
20
  '-T', '--taxon STRING',
21
- '(Mandatory unless --reference) Taxon name (e.g., a species binomial)'
21
+ '(Mandatory) Taxon name (e.g., a species binomial)'
22
22
  ) { |v| cli[:taxon] = v }
23
23
  opt.on(
24
24
  '-m', '--metadata STRING',
@@ -137,7 +137,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
137
137
  end
138
138
 
139
139
  def sanitize_cli
140
- cli.ensure_par(taxon: '-T') unless cli[:reference]
140
+ cli.ensure_par(taxon: '-T')
141
141
  tasks = %w[reference complete chromosome scaffold contig]
142
142
  unless tasks.any? { |i| cli[i.to_sym] }
143
143
  raise 'No action requested: pick at least one type of genome'
@@ -204,7 +204,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
204
204
  'from(GenomeAssemblies).' \
205
205
  'usingschema(/schema/GenomeAssemblies).' \
206
206
  'matching(tab==["Prokaryotes"] and q=="' \
207
- "#{cli[:taxon].tr('"', "'")}\"",
207
+ "#{cli[:taxon]&.tr('"', "'")}\"",
208
208
  fields: 'organism|organism,assembly|assembly,replicons|replicons,' \
209
209
  'level|level,ftp_path_genbank|ftp_path_genbank,' \
210
210
  'release_date|release_date,strain|strain',
@@ -99,13 +99,13 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
99
99
  ds_name = []
100
100
  File.open(tab, 'r') do |fh|
101
101
  fh.each_line do |ln|
102
- if ln =~ /^ {#{(rank_i - 1) * 2}}\S+:\S+:/
102
+ if ln =~ /^ {0,#{(rank_i - 1) * 2}}\S+:\S+:/
103
103
  in_rank = nil
104
104
  ds_name = []
105
105
  elsif ln =~ /^ {#{rank_i * 2}}(#{rank}:(\S+)):/
106
106
  in_rank = $2 == '?' ? nil : $1
107
107
  ds_name = []
108
- elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
108
+ elsif ln =~ /^ *# (\S+)/ && !in_rank.nil?
109
109
  ds_i = $1
110
110
  ds_name << ds_i
111
111
  ds_name.each do |ds_j|
@@ -81,21 +81,22 @@ module MiGA::Cli::Action::Wf
81
81
  cli[:aai_p] = 'blast+'
82
82
  cli[:ani_p] = 'blast+'
83
83
  end
84
- opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani') do
84
+ opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani (default)') do
85
85
  cli[:aai_p] = 'diamond'
86
86
  cli[:ani_p] = 'fastani'
87
87
  end
88
88
  opt.on(
89
89
  '--haai-p STRING',
90
- 'hAAI search engine. One of: blast+ (default), fastaai, blat, diamond, no'
90
+ 'hAAI search engine. One of: blast+, fastaai, blat, diamond, fastaai, no',
91
+ 'The default is "no" for clade projects and "fastaai" otherwise'
91
92
  ) { |v| cli[:haai_p] = v }
92
93
  opt.on(
93
94
  '--aai-p STRING',
94
- 'AAI search engine. One of: blast+ (default), blat, diamond'
95
+ 'AAI search engine. One of: blast+, blat, diamond (default)'
95
96
  ) { |v| cli[:aai_p] = v }
96
97
  opt.on(
97
98
  '--ani-p STRING',
98
- 'ANI search engine. One of: blast+ (default), blat, fastani'
99
+ 'ANI search engine. One of: blast+, blat, fastani (default)'
99
100
  ) { |v| cli[:ani_p] = v }
100
101
  end
101
102
 
data/lib/miga/daemon.rb CHANGED
@@ -73,10 +73,10 @@ class MiGA::Daemon < MiGA::MiGA
73
73
  say 'MiGA:%s launched' % project.name
74
74
  say '-----------------------------------'
75
75
  miga_say "Saving log to: #{output_file}" unless show_log?
76
- queue_maintenance
77
- load_status
78
76
  say 'Configuration options:'
79
77
  say @runopts.to_s
78
+ load_status
79
+ queue_maintenance(true)
80
80
  end
81
81
 
82
82
  ##
@@ -87,6 +87,7 @@ class MiGA::Daemon < MiGA::MiGA
87
87
  check_datasets or check_project
88
88
  if shutdown_when_done? && (jobs_running.size + jobs_to_run.size).zero?
89
89
  say 'Nothing else to do, shutting down'
90
+ exit_cleanup
90
91
  return false
91
92
  end
92
93
  flush!
@@ -102,13 +103,19 @@ class MiGA::Daemon < MiGA::MiGA
102
103
 
103
104
  ##
104
105
  # Queue maintenance tasks as an analysis job
105
- def queue_maintenance
106
- return if bypass_maintenance? || shutdown_when_done?
106
+ def queue_maintenance(force = false)
107
+ return if bypass_maintenance? || (!force && shutdown_when_done?)
107
108
 
108
109
  say 'Queueing maintenance tasks'
109
110
  queue_job(:maintenance)
110
111
  end
111
112
 
113
+ ##
114
+ # Remove temporary files on completion
115
+ def exit_cleanup
116
+ FileUtils.rm_f(File.join(daemon_home, 'status.json'))
117
+ end
118
+
112
119
  ##
113
120
  # Send +msg+ to +say+ as long as +level+ is at most +verbosity+
114
121
  def l_say(level, *msg)
@@ -181,26 +181,30 @@ module MiGA::Dataset::Result
181
181
  add_files_to_ds_result(
182
182
  MiGA::Result.new("#{base}.json"), name,
183
183
  if result_files_exist?(base, '.2.clipped.fastq')
184
- {
185
- pair1: '.1.clipped.fastq',
186
- pair2: '.2.clipped.fastq',
187
- single: '.1.clipped.single.fastq'
188
- }
184
+ { pair1: '.1.clipped.fastq', pair2: '.2.clipped.fastq' }
189
185
  else
190
186
  { single: '.1.clipped.fastq' }
191
187
  end
192
188
  ).tap do |r|
189
+ # Legacy files
193
190
  r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
191
+ r.add_file(:single, "#{name}.1.clipped.single.fastq")
194
192
  end
195
193
  end
196
194
 
197
195
  ##
198
196
  # Add result type +:read_quality+ at +base+ (no +_opts+ supported)
199
197
  def add_result_read_quality(base, _opts)
200
- return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
198
+ return nil unless
199
+ result_files_exist?(base, %w[.post.1.html]) ||
200
+ result_files_exist?(base, %w[.solexaqa .fastqc])
201
201
 
202
202
  add_files_to_ds_result(
203
203
  MiGA::Result.new("#{base}.json"), name,
204
+ pre_qc_1: '.pre.1.html', pre_qc_2: '.pre.2.html',
205
+ post_qc_1: '.post.1.html', post_qc_2: '.post.2.html',
206
+ adapter_detection: '.adapters.txt',
207
+ # Legacy files
204
208
  solexaqa: '.solexaqa', fastqc: '.fastqc'
205
209
  )
206
210
  end
data/lib/miga/json.rb CHANGED
@@ -1,5 +1,4 @@
1
- # @package MiGA
2
- # @license Artistic-2.0
1
+ # frozen_string_literal: true
3
2
 
4
3
  require 'json'
5
4
 
data/lib/miga/metadata.rb CHANGED
@@ -105,7 +105,11 @@ class MiGA::Metadata < MiGA::MiGA
105
105
  ##
106
106
  # Return the value of +k+ in #data
107
107
  def [](k)
108
- data[k.to_sym]
108
+ if k.to_s =~ /(.+):(.+)/
109
+ data[$1.to_sym]&.fetch($2)
110
+ else
111
+ data[k.to_sym]
112
+ end
109
113
  end
110
114
 
111
115
  ##
data/lib/miga/parallel.rb CHANGED
@@ -19,13 +19,18 @@ class MiGA::Parallel < MiGA::MiGA
19
19
  # 1. Unitary object from +enum+
20
20
  # 2. Index of the unitary object
21
21
  # 3. Index of the acting thread
22
- def distribute(enum, threads)
23
- process(threads) do |thr|
24
- enum.each_with_index do |obj, idx|
25
- yield(obj, idx, thr) if idx % threads == thr
26
- end
22
+ def distribute(enum, threads, &blk)
23
+ process(threads) { |thr| thread_enum(enum, threads, thr, &blk) }
24
+ end
25
+
26
+ ##
27
+ # Enum through +enum+ executing the passed block only for thread with index
28
+ # +thr+, one of +threads+ threads. The passed block has the same arguments
29
+ # as the one in +#distribute+
30
+ def thread_enum(enum, threads, thr)
31
+ enum.each_with_index do |obj, idx|
32
+ yield(obj, idx, thr) if idx % threads == thr
27
33
  end
28
34
  end
29
35
  end
30
36
  end
31
-
data/lib/miga/project.rb CHANGED
@@ -42,18 +42,18 @@ class MiGA::Project < MiGA::MiGA
42
42
  # Create an empty project
43
43
  def create
44
44
  unless MiGA::MiGA.initialized?
45
- raise 'Impossible to create project in uninitialized MiGA.'
45
+ warn 'Projects cannot be processed yet, first run: miga init'
46
46
  end
47
47
 
48
- dirs = [path] + @@FOLDERS.map { |d| "#{path}/#{d}" } +
49
- @@DATA_FOLDERS.map { |d| "#{path}/data/#{d}" }
50
- dirs.each { |d| Dir.mkdir(d) unless Dir.exist? d }
48
+ dirs = @@FOLDERS.map { |d| File.join(path, d) }
49
+ dirs += @@DATA_FOLDERS.map { |d| File.join(path, 'data', d) }
50
+ dirs.each { |d| FileUtils.mkdir_p(d) }
51
51
  @metadata = MiGA::Metadata.new(
52
- File.expand_path('miga.project.json', path),
53
- { datasets: [], name: File.basename(path) }
52
+ File.join(path, 'miga.project.json'),
53
+ datasets: [], name: File.basename(path)
54
54
  )
55
- d_path = File.expand_path('daemon/daemon.json', path)
56
- File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist? d_path
55
+ d_path = File.join(path, 'daemon', 'daemon.json')
56
+ File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist?(d_path)
57
57
  pull_hook :on_create
58
58
  self.load
59
59
  end
@@ -131,15 +131,15 @@ module MiGA::Project::Base
131
131
  },
132
132
  haai_p: {
133
133
  desc: 'Value of aai.rb -p on hAAI', type: String,
134
- default: proc { |project| project.clade? ? 'no' : 'blast+' },
135
- in: %w[fastaai blast+ blast blat diamond no]
134
+ default: proc { |project| project.clade? ? 'no' : 'fastaai' },
135
+ in: %w[blast+ blast blat diamond fastaai no]
136
136
  },
137
137
  aai_p: {
138
- desc: 'Value of aai.rb -p on AAI', default: 'blast+', type: String,
138
+ desc: 'Value of aai.rb -p on AAI', default: 'diamond', type: String,
139
139
  in: %w[blast+ blast blat diamond]
140
140
  },
141
141
  ani_p: {
142
- desc: 'Value of ani.rb -p on ANI', default: 'blast+', type: String,
142
+ desc: 'Value of ani.rb -p on ANI', default: 'fastani', type: String,
143
143
  in: %w[blast+ blast blat fastani]
144
144
  },
145
145
  max_try: {