miga-base 0.7.26.3 → 1.0.0.sr1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105)
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/doctor.rb +50 -19
  7. data/lib/miga/cli/action/doctor/base.rb +20 -18
  8. data/lib/miga/cli/action/init.rb +11 -7
  9. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  10. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  11. data/lib/miga/cli/action/tax_dist.rb +2 -2
  12. data/lib/miga/cli/action/wf.rb +5 -4
  13. data/lib/miga/daemon.rb +11 -4
  14. data/lib/miga/dataset/result.rb +10 -6
  15. data/lib/miga/json.rb +1 -2
  16. data/lib/miga/metadata.rb +5 -1
  17. data/lib/miga/parallel.rb +11 -6
  18. data/lib/miga/project.rb +8 -8
  19. data/lib/miga/project/base.rb +4 -4
  20. data/lib/miga/project/result.rb +2 -2
  21. data/lib/miga/sqlite.rb +7 -0
  22. data/lib/miga/version.rb +23 -9
  23. data/scripts/aai_distances.bash +16 -18
  24. data/scripts/ani_distances.bash +16 -17
  25. data/scripts/assembly.bash +31 -16
  26. data/scripts/haai_distances.bash +3 -27
  27. data/scripts/miga.bash +6 -4
  28. data/scripts/p.bash +1 -1
  29. data/scripts/read_quality.bash +9 -18
  30. data/scripts/trimmed_fasta.bash +14 -30
  31. data/scripts/trimmed_reads.bash +36 -36
  32. data/test/parallel_test.rb +31 -0
  33. data/test/project_test.rb +2 -1
  34. data/utils/distance/commands.rb +1 -0
  35. data/utils/distance/runner.rb +2 -4
  36. data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
  37. data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
  38. data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
  39. data/utils/enveomics/Manifest/Tasks/other.json +77 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
  41. data/utils/enveomics/Manifest/categories.json +13 -4
  42. data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
  43. data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
  44. data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
  45. data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
  46. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  47. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  48. data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
  49. data/utils/enveomics/Scripts/SRA.download.bash +6 -8
  50. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  51. data/utils/enveomics/Scripts/aai.rb +3 -2
  52. data/utils/enveomics/Scripts/anir.rb +137 -0
  53. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  54. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  55. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
  56. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  57. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  58. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  59. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  60. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  61. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  62. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  63. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  64. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  65. data/utils/enveomics/Scripts/rbm.rb +87 -133
  66. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  67. data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
  68. data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
  69. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  70. data/utils/enveomics/enveomics.R/R/utils.R +30 -0
  71. data/utils/enveomics/enveomics.R/README.md +1 -0
  72. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
  73. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
  74. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
  75. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
  76. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
  77. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
  78. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
  79. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
  80. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
  81. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
  82. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  83. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
  84. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
  93. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  94. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
  95. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
  96. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
  97. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
  98. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
  99. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  100. data/utils/multitrim/README.md +67 -0
  101. data/utils/multitrim/multitrim.py +1555 -0
  102. data/utils/multitrim/multitrim.yml +13 -0
  103. data/utils/requirements.txt +4 -3
  104. metadata +33 -6
  105. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0f2c9f2013b77f3a3b1dff0fbb73e74dad0b836d17d4a2129110a6095ffc7d8a
4
- data.tar.gz: 0d7a1c879970dc9ffae5bae8cbbb51f6ef7ef13e41373fcc7cd5cc93db841104
3
+ metadata.gz: a599c1e0c51a62f7303cbd1bbf9f8568649dbbeae768b518ad67250b1c3217a4
4
+ data.tar.gz: f8300cb5c44209d8a3639338319b50cdbce01cf14362006a9f61147854625bd9
5
5
  SHA512:
6
- metadata.gz: 4da1c02b538370a585efc788a71cbb9fafb5b8383c2882f0ab01d1bdec22a9dac2878bb1fa3a028f78ff19d22ecf70e50c67e20e71dc8a0308437ca19a6368db
7
- data.tar.gz: 4eb38dafd4eb0dbaf69eed6fa734fec5c0d104d97d1f035cafdcd77365f93ecc723ac01eda643986f0676c4b4156a2504f92c1d98a02d0bd3955306e1a4924f5
6
+ metadata.gz: 4bf676ee04f650e8f2388b0f0e732e9da941ca13ceb6b02d986800d353adf2de261ab61a56dde496210f2965255ca0ff9df2cbf5927f8643ddd706736511ef07
7
+ data.tar.gz: b5cffa3afdb384db2a7fd2d86d11f062491d3df403569cae8bb2209df9119012e539179453efc492227835553bade1ff03805d8b26be4b5988890a7bc9684475
data/lib/miga/cli/action/doctor.rb CHANGED
@@ -116,38 +116,69 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
116
116
  # Perform bidirectional operation with MiGA::Cli +cli+
117
117
  def check_bidir(cli)
118
118
  cli.say 'Checking if reference distances are bidirectional'
119
- ref_ds = cli.load_project.each_dataset.select(&:ref?)
119
+ project = cli.load_project
120
+ ref_ds = project.each_dataset.select(&:ref?)
120
121
  ref_names = ref_ds.map(&:name)
121
122
  n = ref_ds.size
122
123
 
123
124
  # Read data first (threaded)
124
- @distances = { aai: {}, ani: {} }
125
- Dir.mktmpdir do |tmp|
126
- MiGA::Parallel.process(cli[:threads]) do |thr|
127
- idx = 0
128
- ref_ds.each do |ds|
129
- cli.advance('Reading:', idx + 1, n, false) if thr == 0
130
- read_bidirectional(ds) if idx % cli[:threads] == thr
131
- idx += 1
132
- end
133
- File.open("#{tmp}/#{thr}.json", 'w') do |fh|
134
- fh.print JSON.fast_generate(@distances)
125
+ tmp = File.join(project.path, 'doctor-bidirectional.tmp')
126
+ FileUtils.mkdir_p(tmp)
127
+ MiGA::Parallel.process(cli[:threads]) do |thr|
128
+ file = File.join(tmp, "#{thr}.json")
129
+ fh = File.open(file, 'w')
130
+ [:aai, :ani].each do |metric|
131
+ fh.puts "# #{metric}"
132
+ ref_ds.each_with_index do |ds, idx|
133
+ if idx % cli[:threads] == thr
134
+ cli.advance('Reading:', idx + 1, n, false) if thr == 0
135
+ row = read_bidirectional(ds, metric)
136
+ fh.puts "#{ds.name} #{JSON.fast_generate(row)}" unless row.empty?
137
+ end
135
138
  end
136
139
  end
137
- cli.say
140
+ fh.puts '# end'
141
+ fh.flush # necessary for large threaded runs
142
+ fh.close
143
+ if thr == 0
144
+ cli.advance('Reading:', n, n, false)
145
+ cli.say
146
+ end
147
+ end
138
148
 
139
- cli[:threads].times do |i|
140
- cli.advance('Merging:', i + 1, cli[:threads], false)
141
- o = MiGA::Json.parse("#{tmp}/#{i}.json", symbolize: false)
142
- o.each { |k, v| @distances[k.to_sym].merge!(v) }
149
+ # Merge pieces per thread
150
+ dist = { aai: {}, ani: {} }
151
+ cli[:threads].times do |i|
152
+ cli.advance('Merging:', i + 1, cli[:threads], false)
153
+ file = File.join(tmp, "#{i}.json")
154
+ File.open(file, 'r') do |fh|
155
+ metric = nil
156
+ fh.each do |ln|
157
+ qry, row = ln.chomp.split(' ', 2)
158
+ if qry == '#'
159
+ metric = row.to_sym
160
+ else
161
+ raise "Unrecognized metric: #{metric}" unless dist[metric]
162
+ JSON.parse(row).each do |sbj, val|
163
+ dist[metric][qry] ||= {}
164
+ if dist[metric][sbj]&.include?(qry)
165
+ dist[metric][sbj].delete(qry) # Already bidirectional
166
+ else
167
+ dist[metric][qry][sbj] = val
168
+ end
169
+ end
170
+ end
171
+ end
172
+ raise "Incomplete thread dump: #{file}" unless metric == :end
143
173
  end
144
- cli.say
145
174
  end
175
+ cli.say
176
+ FileUtils.rm_rf(tmp)
146
177
 
147
178
  # Write missing values (threaded)
148
179
  MiGA::Parallel.distribute(ref_ds, cli[:threads]) do |ds, idx, thr|
149
180
  cli.advance('Datasets:', idx + 1, n, false) if thr == 0
150
- save_bidirectional(ds)
181
+ save_bidirectional(ds, dist)
151
182
  end
152
183
  cli.say
153
184
  end
data/lib/miga/cli/action/doctor/base.rb CHANGED
@@ -115,30 +115,30 @@ module MiGA::Cli::Action::Doctor::Base
115
115
  end
116
116
 
117
117
  ##
118
- # Reads all the distance estimates in +a+ -> *, and saves them in memory
119
- # in the +@distances+ variable.
120
- def read_bidirectional(a)
121
- each_database_file(a) do |db_file, metric, result, rank|
122
- next if rank == :haai # No need for hAAI to be bidirectional
123
-
124
- sql = "select seq2, #{metric}, sd, n, omega from #{metric}"
125
- data = MiGA::SQLite.new(db_file).run(sql)
126
- next if data.nil? || data.empty?
127
-
128
- @distances[rank][a.name] ||= {}
129
- data.each { |row| @distances[rank][a.name][row.shift] = row }
130
- end
118
+ # Reads all the distance estimates in +a+ -> * for +metric+ and
119
+ # returns them as a hash +{"b_name" => [val, sd, ...], ...}+
120
+ def read_bidirectional(a, metric)
121
+ db_file = a.result(:distances)&.file_path("#{metric}_db") or return {}
122
+ sql = "select seq2, #{metric}, sd, n, omega from #{metric}"
123
+ data = MiGA::SQLite.new(db_file).run(sql) || []
124
+ Hash[
125
+ data.map do |row|
126
+ k, v = row.shift(2)
127
+ [k, row.all?(&:zero?) ? v : [v] + row]
128
+ end
129
+ ]
131
130
  end
132
131
 
133
132
  ##
134
133
  # Saves all the distance estimates in * -> +a+ into the +a+ databases
135
- # (as +a+ -> *), where +a+ is a MiGA::Dataset object
136
- def save_bidirectional(a)
134
+ # (as +a+ -> *), where +a+ is a MiGA::Dataset object, with currently
135
+ # saved values read from the hash +dist+
136
+ def save_bidirectional(a, dist)
137
137
  each_database_file(a) do |db_file, metric, result, rank|
138
138
  next if rank == :haai # No need for hAAI to be bidirectional
139
139
 
140
- b2a = @distances[rank].map { |b_name, v| b_name if v[a.name] }.compact
141
- a2b = @distances[rank][a.name].keys
140
+ b2a = dist[rank].map { |b_name, v| b_name if v[a.name] }.compact
141
+ a2b = dist[rank][a.name]&.keys || []
142
142
  SQLite3::Database.new(db_file) do |db|
143
143
  sql = <<~SQL
144
144
  insert into #{metric}(seq1, seq2, #{metric}, sd, n, omega) \
@@ -146,7 +146,9 @@ module MiGA::Cli::Action::Doctor::Base
146
146
  SQL
147
147
  db.execute('BEGIN TRANSACTION;')
148
148
  (b2a - a2b).each do |b_name|
149
- db.execute(sql, [a.name, b_name] + @distances[rank][b_name][a.name])
149
+ val = dist[rank][b_name][a.name]
150
+ val = [val, 0, 0, 0] unless val.is_a?(Array)
151
+ db.execute(sql, [a.name, b_name] + val)
150
152
  end
151
153
  db.execute('COMMIT;')
152
154
  end
data/lib/miga/cli/action/init.rb CHANGED
@@ -112,12 +112,15 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
112
112
 
113
113
  def check_software_requirements(rc_fh)
114
114
  cli.puts 'Looking for requirements:'
115
- ask_for_optional(:mytaxa, 'MyTaxa')
116
- rc_fh.puts "export MIGA_MYTAXA='#{cli[:mytaxa] ? 'yes' : 'no'}'"
117
- ask_for_optional(:rdp, 'RDP classifier')
118
- rc_fh.puts "export MIGA_RDP='#{cli[:rdp] ? 'yes' : 'no'}'"
119
- ask_for_optional(:reads, 'read processing')
120
- rc_fh.puts "export MIGA_READS='#{cli[:reads] ? 'yes' : 'no'}'"
115
+ opt_groups = {
116
+ mytaxa: 'MyTaxa',
117
+ rdp: 'RDP classifier',
118
+ reads: 'read processing'
119
+ }
120
+ opt_groups.each do |k, v|
121
+ ask_for_optional(k, v)
122
+ rc_fh.puts "export MIGA_#{k.to_s.upcase}='#{cli[k] ? 'yes' : 'no'}'"
123
+ end
121
124
  paths = {}
122
125
  rc_fh.puts 'MIGA_PATH=""'
123
126
  req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
@@ -196,8 +199,9 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
196
199
  cli.puts 'yes'
197
200
  else
198
201
  cli.puts 'no, installing'
199
- cli.print '' + install_library(cli, paths, language, library)
202
+ out = install_library(cli, paths, language, library)
200
203
  unless test_library(cli, paths, language, library)
204
+ cli.puts out
201
205
  raise "Cannot install #{language.to_s.capitalize} library: #{library}"
202
206
  end
203
207
  end
data/lib/miga/cli/action/init/files_helper.rb CHANGED
@@ -83,6 +83,7 @@ module MiGA::Cli::Action::Init::FilesHelper
83
83
  cli.puts 'yes'
84
84
  elsif File.exist?(mt_db)
85
85
  cli.puts 'yes, sym-linking'
86
+ FileUtils.mkdir_p(miga_db)
86
87
  File.symlink(mt_db, home_db)
87
88
  else
88
89
  cli.puts 'no, downloading'
data/lib/miga/cli/action/ncbi_get.rb CHANGED
@@ -18,7 +18,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
18
18
  cli.opt_object(opt, [:project])
19
19
  opt.on(
20
20
  '-T', '--taxon STRING',
21
- '(Mandatory unless --reference) Taxon name (e.g., a species binomial)'
21
+ '(Mandatory) Taxon name (e.g., a species binomial)'
22
22
  ) { |v| cli[:taxon] = v }
23
23
  opt.on(
24
24
  '-m', '--metadata STRING',
@@ -137,7 +137,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
137
137
  end
138
138
 
139
139
  def sanitize_cli
140
- cli.ensure_par(taxon: '-T') unless cli[:reference]
140
+ cli.ensure_par(taxon: '-T')
141
141
  tasks = %w[reference complete chromosome scaffold contig]
142
142
  unless tasks.any? { |i| cli[i.to_sym] }
143
143
  raise 'No action requested: pick at least one type of genome'
@@ -204,7 +204,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
204
204
  'from(GenomeAssemblies).' \
205
205
  'usingschema(/schema/GenomeAssemblies).' \
206
206
  'matching(tab==["Prokaryotes"] and q=="' \
207
- "#{cli[:taxon].tr('"', "'")}\"",
207
+ "#{cli[:taxon]&.tr('"', "'")}\"",
208
208
  fields: 'organism|organism,assembly|assembly,replicons|replicons,' \
209
209
  'level|level,ftp_path_genbank|ftp_path_genbank,' \
210
210
  'release_date|release_date,strain|strain',
data/lib/miga/cli/action/tax_dist.rb CHANGED
@@ -99,13 +99,13 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
99
99
  ds_name = []
100
100
  File.open(tab, 'r') do |fh|
101
101
  fh.each_line do |ln|
102
- if ln =~ /^ {#{(rank_i - 1) * 2}}\S+:\S+:/
102
+ if ln =~ /^ {0,#{(rank_i - 1) * 2}}\S+:\S+:/
103
103
  in_rank = nil
104
104
  ds_name = []
105
105
  elsif ln =~ /^ {#{rank_i * 2}}(#{rank}:(\S+)):/
106
106
  in_rank = $2 == '?' ? nil : $1
107
107
  ds_name = []
108
- elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
108
+ elsif ln =~ /^ *# (\S+)/ && !in_rank.nil?
109
109
  ds_i = $1
110
110
  ds_name << ds_i
111
111
  ds_name.each do |ds_j|
data/lib/miga/cli/action/wf.rb CHANGED
@@ -81,21 +81,22 @@ module MiGA::Cli::Action::Wf
81
81
  cli[:aai_p] = 'blast+'
82
82
  cli[:ani_p] = 'blast+'
83
83
  end
84
- opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani') do
84
+ opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani (default)') do
85
85
  cli[:aai_p] = 'diamond'
86
86
  cli[:ani_p] = 'fastani'
87
87
  end
88
88
  opt.on(
89
89
  '--haai-p STRING',
90
- 'hAAI search engine. One of: blast+ (default), fastaai, blat, diamond, no'
90
+ 'hAAI search engine. One of: blast+, fastaai, blat, diamond, fastaai, no',
91
+ 'The default is "no" for clade projects and "fastaai" otherwise'
91
92
  ) { |v| cli[:haai_p] = v }
92
93
  opt.on(
93
94
  '--aai-p STRING',
94
- 'AAI search engine. One of: blast+ (default), blat, diamond'
95
+ 'AAI search engine. One of: blast+, blat, diamond (default)'
95
96
  ) { |v| cli[:aai_p] = v }
96
97
  opt.on(
97
98
  '--ani-p STRING',
98
- 'ANI search engine. One of: blast+ (default), blat, fastani'
99
+ 'ANI search engine. One of: blast+, blat, fastani (default)'
99
100
  ) { |v| cli[:ani_p] = v }
100
101
  end
101
102
 
data/lib/miga/daemon.rb CHANGED
@@ -73,10 +73,10 @@ class MiGA::Daemon < MiGA::MiGA
73
73
  say 'MiGA:%s launched' % project.name
74
74
  say '-----------------------------------'
75
75
  miga_say "Saving log to: #{output_file}" unless show_log?
76
- queue_maintenance
77
- load_status
78
76
  say 'Configuration options:'
79
77
  say @runopts.to_s
78
+ load_status
79
+ queue_maintenance(true)
80
80
  end
81
81
 
82
82
  ##
@@ -87,6 +87,7 @@ class MiGA::Daemon < MiGA::MiGA
87
87
  check_datasets or check_project
88
88
  if shutdown_when_done? && (jobs_running.size + jobs_to_run.size).zero?
89
89
  say 'Nothing else to do, shutting down'
90
+ exit_cleanup
90
91
  return false
91
92
  end
92
93
  flush!
@@ -102,13 +103,19 @@ class MiGA::Daemon < MiGA::MiGA
102
103
 
103
104
  ##
104
105
  # Queue maintenance tasks as an analysis job
105
- def queue_maintenance
106
- return if bypass_maintenance? || shutdown_when_done?
106
+ def queue_maintenance(force = false)
107
+ return if bypass_maintenance? || (!force && shutdown_when_done?)
107
108
 
108
109
  say 'Queueing maintenance tasks'
109
110
  queue_job(:maintenance)
110
111
  end
111
112
 
113
+ ##
114
+ # Remove temporary files on completion
115
+ def exit_cleanup
116
+ FileUtils.rm_f(File.join(daemon_home, 'status.json'))
117
+ end
118
+
112
119
  ##
113
120
  # Send +msg+ to +say+ as long as +level+ is at most +verbosity+
114
121
  def l_say(level, *msg)
data/lib/miga/dataset/result.rb CHANGED
@@ -181,26 +181,30 @@ module MiGA::Dataset::Result
181
181
  add_files_to_ds_result(
182
182
  MiGA::Result.new("#{base}.json"), name,
183
183
  if result_files_exist?(base, '.2.clipped.fastq')
184
- {
185
- pair1: '.1.clipped.fastq',
186
- pair2: '.2.clipped.fastq',
187
- single: '.1.clipped.single.fastq'
188
- }
184
+ { pair1: '.1.clipped.fastq', pair2: '.2.clipped.fastq' }
189
185
  else
190
186
  { single: '.1.clipped.fastq' }
191
187
  end
192
188
  ).tap do |r|
189
+ # Legacy files
193
190
  r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
191
+ r.add_file(:single, "#{name}.1.clipped.single.fastq")
194
192
  end
195
193
  end
196
194
 
197
195
  ##
198
196
  # Add result type +:read_quality+ at +base+ (no +_opts+ supported)
199
197
  def add_result_read_quality(base, _opts)
200
- return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
198
+ return nil unless
199
+ result_files_exist?(base, %w[.post.1.html]) ||
200
+ result_files_exist?(base, %w[.solexaqa .fastqc])
201
201
 
202
202
  add_files_to_ds_result(
203
203
  MiGA::Result.new("#{base}.json"), name,
204
+ pre_qc_1: '.pre.1.html', pre_qc_2: '.pre.2.html',
205
+ post_qc_1: '.post.1.html', post_qc_2: '.post.2.html',
206
+ adapter_detection: '.adapters.txt',
207
+ # Legacy files
204
208
  solexaqa: '.solexaqa', fastqc: '.fastqc'
205
209
  )
206
210
  end
data/lib/miga/json.rb CHANGED
@@ -1,5 +1,4 @@
1
- # @package MiGA
2
- # @license Artistic-2.0
1
+ # frozen_string_literal: true
3
2
 
4
3
  require 'json'
5
4
 
data/lib/miga/metadata.rb CHANGED
@@ -105,7 +105,11 @@ class MiGA::Metadata < MiGA::MiGA
105
105
  ##
106
106
  # Return the value of +k+ in #data
107
107
  def [](k)
108
- data[k.to_sym]
108
+ if k.to_s =~ /(.+):(.+)/
109
+ data[$1.to_sym]&.fetch($2)
110
+ else
111
+ data[k.to_sym]
112
+ end
109
113
  end
110
114
 
111
115
  ##
data/lib/miga/parallel.rb CHANGED
@@ -19,13 +19,18 @@ class MiGA::Parallel < MiGA::MiGA
19
19
  # 1. Unitary object from +enum+
20
20
  # 2. Index of the unitary object
21
21
  # 3. Index of the acting thread
22
- def distribute(enum, threads)
23
- process(threads) do |thr|
24
- enum.each_with_index do |obj, idx|
25
- yield(obj, idx, thr) if idx % threads == thr
26
- end
22
+ def distribute(enum, threads, &blk)
23
+ process(threads) { |thr| thread_enum(enum, threads, thr, &blk) }
24
+ end
25
+
26
+ ##
27
+ # Enum through +enum+ executing the passed block only for thread with index
28
+ # +thr+, one of +threads+ threads. The passed block has the same arguments
29
+ # as the one in +#distribute+
30
+ def thread_enum(enum, threads, thr)
31
+ enum.each_with_index do |obj, idx|
32
+ yield(obj, idx, thr) if idx % threads == thr
27
33
  end
28
34
  end
29
35
  end
30
36
  end
31
-
data/lib/miga/project.rb CHANGED
@@ -42,18 +42,18 @@ class MiGA::Project < MiGA::MiGA
42
42
  # Create an empty project
43
43
  def create
44
44
  unless MiGA::MiGA.initialized?
45
- raise 'Impossible to create project in uninitialized MiGA.'
45
+ warn 'Projects cannot be processed yet, first run: miga init'
46
46
  end
47
47
 
48
- dirs = [path] + @@FOLDERS.map { |d| "#{path}/#{d}" } +
49
- @@DATA_FOLDERS.map { |d| "#{path}/data/#{d}" }
50
- dirs.each { |d| Dir.mkdir(d) unless Dir.exist? d }
48
+ dirs = @@FOLDERS.map { |d| File.join(path, d) }
49
+ dirs += @@DATA_FOLDERS.map { |d| File.join(path, 'data', d) }
50
+ dirs.each { |d| FileUtils.mkdir_p(d) }
51
51
  @metadata = MiGA::Metadata.new(
52
- File.expand_path('miga.project.json', path),
53
- { datasets: [], name: File.basename(path) }
52
+ File.join(path, 'miga.project.json'),
53
+ datasets: [], name: File.basename(path)
54
54
  )
55
- d_path = File.expand_path('daemon/daemon.json', path)
56
- File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist? d_path
55
+ d_path = File.join(path, 'daemon', 'daemon.json')
56
+ File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist?(d_path)
57
57
  pull_hook :on_create
58
58
  self.load
59
59
  end
data/lib/miga/project/base.rb CHANGED
@@ -131,15 +131,15 @@ module MiGA::Project::Base
131
131
  },
132
132
  haai_p: {
133
133
  desc: 'Value of aai.rb -p on hAAI', type: String,
134
- default: proc { |project| project.clade? ? 'no' : 'blast+' },
135
- in: %w[fastaai blast+ blast blat diamond no]
134
+ default: proc { |project| project.clade? ? 'no' : 'fastaai' },
135
+ in: %w[blast+ blast blat diamond fastaai no]
136
136
  },
137
137
  aai_p: {
138
- desc: 'Value of aai.rb -p on AAI', default: 'blast+', type: String,
138
+ desc: 'Value of aai.rb -p on AAI', default: 'diamond', type: String,
139
139
  in: %w[blast+ blast blat diamond]
140
140
  },
141
141
  ani_p: {
142
- desc: 'Value of ani.rb -p on ANI', default: 'blast+', type: String,
142
+ desc: 'Value of ani.rb -p on ANI', default: 'fastani', type: String,
143
143
  in: %w[blast+ blast blat fastani]
144
144
  },
145
145
  max_try: {