miga-base 1.2.15.0 → 1.2.15.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'miga/result'
2
4
  require 'miga/dataset/base'
3
5
  require 'miga/common/with_result'
@@ -13,56 +15,17 @@ module MiGA::Dataset::Result
13
15
  include MiGA::Dataset::Base
14
16
  include MiGA::Common::WithResult
15
17
 
18
+ require 'miga/dataset/result/ignore'
19
+ require 'miga/dataset/result/add'
20
+ include MiGA::Dataset::Result::Ignore
21
+ include MiGA::Dataset::Result::Add
22
+
16
23
  ##
17
24
  # Return the basename for results
18
25
  def result_base
19
26
  name
20
27
  end
21
28
 
22
- ##
23
- # Should I ignore +task+ for this dataset?
24
- def ignore_task?(task)
25
- why_ignore(task) != :execute
26
- end
27
-
28
- ##
29
- # Return a code explaining why a task is ignored.
30
- # The values are symbols:
31
- # - empty: the dataset has no data
32
- # - inactive: the dataset is inactive
33
- # - upstream: the task is upstream from dataset's input
34
- # - force: forced to ignore by metadata
35
- # - project: incompatible project
36
- # - noref: incompatible dataset, only for reference
37
- # - multi: incompatible dataset, only for multi
38
- # - nonmulti: incompatible dataset, only for nonmulti
39
- # - complete: the task is already complete
40
- # - execute: do not ignore, execute the task
41
- def why_ignore(task)
42
- if !get_result(task).nil?
43
- :complete
44
- elsif !active?
45
- :inactive
46
- elsif first_preprocessing.nil?
47
- :empty
48
- elsif @@PREPROCESSING_TASKS.index(task) <
49
- @@PREPROCESSING_TASKS.index(first_preprocessing)
50
- :upstream
51
- elsif !metadata["run_#{task}"].nil?
52
- metadata["run_#{task}"] ? :execute : :force
53
- elsif task == :taxonomy && project.option(:ref_project).nil?
54
- :project
55
- elsif @@_EXCLUDE_NOREF_TASKS_H[task] && !ref?
56
- :noref
57
- elsif @@_ONLY_MULTI_TASKS_H[task] && !multi?
58
- :multi
59
- elsif @@_ONLY_NONMULTI_TASKS_H[task] && !nonmulti?
60
- :nonmulti
61
- else
62
- :execute
63
- end
64
- end
65
-
66
29
  ##
67
30
  # Returns the key symbol of the first registered result (sorted by the
68
31
  # execution order). This typically corresponds to the result used as the
@@ -95,18 +58,18 @@ module MiGA::Dataset::Result
95
58
  # - 2 for a queued result (a task yet to be executed).
96
59
  # It passes +save+ to #add_result
97
60
  def profile_advance(save = false)
98
- first_task = first_preprocessing(save)
99
- return Array.new(@@PREPROCESSING_TASKS.size, 0) if first_task.nil?
61
+ # Determine the start point
62
+ first_task = first_preprocessing(save) or
63
+ return Array.new(self.class.PREPROCESSING_TASKS.size, 0)
100
64
 
101
- adv = []
65
+ # Traverse all tasks and determine the corresponding state
102
66
  state = 0
103
67
  next_task = next_preprocessing(save)
104
- @@PREPROCESSING_TASKS.each do |task|
68
+ self.class.PREPROCESSING_TASKS.map do |task|
105
69
  state = 1 if first_task == task
106
70
  state = 2 if !next_task.nil? && next_task == task
107
- adv << state
71
+ state
108
72
  end
109
- adv
110
73
  end
111
74
 
112
75
  ##
@@ -125,9 +88,9 @@ module MiGA::Dataset::Result
125
88
  def result_status(task)
126
89
  reason = why_ignore(task)
127
90
  case reason
128
- when :upstream; :-
129
- when :execute; :pending
130
- when :complete; :complete
91
+ when :upstream then :-
92
+ when :execute then :pending
93
+ when :complete then :complete
131
94
  else; :"ignore_#{reason}"
132
95
  end
133
96
  end
@@ -136,306 +99,32 @@ module MiGA::Dataset::Result
136
99
  # Clean up all the stored distances, removing values for datasets no longer in
137
100
  # the project as reference datasets.
138
101
  def cleanup_distances!
139
- r = get_result(:distances)
140
- return if r.nil?
102
+ return if get_result(:distances).nil?
141
103
 
142
104
  require 'miga/sqlite'
143
105
  ref = project.datasets.select(&:ref?).select(&:active?).map(&:name)
144
- %i[haai_db aai_db ani_db].each do |db_type|
145
- db = r.file_path(db_type)
146
- next if db.nil? || !File.size?(db)
147
-
148
- sqlite_db = MiGA::SQLite.new(db)
149
- table = db_type[-6..-4]
150
- val = sqlite_db.run("select seq2 from #{table}")
151
- next if val.empty?
152
-
153
- (val.map(&:first) - ref).each do |extra|
154
- sqlite_db.run("delete from #{table} where seq2=?", extra)
155
- end
106
+ %i[haai aai ani].each do |metric|
107
+ cleanup_distances_by_metric!(ref, metric)
156
108
  end
157
109
  end
158
110
 
159
111
  private
160
112
 
161
113
  ##
162
- # Add result type +:raw_reads+ at +base+ (no +_opts+ supported)
163
- def add_result_raw_reads(base, _opts)
164
- return nil unless result_files_exist?(base, '.1.fastq')
165
-
166
- add_files_to_ds_result(
167
- MiGA::Result.new("#{base}.json"), name,
168
- if result_files_exist?(base, '.2.fastq')
169
- { pair1: '.1.fastq', pair2: '.2.fastq' }
170
- else
171
- { single: '.1.fastq' }
172
- end
173
- )
174
- end
175
-
176
- ##
177
- # Add result type +:trimmed_reads+ at +base+ (no +_opts+ supported)
178
- def add_result_trimmed_reads(base, _opts)
179
- return nil unless result_files_exist?(base, '.1.clipped.fastq')
180
-
181
- add_files_to_ds_result(
182
- MiGA::Result.new("#{base}.json"), name,
183
- if result_files_exist?(base, '.2.clipped.fastq')
184
- { pair1: '.1.clipped.fastq', pair2: '.2.clipped.fastq' }
185
- else
186
- { single: '.1.clipped.fastq' }
187
- end
188
- ).tap do |r|
189
- # Legacy files
190
- r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
191
- r.add_file(:single, "#{name}.1.clipped.single.fastq")
192
- end
193
- end
194
-
195
- ##
196
- # Add result type +:read_quality+ at +base+ (no +_opts+ supported)
197
- def add_result_read_quality(base, _opts)
198
- return nil unless
199
- result_files_exist?(base, %w[.post.1.html]) ||
200
- result_files_exist?(base, %w[.solexaqa .fastqc])
201
-
202
- add_files_to_ds_result(
203
- MiGA::Result.new("#{base}.json"), name,
204
- pre_qc_1: '.pre.1.html', pre_qc_2: '.pre.2.html',
205
- post_qc_1: '.post.1.html', post_qc_2: '.post.2.html',
206
- adapter_detection: '.adapters.txt',
207
- # Legacy files
208
- solexaqa: '.solexaqa', fastqc: '.fastqc'
209
- )
210
- end
211
-
212
- ##
213
- # Add result type +:trimmed_fasta+ at +base+ (no +_opts+ supported)
214
- def add_result_trimmed_fasta(base, _opts)
215
- return nil unless
216
- result_files_exist?(base, '.CoupledReads.fa') ||
217
- result_files_exist?(base, '.SingleReads.fa') ||
218
- result_files_exist?(base, %w[.1.fasta .2.fasta])
219
-
220
- add_files_to_ds_result(
221
- MiGA::Result.new("#{base}.json"), name,
222
- coupled: '.CoupledReads.fa',
223
- single: '.SingleReads.fa',
224
- pair1: '.1.fasta',
225
- pair2: '.2.fasta'
226
- )
227
- end
228
-
229
- ##
230
- # Add result type +:assembly+ at +base+. Hash +opts+ supports
231
- # +is_clean: Boolean+.
232
- def add_result_assembly(base, opts)
233
- return nil unless result_files_exist?(base, '.LargeContigs.fna')
234
-
235
- r = add_files_to_ds_result(
236
- MiGA::Result.new("#{base}.json"), name,
237
- largecontigs: '.LargeContigs.fna',
238
- allcontigs: '.AllContigs.fna',
239
- assembly_data: ''
240
- )
241
- opts[:is_clean] ||= false
242
- r.clean! if opts[:is_clean]
243
- unless r.clean?
244
- MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs))
245
- r.clean!
246
- end
247
- r
248
- end
249
-
250
- ##
251
- # Add result type +:cds+ at +base+. Hash +opts+ supports +is_clean: Boolean+
252
- def add_result_cds(base, opts)
253
- return nil unless result_files_exist?(base, %w[.faa])
254
-
255
- r = add_files_to_ds_result(
256
- MiGA::Result.new("#{base}.json"), name,
257
- proteins: '.faa',
258
- genes: '.fna',
259
- gff2: '.gff2',
260
- gff3: '.gff3',
261
- tab: '.tab'
262
- )
263
- opts[:is_clean] ||= false
264
- r.clean! if opts[:is_clean]
265
- unless r.clean?
266
- MiGA::MiGA.clean_fasta_file(r.file_path(:proteins))
267
- MiGA::MiGA.clean_fasta_file(r.file_path(:genes)) if r.file_path(:genes)
268
- r.clean!
269
- end
270
- r
271
- end
272
-
273
- ##
274
- # Add result type +:essential_genes+ at +base+ (no +_opts+ supported).
275
- def add_result_essential_genes(base, _opts)
276
- return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
114
+ # Clean up the tables of a specific +metric+ (symbol), removing all values
115
+ # against dataset names not in +ref+ (Array of string)
116
+ def cleanup_distances_by_metric!(ref, metric)
117
+ db_type = :"#{metric}_db"
118
+ db = get_result(:distances).file_path(db_type)
119
+ return if db.nil? || !File.size?(db)
277
120
 
278
- add_files_to_ds_result(
279
- MiGA::Result.new("#{base}.json"), name,
280
- ess_genes: '.ess.faa',
281
- collection: '.ess',
282
- report: '.ess/log',
283
- alignments: '.ess/proteins.aln',
284
- fastaai_index: '.faix.db.gz',
285
- fastaai_index_2: '.faix'
286
- )
287
- end
288
-
289
- ##
290
- # Add result type +:ssu+ at +base+. Hash +opts+ supports +is_clean: Boolean+
291
- def add_result_ssu(base, opts)
292
- return MiGA::Result.new("#{base}.json") if result(:assembly).nil?
293
- return nil unless result_files_exist?(base, '.ssu.fa')
294
-
295
- r = add_files_to_ds_result(
296
- MiGA::Result.new("#{base}.json"), name,
297
- longest_ssu_gene: '.ssu.fa',
298
- ssu_gff: '.ssu.gff', # DEPRECATED
299
- gff: '.gff',
300
- all_ssu_genes: '.ssu.all.fa',
301
- classification: '.rdp.tsv',
302
- trna_list: '.trna.txt'
303
- )
304
- opts[:is_clean] ||= false
305
- r.clean! if opts[:is_clean]
306
- unless r.clean?
307
- MiGA::MiGA.clean_fasta_file(r.file_path(:longest_ssu_gene))
308
- r.clean!
309
- end
310
- r
311
- end
312
-
313
- ##
314
- # Add result type +:mytaxa+ at +base+ (no +_opts+ supported)
315
- def add_result_mytaxa(base, _opts)
316
- if multi?
317
- return nil unless
318
- result_files_exist?(base, '.mytaxa') ||
319
- result_files_exist?(base, '.nomytaxa.txt')
320
-
321
- add_files_to_ds_result(
322
- MiGA::Result.new("#{base}.json"), name,
323
- mytaxa: '.mytaxa',
324
- blast: '.blast',
325
- mytaxain: '.mytaxain',
326
- nomytaxa: '.nomytaxa.txt',
327
- species: '.mytaxa.Species.txt',
328
- genus: '.mytaxa.Genus.txt',
329
- phylum: '.mytaxa.Phylum.txt',
330
- innominate: '.mytaxa.innominate',
331
- kronain: '.mytaxa.krona',
332
- krona: '.html'
333
- )
334
- else
335
- MiGA::Result.new("#{base}.json")
336
- end
337
- end
338
-
339
- ##
340
- # Add result type +:mytaxa_scan+ at +base+ (no +_opts+ supported)
341
- def add_result_mytaxa_scan(base, _opts)
342
- if nonmulti?
343
- return nil unless
344
- result_files_exist?(base, %w[.pdf .mytaxa]) ||
345
- result_files_exist?(base, '.nomytaxa.txt')
346
-
347
- add_files_to_ds_result(
348
- MiGA::Result.new("#{base}.json"), name,
349
- nomytaxa: '.nomytaxa.txt',
350
- mytaxa: '.mytaxa',
351
- report: '.pdf',
352
- regions_archive: '.reg.tar',
353
- # Intermediate / Deprecated:
354
- blast: '.blast',
355
- mytaxain: '.mytaxain',
356
- wintax: '.wintax',
357
- gene_ids: '.wintax.genes',
358
- region_ids: '.wintax.regions',
359
- regions: '.reg'
360
- )
361
- else
362
- MiGA::Result.new("#{base}.json")
363
- end
364
- end
365
-
366
- ##
367
- # Add result type +:distances+ at +base+ (no +_opts+ supported)
368
- def add_result_distances(base, _opts)
369
- if nonmulti?
370
- if ref?
371
- add_result_distances_ref(base)
372
- else
373
- add_result_distances_nonref(base)
374
- end
375
- else
376
- add_result_distances_multi(base)
377
- end
378
- end
379
-
380
- ##
381
- # Add result type +:taxonomy+ at +base+ (no +_opts+ supported)
382
- def add_result_taxonomy(base, _opts)
383
- add_result_distances_nonref(base)
384
- end
385
-
386
- ##
387
- # Add result type +:stats+ at +base+ (no +_opts+ supported)
388
- def add_result_stats(base, _opts)
389
- MiGA::Result.new("#{base}.json")
390
- end
391
-
392
- ##
393
- # Add result type +:distances+ for _multi_ datasets at +base+
394
- def add_result_distances_multi(base)
395
- MiGA::Result.new("#{base}.json")
396
- end
121
+ sqlite_db = MiGA::SQLite.new(db)
122
+ table = db_type[-6..-4]
123
+ val = sqlite_db.run("select seq2 from #{table}")
124
+ return if val.empty?
397
125
 
398
- ##
399
- # Add result type +:distances+ for _nonmulti_ reference datasets at +base+
400
- def add_result_distances_ref(base)
401
- pref = File.dirname(base)
402
- return nil unless File.exist?("#{pref}/01.haai/#{name}.db")
403
-
404
- MiGA::Result.new("#{base}.json").tap do |r|
405
- r.add_files(
406
- haai_db: "01.haai/#{name}.db",
407
- aai_db: "02.aai/#{name}.db",
408
- ani_db: "03.ani/#{name}.db"
409
- )
126
+ (val.map(&:first) - ref).each do |extra|
127
+ sqlite_db.run("delete from #{table} where seq2=?", extra)
410
128
  end
411
129
  end
412
-
413
- ##
414
- # Add result type +:distances+ for _nonmulti_ query datasets at +base+
415
- def add_result_distances_nonref(base)
416
- return nil unless
417
- result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) ||
418
- result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
419
-
420
- add_files_to_ds_result(
421
- MiGA::Result.new("#{base}.json"), name,
422
- aai_medoids: '.aai-medoids.tsv',
423
- haai_db: '.haai.db',
424
- aai_db: '.aai.db',
425
- ani_medoids: '.ani-medoids.tsv',
426
- ani_db: '.ani.db',
427
- ref_tree: '.nwk',
428
- ref_tree_pdf: '.nwk.pdf',
429
- intax_test: '.intax.txt'
430
- )
431
- end
432
-
433
- ##
434
- # Add files in +rel_files+ Hash to the result +r+ with dataset name +name+
435
- def add_files_to_ds_result(r, name, rel_files)
436
- files = {}
437
- rel_files.each { |k, v| files[k] = name + v }
438
- r.add_files(files)
439
- r
440
- end
441
130
  end
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.2, 15, 0].freeze
15
+ VERSION = [1.2, 15, 1].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2023, 1, 17)
23
+ VERSION_DATE = Date.new(2023, 1, 24)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -106,6 +106,12 @@ class RemoteDatasetTest < Test::Unit::TestCase
106
106
  assert { rd.get_metadata[:is_ref_type] }
107
107
  end
108
108
 
109
+ def test_missing_data
110
+ declare_remote_access
111
+ rd = MiGA::RemoteDataset.new('GCA_000484975.1', :assembly, :ncbi)
112
+ assert_raise(MiGA::RemoteDataMissingError) { rd.save_to(project, 'bad') }
113
+ end
114
+
109
115
  # This test is too expensive (too much time to run it!)
110
116
  # def test_net_timeout
111
117
  # declare_remote_access
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.15.0
4
+ version: 1.2.15.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-17 00:00:00.000000000 Z
11
+ date: 2023-01-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -141,6 +141,9 @@ files:
141
141
  - lib/miga/cli/action/derep_wf.rb
142
142
  - lib/miga/cli/action/doctor.rb
143
143
  - lib/miga/cli/action/doctor/base.rb
144
+ - lib/miga/cli/action/doctor/databases.rb
145
+ - lib/miga/cli/action/doctor/distances.rb
146
+ - lib/miga/cli/action/doctor/operations.rb
144
147
  - lib/miga/cli/action/download/base.rb
145
148
  - lib/miga/cli/action/download/gtdb.rb
146
149
  - lib/miga/cli/action/download/ncbi.rb
@@ -196,6 +199,8 @@ files:
196
199
  - lib/miga/dataset/base.rb
197
200
  - lib/miga/dataset/hooks.rb
198
201
  - lib/miga/dataset/result.rb
202
+ - lib/miga/dataset/result/add.rb
203
+ - lib/miga/dataset/result/ignore.rb
199
204
  - lib/miga/dataset/status.rb
200
205
  - lib/miga/json.rb
201
206
  - lib/miga/lair.rb