miga-base 1.2.15.0 → 1.2.15.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,286 @@
# frozen_string_literal: true

##
# Helper module with the +add_result_*+ methods of a dataset. Each method
# builds a MiGA::Result backed by <tt>"#{base}.json"</tt> and attaches to it
# the files expected for that result type, named after the dataset (+name+).
module MiGA::Dataset::Result::Add
  ##
  # Add result type +:raw_reads+ at +base+ (no +_opts+ supported).
  # Returns +nil+ unless +result_files_exist?+ accepts +.1.fastq+. The reads
  # are registered as +pair1+/+pair2+ if +.2.fastq+ is also accepted, or as
  # +single+ otherwise
  def add_result_raw_reads(base, _opts)
    return nil unless result_files_exist?(base, '.1.fastq')

    r = MiGA::Result.new("#{base}.json")
    files =
      if result_files_exist?(base, '.2.fastq')
        { pair1: '.1.fastq', pair2: '.2.fastq' }
      else
        { single: '.1.fastq' }
      end
    add_files_to_ds_result(r, name, files)
  end

  ##
  # Add result type +:trimmed_reads+ at +base+ (no +_opts+ supported).
  # Returns +nil+ unless +result_files_exist?+ accepts +.1.clipped.fastq+
  def add_result_trimmed_reads(base, _opts)
    return nil unless result_files_exist?(base, '.1.clipped.fastq')

    r = MiGA::Result.new("#{base}.json")
    files =
      if result_files_exist?(base, '.2.clipped.fastq')
        { pair1: '.1.clipped.fastq', pair2: '.2.clipped.fastq' }
      else
        { single: '.1.clipped.fastq' }
      end
    add_files_to_ds_result(r, name, files)

    # Legacy files. The +:trimming_sumary+ key is misspelt but it is a
    # runtime key, so it is intentionally kept unchanged
    r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
    r.add_file(:single, "#{name}.1.clipped.single.fastq")
    r
  end

  ##
  # Add result type +:read_quality+ at +base+ (no +_opts+ supported).
  # Returns +nil+ unless either the current (+.post.1.html+) or the legacy
  # (+.solexaqa+ and +.fastqc+) files are accepted by +result_files_exist?+
  def add_result_read_quality(base, _opts)
    found =
      result_files_exist?(base, %w[.post.1.html]) ||
      result_files_exist?(base, %w[.solexaqa .fastqc])
    return nil unless found

    files = {
      pre_qc_1: '.pre.1.html', pre_qc_2: '.pre.2.html',
      post_qc_1: '.post.1.html', post_qc_2: '.post.2.html',
      adapter_detection: '.adapters.txt',
      # Legacy files
      solexaqa: '.solexaqa', fastqc: '.fastqc'
    }
    add_files_to_ds_result(MiGA::Result.new("#{base}.json"), name, files)
  end

  ##
  # Add result type +:trimmed_fasta+ at +base+ (no +_opts+ supported).
  # Returns +nil+ unless coupled reads, single reads, or both paired FastA
  # files are accepted by +result_files_exist?+
  def add_result_trimmed_fasta(base, _opts)
    found =
      result_files_exist?(base, '.CoupledReads.fa') ||
      result_files_exist?(base, '.SingleReads.fa') ||
      result_files_exist?(base, %w[.1.fasta .2.fasta])
    return nil unless found

    files = {
      coupled: '.CoupledReads.fa',
      single: '.SingleReads.fa',
      pair1: '.1.fasta',
      pair2: '.2.fasta'
    }
    add_files_to_ds_result(MiGA::Result.new("#{base}.json"), name, files)
  end

  ##
  # Add result type +:assembly+ at +base+. Hash +opts+ supports
  # +is_clean: Boolean+. Unless the result is (or is declared) clean, the
  # +:largecontigs+ file is passed to MiGA::MiGA.clean_fasta_file
  def add_result_assembly(base, opts)
    return nil unless result_files_exist?(base, '.LargeContigs.fna')

    files = {
      largecontigs: '.LargeContigs.fna',
      allcontigs: '.AllContigs.fna',
      assembly_data: ''
    }
    r = add_files_to_ds_result(MiGA::Result.new("#{base}.json"), name, files)
    clean_ds_result_fasta(r, opts) do |res|
      MiGA::MiGA.clean_fasta_file(res.file_path(:largecontigs))
    end
  end

  ##
  # Add result type +:cds+ at +base+. Hash +opts+ supports +is_clean: Boolean+.
  # Unless the result is (or is declared) clean, the +:proteins+ file and the
  # +:genes+ file (only if it has a path) are passed to
  # MiGA::MiGA.clean_fasta_file
  def add_result_cds(base, opts)
    return nil unless result_files_exist?(base, %w[.faa])

    files = {
      proteins: '.faa',
      genes: '.fna',
      gff2: '.gff2',
      gff3: '.gff3',
      tab: '.tab'
    }
    r = add_files_to_ds_result(MiGA::Result.new("#{base}.json"), name, files)
    clean_ds_result_fasta(r, opts) do |res|
      MiGA::MiGA.clean_fasta_file(res.file_path(:proteins))
      genes = res.file_path(:genes)
      MiGA::MiGA.clean_fasta_file(genes) if genes
    end
  end

  ##
  # Add result type +:essential_genes+ at +base+ (no +_opts+ supported).
  # Returns +nil+ unless +.ess.faa+, +.ess+, and +.ess/log+ are all accepted
  # by +result_files_exist?+
  def add_result_essential_genes(base, _opts)
    return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])

    files = {
      ess_genes: '.ess.faa',
      collection: '.ess',
      report: '.ess/log',
      alignments: '.ess/proteins.aln',
      fastaai_index: '.faix.db.gz',
      fastaai_index_2: '.faix'
    }
    add_files_to_ds_result(MiGA::Result.new("#{base}.json"), name, files)
  end

  ##
  # Add result type +:ssu+ at +base+. Hash +opts+ supports +is_clean: Boolean+.
  # If the dataset has no +:assembly+ result, a result without files is
  # returned. Otherwise, returns +nil+ unless +.ssu.fa+ is accepted by
  # +result_files_exist?+
  def add_result_ssu(base, opts)
    return MiGA::Result.new("#{base}.json") if result(:assembly).nil?
    return nil unless result_files_exist?(base, '.ssu.fa')

    files = {
      longest_ssu_gene: '.ssu.fa',
      ssu_gff: '.ssu.gff', # DEPRECATED
      gff: '.gff',
      all_ssu_genes: '.ssu.all.fa',
      classification: '.rdp.tsv',
      trna_list: '.trna.txt'
    }
    r = add_files_to_ds_result(MiGA::Result.new("#{base}.json"), name, files)
    clean_ds_result_fasta(r, opts) do |res|
      MiGA::MiGA.clean_fasta_file(res.file_path(:longest_ssu_gene))
    end
  end

  ##
  # Add result type +:mytaxa+ at +base+ (no +_opts+ supported). Files are
  # only attached when +multi?+ is true; otherwise a result without files is
  # returned
  def add_result_mytaxa(base, _opts)
    return MiGA::Result.new("#{base}.json") unless multi?

    found =
      result_files_exist?(base, '.mytaxa') ||
      result_files_exist?(base, '.nomytaxa.txt')
    return nil unless found

    files = {
      mytaxa: '.mytaxa',
      blast: '.blast',
      mytaxain: '.mytaxain',
      nomytaxa: '.nomytaxa.txt',
      species: '.mytaxa.Species.txt',
      genus: '.mytaxa.Genus.txt',
      phylum: '.mytaxa.Phylum.txt',
      innominate: '.mytaxa.innominate',
      kronain: '.mytaxa.krona',
      krona: '.html'
    }
    add_files_to_ds_result(MiGA::Result.new("#{base}.json"), name, files)
  end

  ##
  # Add result type +:mytaxa_scan+ at +base+ (no +_opts+ supported). Files
  # are only attached when +nonmulti?+ is true; otherwise a result without
  # files is returned
  def add_result_mytaxa_scan(base, _opts)
    return MiGA::Result.new("#{base}.json") unless nonmulti?

    found =
      result_files_exist?(base, %w[.pdf .mytaxa]) ||
      result_files_exist?(base, '.nomytaxa.txt')
    return nil unless found

    files = {
      nomytaxa: '.nomytaxa.txt',
      mytaxa: '.mytaxa',
      report: '.pdf',
      regions_archive: '.reg.tar',
      # Intermediate / Deprecated:
      blast: '.blast',
      mytaxain: '.mytaxain',
      wintax: '.wintax',
      gene_ids: '.wintax.genes',
      region_ids: '.wintax.regions',
      regions: '.reg'
    }
    add_files_to_ds_result(MiGA::Result.new("#{base}.json"), name, files)
  end

  ##
  # Add result type +:distances+ at +base+ (no +_opts+ supported). Dispatches
  # to the _multi_, _nonmulti_ reference, or _nonmulti_ query variant
  # depending on +nonmulti?+ and +ref?+
  def add_result_distances(base, _opts)
    return add_result_distances_multi(base) unless nonmulti?

    ref? ? add_result_distances_ref(base) : add_result_distances_nonref(base)
  end

  ##
  # Add result type +:taxonomy+ at +base+ (no +_opts+ supported). Uses the
  # same files as +:distances+ for _nonmulti_ query datasets
  def add_result_taxonomy(base, _opts)
    add_result_distances_nonref(base)
  end

  ##
  # Add result type +:stats+ at +base+ (no +_opts+ supported). No files are
  # attached to the result
  def add_result_stats(base, _opts)
    MiGA::Result.new("#{base}.json")
  end

  private

  ##
  # Add result type +:distances+ for _multi_ datasets at +base+. No files are
  # attached to the result
  def add_result_distances_multi(base)
    MiGA::Result.new("#{base}.json")
  end

  ##
  # Add result type +:distances+ for _nonmulti_ reference datasets at +base+.
  # Returns +nil+ unless the hAAI database exists next to +base+
  def add_result_distances_ref(base)
    haai_path = "#{File.dirname(base)}/01.haai/#{name}.db"
    return nil unless File.exist?(haai_path)

    r = MiGA::Result.new("#{base}.json")
    r.add_files(
      haai_db: "01.haai/#{name}.db",
      aai_db: "02.aai/#{name}.db",
      ani_db: "03.ani/#{name}.db"
    )
    r
  end

  ##
  # Add result type +:distances+ for _nonmulti_ query datasets at +base+.
  # Returns +nil+ unless either the AAI or the ANI pair of files (medoids
  # and database) is accepted by +result_files_exist?+
  def add_result_distances_nonref(base)
    found =
      result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) ||
      result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
    return nil unless found

    files = {
      aai_medoids: '.aai-medoids.tsv',
      haai_db: '.haai.db',
      aai_db: '.aai.db',
      ani_medoids: '.ani-medoids.tsv',
      ani_db: '.ani.db',
      ref_tree: '.nwk',
      ref_tree_pdf: '.nwk.pdf',
      intax_test: '.intax.txt'
    }
    add_files_to_ds_result(MiGA::Result.new("#{base}.json"), name, files)
  end

  ##
  # Add files in +rel_files+ Hash to the result +r+ with dataset name +name+.
  # Each value of +rel_files+ is a suffix appended to +name+; keys are kept.
  # Returns +r+
  def add_files_to_ds_result(r, name, rel_files)
    files = rel_files.each_with_object({}) do |(key, suffix), acc|
      acc[key] = name + suffix
    end
    r.add_files(files)
    r
  end

  ##
  # Shared clean-up step for results holding FastA files. Defaults
  # +opts[:is_clean]+ to +false+ in place and marks +r+ as clean when that
  # option is truthy. If +r+ is still not clean, yields +r+ so the caller
  # can clean the relevant files, and then marks it as clean. Returns +r+
  def clean_ds_result_fasta(r, opts)
    opts[:is_clean] ||= false
    r.clean! if opts[:is_clean]
    unless r.clean?
      yield(r)
      r.clean!
    end
    r
  end
end
@@ -0,0 +1,93 @@
# frozen_string_literal: true

##
# Helper module deciding whether a task should be skipped for a dataset,
# and for which reason.
module MiGA::Dataset::Result::Ignore
  ##
  # Should I ignore +task+ for this dataset? True whenever +why_ignore+
  # returns anything other than +:execute+
  def ignore_task?(task)
    why_ignore(task) != :execute
  end

  ##
  # Returns an array of symbols indicating all the possible reasons why a
  # given task might be ignored, in the order in which they are tested:
  # - empty: the dataset has no data
  # - inactive: the dataset is inactive
  # - upstream: the task is upstream from dataset's input
  # - force: forced to ignore by metadata
  # - project: incompatible project
  # - noref: incompatible dataset, only for reference
  # - multi: incompatible dataset, only for multi
  # - nonmulti: incompatible dataset, only for nonmulti
  # - complete: the task is already complete
  def ignore_reasons
    %i[
      empty inactive upstream force project
      noref multi nonmulti complete
    ]
  end

  ##
  # Return a code explaining why a task is ignored (see +ignore_reasons+) or
  # the symbol +:execute+ (do not ignore, execute the task). The first reason
  # whose <tt>ignore_<reason>?</tt> predicate holds for +task+ wins
  def why_ignore(task)
    reason = ignore_reasons.find do |candidate|
      send(:"ignore_#{candidate}?", task)
    end
    reason || :execute
  end

  ##
  # Ignore +task+ because it's already done (+get_result+ is not +nil+)
  def ignore_complete?(task)
    !get_result(task).nil?
  end

  ##
  # Ignore any task because the dataset is inactive (+_task+ is ignored)
  def ignore_inactive?(_task)
    !active?
  end

  ##
  # Ignore any task because the dataset is empty, i.e., it has no first
  # preprocessing step (+_task+ is ignored)
  def ignore_empty?(_task)
    first_preprocessing.nil?
  end

  ##
  # Ignore +task+ because it's upstream from the entry point, i.e., it comes
  # before +first_preprocessing+ in the list of preprocessing tasks
  def ignore_upstream?(task)
    tasks = self.class.PREPROCESSING_TASKS
    tasks.index(task) < tasks.index(first_preprocessing)
  end

  ##
  # Ignore +task+ because the metadata says so: the entry
  # <tt>run_<task></tt> is set (not +nil+) and falsy
  def ignore_force?(task)
    run = metadata["run_#{task}"]
    !run.nil? && !run
  end

  ##
  # Ignore +task+ because the project is not compatible: only +:taxonomy+
  # is affected, when the project has no +:ref_project+ option
  def ignore_project?(task)
    task == :taxonomy && project.option(:ref_project).nil?
  end

  ##
  # Ignore +task+ because it's not a reference dataset
  def ignore_noref?(task)
    self.class.EXCLUDE_NOREF_TASKS.include?(task) && !ref?
  end

  ##
  # Ignore +task+ because it's not a multi dataset
  def ignore_multi?(task)
    self.class.ONLY_MULTI_TASKS.include?(task) && !multi?
  end

  ##
  # Ignore +task+ because it's not a nonmulti dataset
  def ignore_nonmulti?(task)
    self.class.ONLY_NONMULTI_TASKS.include?(task) && !nonmulti?
  end
end