miga-base 0.2.0.6 → 0.2.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE +201 -0
  4. data/README.md +17 -335
  5. data/Rakefile +31 -0
  6. data/actions/add_result +2 -5
  7. data/actions/add_taxonomy +4 -7
  8. data/actions/create_dataset +5 -6
  9. data/actions/create_project +2 -5
  10. data/actions/daemon +2 -5
  11. data/actions/download_dataset +88 -58
  12. data/actions/find_datasets +36 -38
  13. data/actions/import_datasets +2 -5
  14. data/actions/index_taxonomy +2 -5
  15. data/actions/list_datasets +47 -49
  16. data/actions/list_files +7 -11
  17. data/actions/unlink_dataset +2 -5
  18. data/bin/miga +1 -1
  19. data/lib/miga/common.rb +132 -0
  20. data/lib/miga/daemon.rb +229 -168
  21. data/lib/miga/dataset.rb +354 -277
  22. data/lib/miga/gui.rb +346 -269
  23. data/lib/miga/metadata.rb +115 -71
  24. data/lib/miga/project.rb +361 -259
  25. data/lib/miga/remote_dataset.rb +200 -148
  26. data/lib/miga/result.rb +150 -99
  27. data/lib/miga/tax_index.rb +124 -67
  28. data/lib/miga/taxonomy.rb +129 -100
  29. data/lib/miga/version.rb +57 -0
  30. data/lib/miga.rb +2 -77
  31. data/scripts/_distances_noref_nomulti.bash +2 -0
  32. data/scripts/_distances_ref_nomulti.bash +2 -0
  33. data/scripts/aai_distances.bash +1 -0
  34. data/scripts/ani_distances.bash +1 -0
  35. data/scripts/assembly.bash +1 -0
  36. data/scripts/cds.bash +1 -0
  37. data/scripts/clade_finding.bash +17 -1
  38. data/scripts/distances.bash +1 -0
  39. data/scripts/essential_genes.bash +1 -0
  40. data/scripts/haai_distances.bash +1 -0
  41. data/scripts/init.bash +2 -0
  42. data/scripts/mytaxa.bash +1 -0
  43. data/scripts/mytaxa_scan.bash +1 -0
  44. data/scripts/ogs.bash +1 -0
  45. data/scripts/read_quality.bash +1 -0
  46. data/scripts/ssu.bash +1 -0
  47. data/scripts/subclades.bash +1 -0
  48. data/scripts/trimmed_fasta.bash +1 -0
  49. data/scripts/trimmed_reads.bash +1 -0
  50. data/test/common_test.rb +82 -0
  51. data/test/daemon_test.rb +53 -0
  52. data/test/dataset_test.rb +156 -0
  53. data/test/jruby_gui_test.rb +20 -0
  54. data/test/metadata_test.rb +48 -0
  55. data/test/project_test.rb +54 -0
  56. data/test/remote_dataset_test.rb +41 -0
  57. data/test/tax_index_test.rb +44 -0
  58. data/test/taxonomy_test.rb +36 -0
  59. data/test/test_helper.rb +32 -0
  60. metadata +53 -38
data/lib/miga/dataset.rb CHANGED
@@ -1,286 +1,363 @@
1
- #
2
1
  # @package MiGA
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license artistic license 2.0
5
- # @update Jan-18-2016
6
- #
2
+ # @license Artistic-2.0
7
3
 
8
4
  require "miga/metadata"
9
- require "miga/project"
10
5
  require "miga/result"
11
6
 
12
- module MiGA
13
- class Dataset
14
- # Class
15
- @@RESULT_DIRS = {
16
- # Preprocessing
17
- raw_reads: "01.raw_reads", trimmed_reads: "02.trimmed_reads",
18
- read_quality: "03.read_quality", trimmed_fasta: "04.trimmed_fasta",
19
- assembly: "05.assembly", cds: "06.cds",
20
- # Annotation
21
- essential_genes: "07.annotation/01.function/01.essential",
22
- ssu: "07.annotation/01.function/02.ssu",
23
- mytaxa: "07.annotation/02.taxonomy/01.mytaxa",
24
- mytaxa_scan: "07.annotation/03.qa/02.mytaxa_scan",
25
- # Mapping
26
- mapping_on_contigs: "08.mapping/01.read-ctg",
27
- mapping_on_genes: "08.mapping/02.read-gene",
28
- # Distances (for single-species datasets)
29
- distances: "09.distances"
30
- }
31
- @@KNOWN_TYPES = {
32
- genome: {description: "The genome from an isolate.", multi: false},
33
- metagenome: {description: "A metagenome (excluding viromes).",
34
- multi: true},
35
- virome: {description: "A viral metagenome.", multi: true},
36
- scgenome: {description: "A genome from a single cell.", multi: false},
37
- popgenome: {description: "The genome of a population (including " +
38
- "microdiversity).", :multi=>false}
39
- }
40
- @@PREPROCESSING_TASKS = [:raw_reads, :trimmed_reads, :read_quality,
41
- :trimmed_fasta, :assembly, :cds, :essential_genes, :ssu, :mytaxa,
42
- :mytaxa_scan, :distances]
43
- @@EXCLUDE_NOREF_TASKS = [:essential_genes, :mytaxa_scan]
44
- @@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances]
45
- @@ONLY_MULTI_TASKS = [:mytaxa]
46
- def self.PREPROCESSING_TASKS ; @@PREPROCESSING_TASKS ; end
47
- def self.RESULT_DIRS ; @@RESULT_DIRS end
48
- def self.KNOWN_TYPES ; @@KNOWN_TYPES end
49
- def self.exist?(project, name)
50
- File.exist? project.path + "/metadata/" + name + ".json"
51
- end
52
- def self.INFO_FIELDS
53
- %w(name created updated type ref user description comments)
54
- end
55
- # Instance
56
- attr_reader :project, :name, :metadata
57
- def initialize(project, name, is_ref=true, metadata={})
58
- abort "Invalid name '#{name}', please use only alphanumerics and " +
59
- "underscores." unless name.miga_name?
60
- @project = project
61
- @name = name
62
- metadata[:ref] = is_ref
63
- @metadata = Metadata.new(project.path + "/metadata/" + name + ".json",
64
- metadata)
65
- end
66
- def save
67
- self.metadata[:type] = :metagenome if !metadata[:tax].nil? and
68
- !metadata[:tax][:ns].nil? and
69
- metadata[:tax][:ns]=="COMMUNITY"
70
- self.metadata.save
71
- self.load
72
- end
73
- def load
74
- # Nothing here...
75
- end
76
- def remove!
77
- self.results.each{ |r| r.remove! }
78
- self.metadata.remove!
79
- end
80
- def info()
81
- Dataset.INFO_FIELDS.map do |k|
82
- (k=="name") ? self.name : self.metadata[k.to_sym]
83
- end
84
- end
85
- def is_ref?() !!self.metadata[:ref] end
86
- def is_multi?
87
- return false if self.metadata[:type].nil?
88
- return @@KNOWN_TYPES[self.metadata[:type]][:multi]
89
- end
90
- def is_nonmulti?
91
- return false if self.metadata[:type].nil?
92
- return !@@KNOWN_TYPES[self.metadata[:type]][:multi]
93
- end
94
- def result(k)
95
- return nil if @@RESULT_DIRS[k.to_sym].nil?
96
- Result.load(project.path + "/data/" + @@RESULT_DIRS[k.to_sym] +
97
- "/" + name + ".json")
98
- end
99
- def results() @@RESULT_DIRS.keys.map{ |k| self.result k }.compact end
100
- def each_result(&blk)
101
- @@RESULT_DIRS.keys.each do |k|
102
- v = self.result k
103
- blk.call(k,v) unless v.nil?
104
- end
105
- end
106
- def add_result result_type
107
- return nil if @@RESULT_DIRS[result_type].nil?
108
- base = project.path + "/data/" + @@RESULT_DIRS[result_type] +
109
- "/" + name
110
- return nil unless File.exist? base + ".done"
111
- r = nil
112
- case result_type
113
- when :raw_reads
114
- return nil unless
115
- File.exist? base + ".1.fastq" or
116
- File.exist? base + ".1.fastq.gz"
117
- r = Result.new base + ".json"
118
- r.data[:gz] = File.exist?(base + ".1.fastq.gz")
119
- if File.exist? base + ".2.fastq" + (r.data[:gz] ? ".gz" : "")
120
- r.add_file :pair1, name + ".1.fastq"
121
- r.add_file :pair2, name + ".2.fastq"
122
- else
123
- r.add_file :single, name + ".1.fastq"
124
- end
125
- when :trimmed_reads
126
- return nil unless
127
- File.exist?(base + ".1.clipped.fastq") or
128
- File.exist?(base + ".1.clipped.fastq.gz")
129
- r = Result.new base + ".json"
130
- r.data[:gz] = File.exist?(base + ".1.clipped.fastq.gz")
131
- if File.exist? base + ".2.clipped.fastq" + (r.data[:gz] ? ".gz":"")
132
- r.add_file :pair1, name + ".1.clipped.fastq"
133
- r.add_file :pair2, name + ".2.clipped.fastq"
134
- end
135
- r.add_file :single, name + ".1.clipped.single.fastq"
136
- add_result :raw_reads #-> Post gunzip (if any)
137
- when :read_quality
138
- return nil unless
139
- Dir.exist?(base + ".solexaqa") and
140
- Dir.exist?(base + ".fastqc")
141
- r = Result.new base + ".json"
142
- r.add_file :solexaqa, self.name + ".solexaqa"
143
- r.add_file :fastqc, self.name + ".fastqc"
144
- add_result :trimmed_reads #-> Post cleaning
145
- when :trimmed_fasta
146
- return nil unless
147
- File.exist?(base + ".CoupledReads.fa") or
148
- File.exist?(base + ".SingleReads.fa")
149
- r = Result.new base + ".json"
150
- if File.exist?(base + ".CoupledReads.fa")
151
- r.add_file :coupled, name + ".CoupledReads.fa"
152
- r.add_file :pair1, name + ".1.fa"
153
- r.add_file :pair2, name + ".2.fa"
154
- end
155
- r.add_file :single, name + ".SingleReads.fa"
156
- add_result :raw_reads #-> Post gzip
157
- when :assembly
158
- return nil unless
159
- File.exist?(base + ".LargeContigs.fna")
160
- r = Result.new base + ".json"
161
- r.add_file :largecontigs, name + ".LargeContigs.fna"
162
- r.add_file :allcontigs, name + ".AllContigs.fna"
163
- when :cds
164
- return nil unless
165
- File.exist?(base + ".faa") and
166
- File.exist?(base + ".fna")
167
- r = Result.new base + ".json"
168
- r.add_file :proteins, name + ".faa"
169
- r.add_file :genes, name + ".fna"
170
- %w(gff2 gff3 tab).each do |ext|
171
- r.add_file ext, "#{name}.#{ext}"
172
- end
173
- when :essential_genes
174
- return nil unless
175
- File.exist?(base + ".ess.faa") and
176
- Dir.exist?(base + ".ess") and
177
- File.exist?(base + ".ess/log")
178
- r = Result.new base + ".json"
179
- r.add_file :ess_genes, name + ".ess.faa"
180
- r.add_file :collection, name + ".ess"
181
- r.add_file :report, name + ".ess/log"
182
- when :ssu
183
- if result(:assembly).nil?
184
- r = Result.new base + ".json"
185
- else
186
- return nil unless
187
- File.exist?(base + ".ssu.fa") or
188
- File.exist?(base + ".ssu.fa.gz")
189
- r = Result.new base + ".json"
190
- r.data[:gz] = File.exist?(base + ".ssu.fa.gz")
191
- r.add_file :longest_ssu_gene, name + ".ssu.fa"
192
- r.add_file :gff, name + ".ssu.gff"
193
- r.add_file :all_ssu_genes, name + ".ssu.all.fa"
194
- end
195
- when :mytaxa
196
- if is_multi?
197
- return nil unless File.exist?(base + ".mytaxa")
198
- r = Result.new base + ".json"
199
- r.data[:gz] = File.exist?(base + ".mytaxain.gz")
200
- r.add_file :mytaxa, name + ".mytaxa"
201
- r.add_file :blast, name + ".blast"
202
- r.add_file :mytaxain, name + ".mytaxain"
203
- else
204
- r = Result.new base + ".json"
205
- r.data[:files] = {}
206
- end
207
- when :mytaxa_scan
208
- if is_nonmulti?
209
- return nil unless
210
- File.exists?(base + ".pdf") and
211
- File.exist?(base + ".wintax") and
212
- File.exist?(base + ".mytaxa") and
213
- Dir.exist?(base + ".reg")
214
- r = Result.new base + ".json"
215
- r.add_file :mytaxa, name + ".mytaxa"
216
- r.add_file :wintax, name + ".wintax"
217
- r.add_file :report, name + ".pdf"
218
- r.add_file :regions, name + ".reg"
219
- r.add_file :gene_ids, name + ".wintax.genes"
220
- r.add_file :region_ids, name + ".wintax.regions"
221
- r.add_file :blast, name + ".blast"
222
- r.add_file :mytaxain, name + ".mytaxain"
223
- else
224
- r = Result.new base + ".json"
225
- r.data[:files] = {}
226
- end
227
- when :distances
228
- if is_nonmulti?
229
- pref = project.path + "/data/" + @@RESULT_DIRS[result_type]
230
- if is_ref?
231
- return nil unless
232
- File.exist?(pref + "/01.haai/" + name + ".db")
233
- else
234
- return nil unless
235
- File.exist?(pref + "/02.aai/" + name + ".db")
236
- end
237
- r = Result.new base + ".json"
238
- r.add_file :haai_db, "01.haai/" + name + ".db"
239
- r.add_file :aai_db, "02.aai/" + name + ".db"
240
- r.add_file :ani_db, "03.ani/" + name + ".db"
241
- else
242
- r = Result.new base + ".json"
243
- r.data[:files] = {}
244
- end
245
- end
246
- r.save
247
- r
248
- end # def add_result
249
- def first_preprocessing
250
- @@PREPROCESSING_TASKS.find{ |t| not self.add_result(t).nil? }
251
- end
252
- def next_preprocessing
253
- after_first = false
254
- first = self.first_preprocessing
255
- return nil if first.nil?
256
- @@PREPROCESSING_TASKS.each do |t|
257
- next if @@EXCLUDE_NOREF_TASKS.include?(t) and not is_ref?
258
- next if @@ONLY_MULTI_TASKS.include?(t) and not is_multi?
259
- next if @@ONLY_NONMULTI_TASKS.include?(t) and not is_nonmulti?
260
- return t if after_first and add_result(t).nil?
261
- after_first = (after_first or (t==first))
262
- end
263
- nil
7
+ ##
8
+ # Dataset representation in MiGA.
9
+ class MiGA::Dataset < MiGA::MiGA
10
+
11
+ # Class-level
12
+
13
+ ##
14
+ # Directories containing the results from dataset-specific tasks.
15
+ def self.RESULT_DIRS ; @@RESULT_DIRS end
16
+ @@RESULT_DIRS = {
17
+ # Preprocessing
18
+ raw_reads: "01.raw_reads", trimmed_reads: "02.trimmed_reads",
19
+ read_quality: "03.read_quality", trimmed_fasta: "04.trimmed_fasta",
20
+ assembly: "05.assembly", cds: "06.cds",
21
+ # Annotation
22
+ essential_genes: "07.annotation/01.function/01.essential",
23
+ ssu: "07.annotation/01.function/02.ssu",
24
+ mytaxa: "07.annotation/02.taxonomy/01.mytaxa",
25
+ mytaxa_scan: "07.annotation/03.qa/02.mytaxa_scan",
26
+ # Mapping
27
+ mapping_on_contigs: "08.mapping/01.read-ctg",
28
+ mapping_on_genes: "08.mapping/02.read-gene",
29
+ # Distances (for single-species datasets)
30
+ distances: "09.distances"
31
+ }
32
+
33
+ ##
34
+ # Supported dataset types.
35
+ def self.KNOWN_TYPES ; @@KNOWN_TYPES end
36
+ @@KNOWN_TYPES = {
37
+ genome: {description: "The genome from an isolate.", multi: false},
38
+ metagenome: {description: "A metagenome (excluding viromes).",
39
+ multi: true},
40
+ virome: {description: "A viral metagenome.", multi: true},
41
+ scgenome: {description: "A genome from a single cell.", multi: false},
42
+ popgenome: {description: "The genome of a population (including " +
43
+ "microdiversity).", :multi=>false}
44
+ }
45
+
46
+ ##
47
+ # Returns an Array of tasks to be executed before project-wide tasks.
48
+ def self.PREPROCESSING_TASKS ; @@PREPROCESSING_TASKS ; end
49
+ @@PREPROCESSING_TASKS = [:raw_reads, :trimmed_reads, :read_quality,
50
+ :trimmed_fasta, :assembly, :cds, :essential_genes, :ssu, :mytaxa,
51
+ :mytaxa_scan, :distances]
52
+
53
+ ##
54
+ # Tasks to be excluded from query datasets.
55
+ @@EXCLUDE_NOREF_TASKS = [:essential_genes, :mytaxa_scan]
56
+
57
+ ##
58
+ # Tasks to be executed only in datasets that are not multi-organism. These
59
+ # tasks are ignored for multi-organism datasets or for unknown types.
60
+ @@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances]
61
+
62
+ ##
63
+ # Tasks to be executed only in datasets that are multi-organism. These
64
+ # tasks are ignored for single-organism datasets or for unknown types.
65
+ @@ONLY_MULTI_TASKS = [:mytaxa]
66
+
67
+ ##
68
+ # Does the +project+ already have a dataset with that +name+?
69
+ def self.exist?(project, name)
70
+ File.exist? project.path + "/metadata/" + name + ".json"
71
+ end
72
+
73
+ ##
74
+ # Standard fields of metadata for datasets.
75
+ def self.INFO_FIELDS
76
+ %w(name created updated type ref user description comments)
77
+ end
78
+
79
+ # Instance-level
80
+
81
+ ##
82
+ # MiGA::Project that contains the dataset.
83
+ attr_reader :project
84
+
85
+ ##
86
+ # Datasets are uniquely identified by +name+ in a project.
87
+ attr_reader :name
88
+
89
+ ##
90
+ # MiGA::Metadata with information about the dataset.
91
+ attr_reader :metadata
92
+
93
+ ##
94
+ # Create a MiGA::Dataset object in a +project+ MiGA::Project with a
95
+ # uniquely identifying +name+. +is_ref+ indicates if the dataset is to
96
+ # be treated as reference (true, default) or query (false). Pass any
97
+ # additional +metadata+ as a Hash.
98
+ def initialize(project, name, is_ref=true, metadata={})
99
+ raise "Invalid name '#{name}', please use only alphanumerics and " +
100
+ "underscores." unless name.miga_name?
101
+ @project = project
102
+ @name = name
103
+ metadata[:ref] = is_ref
104
+ @metadata = MiGA::Metadata.new(project.path + "/metadata/" + name + ".json",
105
+ metadata)
106
+ end
107
+
108
+ ##
109
+ # Save any changes you've made in the dataset.
110
+ def save
111
+ self.metadata[:type] = :metagenome if !metadata[:tax].nil? and
112
+ !metadata[:tax][:ns].nil? and metadata[:tax][:ns]=="COMMUNITY"
113
+ self.metadata.save
114
+ end
115
+
116
+ ##
117
+ # Delete the dataset with all its contents (including results) and returns
118
+ # nil.
119
+ def remove!
120
+ self.results.each{ |r| r.remove! }
121
+ self.metadata.remove!
122
+ end
123
+
124
+ ##
125
+ # Get standard metadata values for the dataset as Array.
126
+ def info
127
+ MiGA::Dataset.INFO_FIELDS.map do |k|
128
+ (k=="name") ? self.name : self.metadata[k.to_sym]
129
+ end
130
+ end
131
+
132
+ ##
133
+ # Is this dataset a reference?
134
+ def is_ref? ; !!self.metadata[:ref] ; end
135
+
136
+ ##
137
+ # Is this dataset known to be multi-organism?
138
+ def is_multi?
139
+ return false if self.metadata[:type].nil?
140
+ return @@KNOWN_TYPES[self.metadata[:type]][:multi]
141
+ end
142
+
143
+ ##
144
+ # Is this dataset known to be single-organism?
145
+ def is_nonmulti?
146
+ return false if self.metadata[:type].nil?
147
+ return !@@KNOWN_TYPES[self.metadata[:type]][:multi]
148
+ end
149
+
150
+ ##
151
+ # Get the result MiGA::Result in this dataset identified by the symbol +k+.
152
+ def result(k)
153
+ return nil if @@RESULT_DIRS[k.to_sym].nil?
154
+ MiGA::Result.load(project.path + "/data/" + @@RESULT_DIRS[k.to_sym] +
155
+ "/" + name + ".json")
156
+ end
157
+
158
+ ##
159
+ # Get all the results (Array of MiGA::Result) in this dataset.
160
+ def results ; @@RESULT_DIRS.keys.map{ |k| result k }.compact ; end
161
+
162
+ ##
163
+ # For each result executes the 2-ary +blk+ block: key symbol and MiGA::Result.
164
+ def each_result(&blk)
165
+ @@RESULT_DIRS.keys.each do |k|
166
+ blk.call(k, result(k)) unless result(k).nil?
167
+ end
168
+ end
169
+
170
+ ##
171
+ # Look for the result with symbol key +result_type+ and register it in the
172
+ # dataset. If +save+ is false, it doesn't register the result, but it still
173
+ # returns a result if the expected files are complete. Returns MiGA::Result
174
+ # or nil.
175
+ def add_result(result_type, save=true)
176
+ return nil if @@RESULT_DIRS[result_type].nil?
177
+ base = project.path + "/data/" + @@RESULT_DIRS[result_type] +
178
+ "/" + name
179
+ return MiGA::Result.load(base + ".json") unless save
180
+ return nil unless result_files_exist?(base, ".done")
181
+ r = self.send("add_result_#{result_type}", base)
182
+ r.save unless r.nil?
183
+ r
184
+ end
185
+
186
+ ##
187
+ # Returns the key symbol of the first registered result (sorted by the
188
+ # execution order). This typically corresponds to the result used as the
189
+ # initial input. Passes +save+ to #add_result.
190
+ def first_preprocessing(save=false)
191
+ @@PREPROCESSING_TASKS.find{ |t| not add_result(t, save).nil? }
192
+ end
193
+
194
+ ##
195
+ # Returns the key symbol of the next task that needs to be executed. Passes
196
+ # +save+ to #add_result.
197
+ def next_preprocessing(save=false)
198
+ after_first = false
199
+ first = first_preprocessing(save)
200
+ return nil if first.nil?
201
+ @@PREPROCESSING_TASKS.each do |t|
202
+ next if ignore_task? t
203
+ return t if after_first and add_result(t, save).nil?
204
+ after_first = (after_first or (t==first))
205
+ end
206
+ nil
207
+ end
208
+
209
+ ##
210
+ # Should I ignore +task+ for this dataset?
211
+ def ignore_task?(task)
212
+ ( (@@EXCLUDE_NOREF_TASKS.include?(task) and not is_ref?) or
213
+ (@@ONLY_MULTI_TASKS.include?(task) and not is_multi?) or
214
+ (@@ONLY_NONMULTI_TASKS.include?(task) and not is_nonmulti?))
215
+ end
216
+
217
+ ##
218
+ # Are all the dataset-specific tasks done? Passes +save+ to #add_result.
219
+ def done_preprocessing?(save=false)
220
+ !first_preprocessing(save).nil? and next_preprocessing(save).nil?
221
+ end
222
+
223
+ ##
224
+ # Returns an array indicating the stage of each task (sorted by execution
225
+ # order). The values are integers:
226
+ # - 0 for an undefined result (a task before the initial input).
227
+ # - 1 for a registered result (a completed task).
228
+ # - 2 for a queued result (a task yet to be executed).
229
+ # It passes +save+ to #add_result
230
+ def profile_advance(save=false)
231
+ first_task = first_preprocessing(save)
232
+ return Array.new(@@PREPROCESSING_TASKS.size, 0) if first_task.nil?
233
+ adv = []
234
+ state = 0
235
+ next_task = next_preprocessing(save)
236
+ @@PREPROCESSING_TASKS.each do |task|
237
+ state = 1 if first_task==task
238
+ state = 2 if !next_task.nil? and next_task==task
239
+ adv << state
240
+ end
241
+ adv
242
+ end
243
+
244
+ private
245
+
246
+ def add_result_raw_reads(base)
247
+ return nil unless result_files_exist?(base, ".1.fastq")
248
+ r = MiGA::Result.new(base + ".json")
249
+ add_files_to_ds_result(r, name,
250
+ ( result_files_exist?(base, ".2.fastq") ?
251
+ {:pair1=>".1.fastq", :pair2=>".2.fastq"} :
252
+ {:single=>".1.fastq"} ))
253
+ end
254
+
255
+ def add_result_trimmed_reads(base)
256
+ return nil unless result_files_exist?(base, ".1.clipped.fastq")
257
+ r = MiGA::Result.new base + ".json"
258
+ r = add_files_to_ds_result(r, name,
259
+ {:pair1=>".1.clipped.fastq", :pair2=>".2.clipped.fastq"}) if
260
+ result_files_exist?(base, ".2.clipped.fastq")
261
+ r.add_file(:single, name + ".1.clipped.single.fastq")
262
+ add_result(:raw_reads) #-> Post gunzip
263
+ r
264
+ end
265
+
266
+ def add_result_read_quality(base)
267
+ return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
268
+ r = MiGA::Result.new(base + ".json")
269
+ r = add_files_to_ds_result(r, name,
270
+ {:solexaqa=>".solexaqa", :fastqc=>".fastqc"})
271
+ add_result(:trimmed_reads) #-> Post cleaning
272
+ r
273
+ end
274
+
275
+ def add_result_trimmed_fasta(base)
276
+ return nil unless
277
+ result_files_exist?(base, ".CoupledReads.fa") or
278
+ result_files_exist?(base, ".SingleReads.fa")
279
+ r = MiGA::Result.new base + ".json"
280
+ r = add_files_to_ds_result(r, name, {:coupled=>".CoupledReads.fa",
281
+ :pair1=>".1.fa", :pair2=>".2.fa"}) if
282
+ result_files_exist?(base, ".CoupledReads.fa")
283
+ r.add_file(:single, name + ".SingleReads.fa")
284
+ add_result(:raw_reads) #-> Post gzip
285
+ r
286
+ end
287
+
288
+ def add_result_assembly(base)
289
+ return nil unless result_files_exist?(base, ".LargeContigs.fna")
290
+ r = MiGA::Result.new(base + ".json")
291
+ add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
292
+ :allcontigs=>".AllContigs.fna"})
293
+ end
294
+
295
+ def add_result_cds(base)
296
+ return nil unless result_files_exist?(base, %w[.faa .fna])
297
+ r = MiGA::Result.new(base + ".json")
298
+ add_files_to_ds_result(r, name, {:proteins=>".faa", :genes=>".fna",
299
+ :gff2=>".gff2", :gff3=>".gff3", :tab=>".tab"})
300
+ end
301
+
302
+ def add_result_essential_genes(base)
303
+ return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
304
+ r = MiGA::Result.new(base + ".json")
305
+ add_files_to_ds_result(r, name, {:ess_genes=>".ess.faa",
306
+ :collection=>".ess", :report=>".ess/log"})
307
+ end
308
+
309
+ def add_result_ssu(base)
310
+ return MiGA::Result.new(base + ".json") if result(:assembly).nil?
311
+ return nil unless result_files_exist?(base, ".ssu.fa")
312
+ r = MiGA::Result.new(base + ".json")
313
+ add_files_to_ds_result(r, name, {:longest_ssu_gene=>".ssu.fa",
314
+ :gff=>".ssu.gff", :all_ssu_genes=>".ssu.all.fa"})
315
+ end
316
+
317
+ def add_result_mytaxa(base)
318
+ if is_multi?
319
+ return nil unless result_files_exist?(base, ".mytaxa")
320
+ r = MiGA::Result.new(base + ".json")
321
+ add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :blast=>".blast",
322
+ :mytaxain=>".mytaxain"})
323
+ else
324
+ MiGA::Result.new base + ".json"
264
325
  end
265
- def done_preprocessing?
266
- !first_preprocessing.nil? and next_preprocessing.nil?
326
+ end
327
+
328
+ def add_result_mytaxa_scan(base)
329
+ if is_nonmulti?
330
+ return nil unless
331
+ result_files_exist?(base, %w[.pdf .wintax .mytaxa .reg])
332
+ r = MiGA::Result.new(base + ".json")
333
+ add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :wintax=>".wintax",
334
+ :blast=>".blast", :mytaxain=>".mytaxain", :report=>".pdf",
335
+ :regions=>".reg", :gene_ids=>".wintax.genes",
336
+ :region_ids=>".wintax.regions"})
337
+ else
338
+ MiGA::Result.new base + ".json"
267
339
  end
268
- def profile_advance
269
- if first_preprocessing.nil?
270
- adv = Array.new(@@PREPROCESSING_TASKS.size, 0)
271
- else
272
- adv = []
273
- state = 0
274
- first_task = first_preprocessing
275
- next_task = next_preprocessing
276
- @@PREPROCESSING_TASKS.each do |task|
277
- state = 1 if first_task==task
278
- state = 2 if !next_task.nil? and next_task==task
279
- adv << state
280
- end
281
- end
282
- adv
340
+ end
341
+
342
+ def add_result_distances(base)
343
+ if is_nonmulti?
344
+ pref = File.dirname(base)
345
+ return nil unless
346
+ File.exist?("#{pref}/#{is_ref? ? "01.haai" : "02.aai"}/#{name}.db")
347
+ r = MiGA::Result.new(base + ".json")
348
+ r.add_files({:haai_db=>"01.haai/#{name}.db",
349
+ :aai_db=>"02.aai/#{name}.db", :ani_db=>"03.ani/#{name}.db"})
350
+ else
351
+ r = MiGA::Result.new "#{base}.json"
283
352
  end
284
- end # class Dataset
285
- end # module MiGA
353
+ r
354
+ end
355
+
356
+ def add_files_to_ds_result(r, name, rel_files)
357
+ files = {}
358
+ rel_files.each{ |k,v| files[k] = name + v }
359
+ r.add_files(files)
360
+ r
361
+ end
286
362
 
363
+ end # class MiGA::Dataset