miga-base 0.2.0.6 → 0.2.0.7

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE +201 -0
  4. data/README.md +17 -335
  5. data/Rakefile +31 -0
  6. data/actions/add_result +2 -5
  7. data/actions/add_taxonomy +4 -7
  8. data/actions/create_dataset +5 -6
  9. data/actions/create_project +2 -5
  10. data/actions/daemon +2 -5
  11. data/actions/download_dataset +88 -58
  12. data/actions/find_datasets +36 -38
  13. data/actions/import_datasets +2 -5
  14. data/actions/index_taxonomy +2 -5
  15. data/actions/list_datasets +47 -49
  16. data/actions/list_files +7 -11
  17. data/actions/unlink_dataset +2 -5
  18. data/bin/miga +1 -1
  19. data/lib/miga/common.rb +132 -0
  20. data/lib/miga/daemon.rb +229 -168
  21. data/lib/miga/dataset.rb +354 -277
  22. data/lib/miga/gui.rb +346 -269
  23. data/lib/miga/metadata.rb +115 -71
  24. data/lib/miga/project.rb +361 -259
  25. data/lib/miga/remote_dataset.rb +200 -148
  26. data/lib/miga/result.rb +150 -99
  27. data/lib/miga/tax_index.rb +124 -67
  28. data/lib/miga/taxonomy.rb +129 -100
  29. data/lib/miga/version.rb +57 -0
  30. data/lib/miga.rb +2 -77
  31. data/scripts/_distances_noref_nomulti.bash +2 -0
  32. data/scripts/_distances_ref_nomulti.bash +2 -0
  33. data/scripts/aai_distances.bash +1 -0
  34. data/scripts/ani_distances.bash +1 -0
  35. data/scripts/assembly.bash +1 -0
  36. data/scripts/cds.bash +1 -0
  37. data/scripts/clade_finding.bash +17 -1
  38. data/scripts/distances.bash +1 -0
  39. data/scripts/essential_genes.bash +1 -0
  40. data/scripts/haai_distances.bash +1 -0
  41. data/scripts/init.bash +2 -0
  42. data/scripts/mytaxa.bash +1 -0
  43. data/scripts/mytaxa_scan.bash +1 -0
  44. data/scripts/ogs.bash +1 -0
  45. data/scripts/read_quality.bash +1 -0
  46. data/scripts/ssu.bash +1 -0
  47. data/scripts/subclades.bash +1 -0
  48. data/scripts/trimmed_fasta.bash +1 -0
  49. data/scripts/trimmed_reads.bash +1 -0
  50. data/test/common_test.rb +82 -0
  51. data/test/daemon_test.rb +53 -0
  52. data/test/dataset_test.rb +156 -0
  53. data/test/jruby_gui_test.rb +20 -0
  54. data/test/metadata_test.rb +48 -0
  55. data/test/project_test.rb +54 -0
  56. data/test/remote_dataset_test.rb +41 -0
  57. data/test/tax_index_test.rb +44 -0
  58. data/test/taxonomy_test.rb +36 -0
  59. data/test/test_helper.rb +32 -0
  60. metadata +53 -38
data/lib/miga/dataset.rb CHANGED
@@ -1,286 +1,363 @@
- #
  # @package MiGA
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
- # @license artistic license 2.0
- # @update Jan-18-2016
- #
+ # @license Artistic-2.0

  require "miga/metadata"
- require "miga/project"
  require "miga/result"

- module MiGA
- class Dataset
- # Class
- @@RESULT_DIRS = {
- # Preprocessing
- raw_reads: "01.raw_reads", trimmed_reads: "02.trimmed_reads",
- read_quality: "03.read_quality", trimmed_fasta: "04.trimmed_fasta",
- assembly: "05.assembly", cds: "06.cds",
- # Annotation
- essential_genes: "07.annotation/01.function/01.essential",
- ssu: "07.annotation/01.function/02.ssu",
- mytaxa: "07.annotation/02.taxonomy/01.mytaxa",
- mytaxa_scan: "07.annotation/03.qa/02.mytaxa_scan",
- # Mapping
- mapping_on_contigs: "08.mapping/01.read-ctg",
- mapping_on_genes: "08.mapping/02.read-gene",
- # Distances (for single-species datasets)
- distances: "09.distances"
- }
- @@KNOWN_TYPES = {
- genome: {description: "The genome from an isolate.", multi: false},
- metagenome: {description: "A metagenome (excluding viromes).",
- multi: true},
- virome: {description: "A viral metagenome.", multi: true},
- scgenome: {description: "A genome from a single cell.", multi: false},
- popgenome: {description: "The genome of a population (including " +
- "microdiversity).", :multi=>false}
- }
- @@PREPROCESSING_TASKS = [:raw_reads, :trimmed_reads, :read_quality,
- :trimmed_fasta, :assembly, :cds, :essential_genes, :ssu, :mytaxa,
- :mytaxa_scan, :distances]
- @@EXCLUDE_NOREF_TASKS = [:essential_genes, :mytaxa_scan]
- @@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances]
- @@ONLY_MULTI_TASKS = [:mytaxa]
- def self.PREPROCESSING_TASKS ; @@PREPROCESSING_TASKS ; end
- def self.RESULT_DIRS ; @@RESULT_DIRS end
- def self.KNOWN_TYPES ; @@KNOWN_TYPES end
- def self.exist?(project, name)
- File.exist? project.path + "/metadata/" + name + ".json"
- end
- def self.INFO_FIELDS
- %w(name created updated type ref user description comments)
- end
- # Instance
- attr_reader :project, :name, :metadata
- def initialize(project, name, is_ref=true, metadata={})
- abort "Invalid name '#{name}', please use only alphanumerics and " +
- "underscores." unless name.miga_name?
- @project = project
- @name = name
- metadata[:ref] = is_ref
- @metadata = Metadata.new(project.path + "/metadata/" + name + ".json",
- metadata)
- end
- def save
- self.metadata[:type] = :metagenome if !metadata[:tax].nil? and
- !metadata[:tax][:ns].nil? and
- metadata[:tax][:ns]=="COMMUNITY"
- self.metadata.save
- self.load
- end
- def load
- # Nothing here...
- end
- def remove!
- self.results.each{ |r| r.remove! }
- self.metadata.remove!
- end
- def info()
- Dataset.INFO_FIELDS.map do |k|
- (k=="name") ? self.name : self.metadata[k.to_sym]
- end
- end
- def is_ref?() !!self.metadata[:ref] end
- def is_multi?
- return false if self.metadata[:type].nil?
- return @@KNOWN_TYPES[self.metadata[:type]][:multi]
- end
- def is_nonmulti?
- return false if self.metadata[:type].nil?
- return !@@KNOWN_TYPES[self.metadata[:type]][:multi]
- end
- def result(k)
- return nil if @@RESULT_DIRS[k.to_sym].nil?
- Result.load(project.path + "/data/" + @@RESULT_DIRS[k.to_sym] +
- "/" + name + ".json")
- end
- def results() @@RESULT_DIRS.keys.map{ |k| self.result k }.compact end
- def each_result(&blk)
- @@RESULT_DIRS.keys.each do |k|
- v = self.result k
- blk.call(k,v) unless v.nil?
- end
- end
- def add_result result_type
- return nil if @@RESULT_DIRS[result_type].nil?
- base = project.path + "/data/" + @@RESULT_DIRS[result_type] +
- "/" + name
- return nil unless File.exist? base + ".done"
- r = nil
- case result_type
- when :raw_reads
- return nil unless
- File.exist? base + ".1.fastq" or
- File.exist? base + ".1.fastq.gz"
- r = Result.new base + ".json"
- r.data[:gz] = File.exist?(base + ".1.fastq.gz")
- if File.exist? base + ".2.fastq" + (r.data[:gz] ? ".gz" : "")
- r.add_file :pair1, name + ".1.fastq"
- r.add_file :pair2, name + ".2.fastq"
- else
- r.add_file :single, name + ".1.fastq"
- end
- when :trimmed_reads
- return nil unless
- File.exist?(base + ".1.clipped.fastq") or
- File.exist?(base + ".1.clipped.fastq.gz")
- r = Result.new base + ".json"
- r.data[:gz] = File.exist?(base + ".1.clipped.fastq.gz")
- if File.exist? base + ".2.clipped.fastq" + (r.data[:gz] ? ".gz":"")
- r.add_file :pair1, name + ".1.clipped.fastq"
- r.add_file :pair2, name + ".2.clipped.fastq"
- end
- r.add_file :single, name + ".1.clipped.single.fastq"
- add_result :raw_reads #-> Post gunzip (if any)
- when :read_quality
- return nil unless
- Dir.exist?(base + ".solexaqa") and
- Dir.exist?(base + ".fastqc")
- r = Result.new base + ".json"
- r.add_file :solexaqa, self.name + ".solexaqa"
- r.add_file :fastqc, self.name + ".fastqc"
- add_result :trimmed_reads #-> Post cleaning
- when :trimmed_fasta
- return nil unless
- File.exist?(base + ".CoupledReads.fa") or
- File.exist?(base + ".SingleReads.fa")
- r = Result.new base + ".json"
- if File.exist?(base + ".CoupledReads.fa")
- r.add_file :coupled, name + ".CoupledReads.fa"
- r.add_file :pair1, name + ".1.fa"
- r.add_file :pair2, name + ".2.fa"
- end
- r.add_file :single, name + ".SingleReads.fa"
- add_result :raw_reads #-> Post gzip
- when :assembly
- return nil unless
- File.exist?(base + ".LargeContigs.fna")
- r = Result.new base + ".json"
- r.add_file :largecontigs, name + ".LargeContigs.fna"
- r.add_file :allcontigs, name + ".AllContigs.fna"
- when :cds
- return nil unless
- File.exist?(base + ".faa") and
- File.exist?(base + ".fna")
- r = Result.new base + ".json"
- r.add_file :proteins, name + ".faa"
- r.add_file :genes, name + ".fna"
- %w(gff2 gff3 tab).each do |ext|
- r.add_file ext, "#{name}.#{ext}"
- end
- when :essential_genes
- return nil unless
- File.exist?(base + ".ess.faa") and
- Dir.exist?(base + ".ess") and
- File.exist?(base + ".ess/log")
- r = Result.new base + ".json"
- r.add_file :ess_genes, name + ".ess.faa"
- r.add_file :collection, name + ".ess"
- r.add_file :report, name + ".ess/log"
- when :ssu
- if result(:assembly).nil?
- r = Result.new base + ".json"
- else
- return nil unless
- File.exist?(base + ".ssu.fa") or
- File.exist?(base + ".ssu.fa.gz")
- r = Result.new base + ".json"
- r.data[:gz] = File.exist?(base + ".ssu.fa.gz")
- r.add_file :longest_ssu_gene, name + ".ssu.fa"
- r.add_file :gff, name + ".ssu.gff"
- r.add_file :all_ssu_genes, name + ".ssu.all.fa"
- end
- when :mytaxa
- if is_multi?
- return nil unless File.exist?(base + ".mytaxa")
- r = Result.new base + ".json"
- r.data[:gz] = File.exist?(base + ".mytaxain.gz")
- r.add_file :mytaxa, name + ".mytaxa"
- r.add_file :blast, name + ".blast"
- r.add_file :mytaxain, name + ".mytaxain"
- else
- r = Result.new base + ".json"
- r.data[:files] = {}
- end
- when :mytaxa_scan
- if is_nonmulti?
- return nil unless
- File.exists?(base + ".pdf") and
- File.exist?(base + ".wintax") and
- File.exist?(base + ".mytaxa") and
- Dir.exist?(base + ".reg")
- r = Result.new base + ".json"
- r.add_file :mytaxa, name + ".mytaxa"
- r.add_file :wintax, name + ".wintax"
- r.add_file :report, name + ".pdf"
- r.add_file :regions, name + ".reg"
- r.add_file :gene_ids, name + ".wintax.genes"
- r.add_file :region_ids, name + ".wintax.regions"
- r.add_file :blast, name + ".blast"
- r.add_file :mytaxain, name + ".mytaxain"
- else
- r = Result.new base + ".json"
- r.data[:files] = {}
- end
- when :distances
- if is_nonmulti?
- pref = project.path + "/data/" + @@RESULT_DIRS[result_type]
- if is_ref?
- return nil unless
- File.exist?(pref + "/01.haai/" + name + ".db")
- else
- return nil unless
- File.exist?(pref + "/02.aai/" + name + ".db")
- end
- r = Result.new base + ".json"
- r.add_file :haai_db, "01.haai/" + name + ".db"
- r.add_file :aai_db, "02.aai/" + name + ".db"
- r.add_file :ani_db, "03.ani/" + name + ".db"
- else
- r = Result.new base + ".json"
- r.data[:files] = {}
- end
- end
- r.save
- r
- end # def add_result
- def first_preprocessing
- @@PREPROCESSING_TASKS.find{ |t| not self.add_result(t).nil? }
- end
- def next_preprocessing
- after_first = false
- first = self.first_preprocessing
- return nil if first.nil?
- @@PREPROCESSING_TASKS.each do |t|
- next if @@EXCLUDE_NOREF_TASKS.include?(t) and not is_ref?
- next if @@ONLY_MULTI_TASKS.include?(t) and not is_multi?
- next if @@ONLY_NONMULTI_TASKS.include?(t) and not is_nonmulti?
- return t if after_first and add_result(t).nil?
- after_first = (after_first or (t==first))
- end
- nil
+ ##
+ # Dataset representation in MiGA.
+ class MiGA::Dataset < MiGA::MiGA
+
+ # Class-level
+
+ ##
+ # Directories containing the results from dataset-specific tasks.
+ def self.RESULT_DIRS ; @@RESULT_DIRS end
+ @@RESULT_DIRS = {
+ # Preprocessing
+ raw_reads: "01.raw_reads", trimmed_reads: "02.trimmed_reads",
+ read_quality: "03.read_quality", trimmed_fasta: "04.trimmed_fasta",
+ assembly: "05.assembly", cds: "06.cds",
+ # Annotation
+ essential_genes: "07.annotation/01.function/01.essential",
+ ssu: "07.annotation/01.function/02.ssu",
+ mytaxa: "07.annotation/02.taxonomy/01.mytaxa",
+ mytaxa_scan: "07.annotation/03.qa/02.mytaxa_scan",
+ # Mapping
+ mapping_on_contigs: "08.mapping/01.read-ctg",
+ mapping_on_genes: "08.mapping/02.read-gene",
+ # Distances (for single-species datasets)
+ distances: "09.distances"
+ }
+
+ ##
+ # Supported dataset types.
+ def self.KNOWN_TYPES ; @@KNOWN_TYPES end
+ @@KNOWN_TYPES = {
+ genome: {description: "The genome from an isolate.", multi: false},
+ metagenome: {description: "A metagenome (excluding viromes).",
+ multi: true},
+ virome: {description: "A viral metagenome.", multi: true},
+ scgenome: {description: "A genome from a single cell.", multi: false},
+ popgenome: {description: "The genome of a population (including " +
+ "microdiversity).", :multi=>false}
+ }
+
+ ##
+ # Returns an Array of tasks to be executed before project-wide tasks.
+ def self.PREPROCESSING_TASKS ; @@PREPROCESSING_TASKS ; end
+ @@PREPROCESSING_TASKS = [:raw_reads, :trimmed_reads, :read_quality,
+ :trimmed_fasta, :assembly, :cds, :essential_genes, :ssu, :mytaxa,
+ :mytaxa_scan, :distances]
+
+ ##
+ # Tasks to be excluded from query datasets.
+ @@EXCLUDE_NOREF_TASKS = [:essential_genes, :mytaxa_scan]
+
+ ##
+ # Tasks to be executed only in datasets that are not multi-organism. These
+ # tasks are ignored for multi-organism datasets or for unknown types.
+ @@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances]
+
+ ##
+ # Tasks to be executed only in datasets that are multi-organism. These
+ # tasks are ignored for single-organism datasets or for unknwon types.
+ @@ONLY_MULTI_TASKS = [:mytaxa]
+
+ ##
+ # Does the +project+ already have a dataset with that +name+?
+ def self.exist?(project, name)
+ File.exist? project.path + "/metadata/" + name + ".json"
+ end
+
+ ##
+ # Standard fields of metadata for datasets.
+ def self.INFO_FIELDS
+ %w(name created updated type ref user description comments)
+ end
+
+ # Instance-level
+
+ ##
+ # MiGA::Project that contains the dataset.
+ attr_reader :project
+
+ ##
+ # Datasets are uniquely identified by +name+ in a project.
+ attr_reader :name
+
+ ##
+ # MiGA::Metadata with information about the dataset.
+ attr_reader :metadata
+
+ ##
+ # Create a MiGA::Dataset object in a +project+ MiGA::Project with a
+ # uniquely identifying +name+. +is_ref+ indicates if the dataset is to
+ # be treated as reference (true, default) or query (false). Pass any
+ # additional +metadata+ as a Hash.
+ def initialize(project, name, is_ref=true, metadata={})
+ raise "Invalid name '#{name}', please use only alphanumerics and " +
+ "underscores." unless name.miga_name?
+ @project = project
+ @name = name
+ metadata[:ref] = is_ref
+ @metadata = MiGA::Metadata.new(project.path + "/metadata/" + name + ".json",
+ metadata)
+ end
+
+ ##
+ # Save any changes you've made in the dataset.
+ def save
+ self.metadata[:type] = :metagenome if !metadata[:tax].nil? and
+ !metadata[:tax][:ns].nil? and metadata[:tax][:ns]=="COMMUNITY"
+ self.metadata.save
+ end
+
+ ##
+ # Delete the dataset with all it's contents (including results) and returns
+ # nil.
+ def remove!
+ self.results.each{ |r| r.remove! }
+ self.metadata.remove!
+ end
+
+ ##
+ # Get standard metadata values for the dataset as Array.
+ def info
+ MiGA::Dataset.INFO_FIELDS.map do |k|
+ (k=="name") ? self.name : self.metadata[k.to_sym]
+ end
+ end
+
+ ##
+ # Is this dataset a reference?
+ def is_ref? ; !!self.metadata[:ref] ; end
+
+ ##
+ # Is this dataset known to be multi-organism?
+ def is_multi?
+ return false if self.metadata[:type].nil?
+ return @@KNOWN_TYPES[self.metadata[:type]][:multi]
+ end
+
+ ##
+ # Is this dataset known to be single-organism?
+ def is_nonmulti?
+ return false if self.metadata[:type].nil?
+ return !@@KNOWN_TYPES[self.metadata[:type]][:multi]
+ end
+
+ ##
+ # Get the result MiGA::Result in this dataset identified by the symbol +k+.
+ def result(k)
+ return nil if @@RESULT_DIRS[k.to_sym].nil?
+ MiGA::Result.load(project.path + "/data/" + @@RESULT_DIRS[k.to_sym] +
+ "/" + name + ".json")
+ end
+
+ ##
+ # Get all the results (Array of MiGA::Result) in this dataset.
+ def results ; @@RESULT_DIRS.keys.map{ |k| result k }.compact ; end
+
+ ##
+ # For each result executes the 2-ary +blk+ block: key symbol and MiGA::Result.
+ def each_result(&blk)
+ @@RESULT_DIRS.keys.each do |k|
+ blk.call(k, result(k)) unless result(k).nil?
+ end
+ end
+
+ ##
+ # Look for the result with symbol key +result_type+ and register it in the
+ # dataset. If +save+ is false, it doesn't register the result, but it still
+ # returns a result if the expected files are complete. Returns MiGA::Result
+ # or nil.
+ def add_result(result_type, save=true)
+ return nil if @@RESULT_DIRS[result_type].nil?
+ base = project.path + "/data/" + @@RESULT_DIRS[result_type] +
+ "/" + name
+ return MiGA::Result.load(base + ".json") unless save
+ return nil unless result_files_exist?(base, ".done")
+ r = self.send("add_result_#{result_type}", base)
+ r.save unless r.nil?
+ r
+ end
+
+ ##
+ # Returns the key symbol of the first registered result (sorted by the
+ # execution order). This typically corresponds to the result used as the
+ # initial input. Passes +save+ to #add_result.
+ def first_preprocessing(save=false)
+ @@PREPROCESSING_TASKS.find{ |t| not add_result(t, save).nil? }
+ end
+
+ ##
+ # Returns the key symbol of the next task that needs to be executed. Passes
+ # +save+ to #add_result.
+ def next_preprocessing(save=false)
+ after_first = false
+ first = first_preprocessing(save)
+ return nil if first.nil?
+ @@PREPROCESSING_TASKS.each do |t|
+ next if ignore_task? t
+ return t if after_first and add_result(t, save).nil?
+ after_first = (after_first or (t==first))
+ end
+ nil
+ end
+
+ ##
+ # Should I ignore +task+ for this dataset?
+ def ignore_task?(task)
+ ( (@@EXCLUDE_NOREF_TASKS.include?(task) and not is_ref?) or
+ (@@ONLY_MULTI_TASKS.include?(task) and not is_multi?) or
+ (@@ONLY_NONMULTI_TASKS.include?(task) and not is_nonmulti?))
+ end
+
+ ##
+ # Are all the dataset-specific tasks done? Passes +save+ to #add_result.
+ def done_preprocessing?(save=false)
+ !first_preprocessing(save).nil? and next_preprocessing(save).nil?
+ end
+
+ ##
+ # Returns an array indicating the stage of each task (sorted by execution
+ # order). The values are integers:
+ # - 0 for an undefined result (a task before the initial input).
+ # - 1 for a registered result (a completed task).
+ # - 2 for a queued result (a task yet to be executed).
+ # It passes +save+ to #add_result
+ def profile_advance(save=false)
+ first_task = first_preprocessing(save)
+ return Array.new(@@PREPROCESSING_TASKS.size, 0) if first_task.nil?
+ adv = []
+ state = 0
+ next_task = next_preprocessing(save)
+ @@PREPROCESSING_TASKS.each do |task|
+ state = 1 if first_task==task
+ state = 2 if !next_task.nil? and next_task==task
+ adv << state
+ end
+ adv
+ end
+
+ private
+
+ def add_result_raw_reads(base)
+ return nil unless result_files_exist?(base, ".1.fastq")
+ r = MiGA::Result.new(base + ".json")
+ add_files_to_ds_result(r, name,
+ ( result_files_exist?(base, ".2.fastq") ?
+ {:pair1=>".1.fastq", :pair2=>".2.fastq"} :
+ {:single=>".1.fastq"} ))
+ end
+
+ def add_result_trimmed_reads(base)
+ return nil unless result_files_exist?(base, ".1.clipped.fastq")
+ r = MiGA::Result.new base + ".json"
+ r = add_files_to_ds_result(r, name,
+ {:pair1=>".1.clipped.fastq", :pair2=>".2.clipped.fastq"}) if
+ result_files_exist?(base, ".2.clipped.fastq")
+ r.add_file(:single, name + ".1.clipped.single.fastq")
+ add_result(:raw_reads) #-> Post gunzip
+ r
+ end
+
+ def add_result_read_quality(base)
+ return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
+ r = MiGA::Result.new(base + ".json")
+ r = add_files_to_ds_result(r, name,
+ {:solexaqa=>".solexaqa", :fastqc=>".fastqc"})
+ add_result(:trimmed_reads) #-> Post cleaning
+ r
+ end
+
+ def add_result_trimmed_fasta(base)
+ return nil unless
+ result_files_exist?(base, ".CoupledReads.fa") or
+ result_files_exist?(base, ".SingleReads.fa")
+ r = MiGA::Result.new base + ".json"
+ r = add_files_to_ds_result(r, name, {:coupled=>".CoupledReads.fa",
+ :pair1=>".1.fa", :pair2=>".2.fa"}) if
+ result_files_exist?(base, ".CoupledReads.fa")
+ r.add_file(:single, name + ".SingleReads.fa")
+ add_result(:raw_reads) #-> Post gzip
+ r
+ end
+
+ def add_result_assembly(base)
+ return nil unless result_files_exist?(base, ".LargeContigs.fna")
+ r = MiGA::Result.new(base + ".json")
+ add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
+ :allcontigs=>".AllContigs.fna"})
+ end
+
+ def add_result_cds(base)
+ return nil unless result_files_exist?(base, %w[.faa .fna])
+ r = MiGA::Result.new(base + ".json")
+ add_files_to_ds_result(r, name, {:proteins=>".faa", :genes=>".fna",
+ :gff2=>".gff2", :gff3=>".gff3", :tab=>".tab"})
+ end
+
+ def add_result_essential_genes(base)
+ return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
+ r = MiGA::Result.new(base + ".json")
+ add_files_to_ds_result(r, name, {:ess_genes=>".ess.faa",
+ :collection=>".ess", :report=>".ess/log"})
+ end
+
+ def add_result_ssu(base)
+ return MiGA::Result.new(base + ".json") if result(:assembly).nil?
+ return nil unless result_files_exist?(base, ".ssu.fa")
+ r = MiGA::Result.new(base + ".json")
+ add_files_to_ds_result(r, name, {:longest_ssu_gene=>".ssu.fa",
+ :gff=>".ssu.gff", :all_ssu_genes=>".ssu.all.fa"})
+ end
+
+ def add_result_mytaxa(base)
+ if is_multi?
+ return nil unless result_files_exist?(base, ".mytaxa")
+ r = MiGA::Result.new(base + ".json")
+ add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :blast=>".blast",
+ :mytaxain=>".mytaxain"})
+ else
+ MiGA::Result.new base + ".json"
  end
- def done_preprocessing?
- !first_preprocessing.nil? and next_preprocessing.nil?
+ end
+
+ def add_result_mytaxa_scan(base)
+ if is_nonmulti?
+ return nil unless
+ result_files_exist?(base, %w[.pdf .wintax .mytaxa .reg])
+ r = MiGA::Result.new(base + ".json")
+ add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :wintax=>".wintax",
+ :blast=>".blast", :mytaxain=>".mytaxain", :report=>".pdf",
+ :regions=>".reg", :gene_ids=>".wintax.genes",
+ :region_ids=>".wintax.regions"})
+ else
+ MiGA::Result.new base + ".json"
  end
- def profile_advance
- if first_preprocessing.nil?
- adv = Array.new(@@PREPROCESSING_TASKS.size, 0)
- else
- adv = []
- state = 0
- first_task = first_preprocessing
- next_task = next_preprocessing
- @@PREPROCESSING_TASKS.each do |task|
- state = 1 if first_task==task
- state = 2 if !next_task.nil? and next_task==task
- adv << state
- end
- end
- adv
+ end
+
+ def add_result_distances(base)
+ if is_nonmulti?
+ pref = File.dirname(base)
+ return nil unless
+ File.exist?("#{pref}/#{is_ref? ? "01.haai" : "02.aai"}/#{name}.db")
+ r = MiGA::Result.new(base + ".json")
+ r.add_files({:haai_db=>"01.haai/#{name}.db",
+ :aai_db=>"02.aai/#{name}.db", :ani_db=>"03.ani/#{name}.db"})
+ else
+ r = MiGA::Result.new "#{base}.json"
  end
- end # class Dataset
- end # module MiGA
+ r
+ end
+
+ def add_files_to_ds_result(r, name, rel_files)
+ files = {}
+ rel_files.each{ |k,v| files[k] = name + v }
+ r.add_files(files)
+ r
+ end

+ end # class MiGA::Dataset
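
For reference, the sketch below illustrates the save-aware workflow methods this refactor adds to MiGA::Dataset (add_result, first_preprocessing, next_preprocessing, done_preprocessing?, and profile_advance now accept a save flag). The method signatures are taken from the diff above; the project path, dataset name, and the MiGA::Project.new call are illustrative assumptions, not part of this release's documentation.

  require "miga"

  # Hypothetical project and dataset names, for illustration only.
  project = MiGA::Project.new("/path/to/my_project")
  dataset = MiGA::Dataset.new(project, "dataset1", true)

  # save=true scans for completed files and registers them as results.
  dataset.first_preprocessing(true)

  # save=false only inspects the current state without registering anything.
  puts dataset.next_preprocessing(false).inspect  # next pending task, or nil
  puts dataset.done_preprocessing?(false)         # true once all tasks are registered
  p dataset.profile_advance(false)                # 0/1/2 per task, in execution order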