miga-base 0.2.0.6 → 0.2.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60):
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE +201 -0
  4. data/README.md +17 -335
  5. data/Rakefile +31 -0
  6. data/actions/add_result +2 -5
  7. data/actions/add_taxonomy +4 -7
  8. data/actions/create_dataset +5 -6
  9. data/actions/create_project +2 -5
  10. data/actions/daemon +2 -5
  11. data/actions/download_dataset +88 -58
  12. data/actions/find_datasets +36 -38
  13. data/actions/import_datasets +2 -5
  14. data/actions/index_taxonomy +2 -5
  15. data/actions/list_datasets +47 -49
  16. data/actions/list_files +7 -11
  17. data/actions/unlink_dataset +2 -5
  18. data/bin/miga +1 -1
  19. data/lib/miga/common.rb +132 -0
  20. data/lib/miga/daemon.rb +229 -168
  21. data/lib/miga/dataset.rb +354 -277
  22. data/lib/miga/gui.rb +346 -269
  23. data/lib/miga/metadata.rb +115 -71
  24. data/lib/miga/project.rb +361 -259
  25. data/lib/miga/remote_dataset.rb +200 -148
  26. data/lib/miga/result.rb +150 -99
  27. data/lib/miga/tax_index.rb +124 -67
  28. data/lib/miga/taxonomy.rb +129 -100
  29. data/lib/miga/version.rb +57 -0
  30. data/lib/miga.rb +2 -77
  31. data/scripts/_distances_noref_nomulti.bash +2 -0
  32. data/scripts/_distances_ref_nomulti.bash +2 -0
  33. data/scripts/aai_distances.bash +1 -0
  34. data/scripts/ani_distances.bash +1 -0
  35. data/scripts/assembly.bash +1 -0
  36. data/scripts/cds.bash +1 -0
  37. data/scripts/clade_finding.bash +17 -1
  38. data/scripts/distances.bash +1 -0
  39. data/scripts/essential_genes.bash +1 -0
  40. data/scripts/haai_distances.bash +1 -0
  41. data/scripts/init.bash +2 -0
  42. data/scripts/mytaxa.bash +1 -0
  43. data/scripts/mytaxa_scan.bash +1 -0
  44. data/scripts/ogs.bash +1 -0
  45. data/scripts/read_quality.bash +1 -0
  46. data/scripts/ssu.bash +1 -0
  47. data/scripts/subclades.bash +1 -0
  48. data/scripts/trimmed_fasta.bash +1 -0
  49. data/scripts/trimmed_reads.bash +1 -0
  50. data/test/common_test.rb +82 -0
  51. data/test/daemon_test.rb +53 -0
  52. data/test/dataset_test.rb +156 -0
  53. data/test/jruby_gui_test.rb +20 -0
  54. data/test/metadata_test.rb +48 -0
  55. data/test/project_test.rb +54 -0
  56. data/test/remote_dataset_test.rb +41 -0
  57. data/test/tax_index_test.rb +44 -0
  58. data/test/taxonomy_test.rb +36 -0
  59. data/test/test_helper.rb +32 -0
  60. metadata +53 -38
data/lib/miga/project.rb CHANGED
@@ -1,268 +1,370 @@
1
- #
2
1
  # @package MiGA
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license artistic license 2.0
5
- # @update Jan-18-2016
6
- #
2
+ # @license Artistic-2.0
7
3
 
8
4
  require "miga/dataset"
9
5
 
10
- module MiGA
11
- class Project
12
- # Class
13
- @@FOLDERS = %w(data metadata daemon)
14
- @@DATA_FOLDERS = %w(
15
- 01.raw_reads 02.trimmed_reads 03.read_quality 04.trimmed_fasta
16
- 05.assembly 06.cds
17
- 07.annotation 07.annotation/01.function 07.annotation/02.taxonomy
18
- 07.annotation/01.function/01.essential
19
- 07.annotation/01.function/02.ssu
20
- 07.annotation/02.taxonomy/01.mytaxa
21
- 07.annotation/03.qa 07.annotation/03.qa/01.checkm
22
- 07.annotation/03.qa/02.mytaxa_scan
23
- 08.mapping 08.mapping/01.read-ctg 08.mapping/02.read-gene
24
- 09.distances 09.distances/01.haai 09.distances/02.aai
25
- 09.distances/03.ani 09.distances/04.ssu
26
- 10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
27
- 10.clades/04.phylogeny 10.clades/04.phylogeny/01.essential
28
- 10.clades/04.phylogeny/02.core 10.clades/05.metadata)
29
- @@RESULT_DIRS = {
30
- # Distances
31
- haai_distances: "09.distances/01.haai",
32
- aai_distances: "09.distances/02.aai",
33
- ani_distances: "09.distances/03.ani",
34
- #ssu_distances: "09.distances/04.ssu",
35
- # Clade identification
36
- clade_finding: "10.clades/01.find",
37
- # Clade analysis
38
- subclades: "10.clades/02.ani",
39
- ogs: "10.clades/03.ogs",
40
- ess_phylogeny: "10.clades/04.phylogeny/01.essential",
41
- core_phylogeny: "10.clades/04.phylogeny/02.core",
42
- clade_metadata: "10.clades/05.metadata"
43
- }
44
- @@KNOWN_TYPES = {
45
- mixed: {description: "Mixed collection of genomes, metagenomes, " +
46
- "and viromes.",
47
- single: true, multi: true},
48
- genomes: {description: "Collection of genomes.",
49
- single: true, multi: false},
50
- clade: {description: "Collection of closely-related genomes " +
51
- "(ANI <= 90%).",
52
- single: true, multi: false},
53
- metagenomes: {description: "Collection of metagenomes and/or " +
54
- "viromes.",
55
- single: false, multi: true}
56
- }
57
- @@DISTANCE_TASKS = [:haai_distances, :aai_distances, :ani_distances,
58
- :clade_finding]
59
- @@INCLADE_TASKS = [:subclades, :ogs, :ess_phylogeny, :core_phylogeny,
60
- :clade_metadata]
61
- def self.DISTANCE_TASKS ; @@DISTANCE_TASKS ; end
62
- def self.INCLADE_TASKS ; @@INCLADE_TASKS ; end
63
- def self.RESULT_DIRS ; @@RESULT_DIRS ; end
64
- def self.KNOWN_TYPES ; @@KNOWN_TYPES ; end
65
- def self.exist?(path)
66
- Dir.exist?(path) and File.exist?(path + "/miga.project.json")
67
- end
68
- def self.load(path)
69
- return nil unless Project.exist? path
70
- Project.new path
71
- end
72
- # Instance
73
- attr_reader :path, :metadata
74
- def initialize(path, update=false)
75
- raise "Impossible to create project in uninitialized MiGA." unless
76
- File.exist? "#{ENV["HOME"]}/.miga_rc" and
77
- File.exist? "#{ENV["HOME"]}/.miga_daemon.json"
78
- @path = File.absolute_path(path)
79
- self.create if update or not Project.exist? self.path
80
- self.load if self.metadata.nil?
81
- end
82
- def create
83
- Dir.mkdir self.path unless Dir.exist? self.path
84
- @@FOLDERS.each do |dir|
85
- Dir.mkdir self.path + "/" + dir unless
86
- Dir.exist? self.path + "/" + dir
87
- end
88
- @@DATA_FOLDERS.each do |dir|
89
- Dir.mkdir self.path + "/data/" + dir unless
90
- Dir.exist? self.path + "/data/" + dir
91
- end
92
- @metadata = Metadata.new(self.path + "/miga.project.json",
93
- {datasets: [], name: File.basename(self.path)})
94
- FileUtils.cp(ENV["HOME"] + "/.miga_daemon.json",
95
- self.path + "/daemon/daemon.json") unless
96
- File.exist? self.path + "/daemon/daemon.json"
97
- self.load
98
- end
99
- def save
100
- self.metadata.save
101
- self.load
102
- end
103
- def load
104
- @metadata = Metadata.load self.path + "/miga.project.json"
105
- raise "Couldn't find project metadata at #{self.path}" if
106
- self.metadata.nil?
107
- end
108
- def name ; self.metadata[:name] ; end
109
- def datasets
110
- self.metadata[:datasets].map{ |name| self.dataset name }
111
- end
112
- def dataset(name)
113
- name = name.miga_name
114
- @datasets = {} if @datasets.nil?
115
- @datasets[name] = Dataset.new(self, name) if @datasets[name].nil?
116
- @datasets[name]
117
- end
118
- def each_dataset(&blk)
119
- self.metadata[:datasets].each{ |name| blk.call(self.dataset name) }
120
- end
121
- def add_dataset(name)
122
- self.metadata[:datasets] << name unless
123
- self.metadata[:datasets].include? name
124
- self.save
125
- self.dataset(name)
126
- end
127
- def unlink_dataset(name)
128
- d = self.dataset name
129
- return nil if d.nil?
130
- self.metadata[:datasets].delete(name)
131
- self.save
132
- d
133
- end
134
- def import_dataset(ds, method=:hardlink)
135
- raise "Impossible to import dataset, it already exists: #{ds.name}." if
136
- Dataset.exist?(self, ds.name)
137
- # Import dataset results
138
- ds.each_result do |task, result|
139
- # import result files
140
- result.each_file do |file|
141
- File.generic_transfer("#{result.dir}/#{file}",
142
- "#{self.path}/data/#{Dataset.RESULT_DIRS[task]}/#{file}",
143
- method)
144
- end
145
- # import result metadata
146
- %w(json start done).each do |suffix|
147
- if File.exist? "#{result.dir}/#{ds.name}.#{suffix}"
148
- File.generic_transfer("#{result.dir}/#{ds.name}.#{suffix}",
149
- "#{self.path}/data/#{Dataset.RESULT_DIRS[task]}/" +
150
- "#{ds.name}.#{suffix}",
151
- method)
152
- end
153
- end
154
- end
155
- # Import dataset metadata
156
- File.generic_transfer("#{ds.project.path}/metadata/#{ds.name}.json",
157
- "#{self.path}/metadata/#{ds.name}.json", method)
158
- # Save dataset
159
- self.add_dataset ds.name
160
- end
161
- def result(name)
162
- return nil if @@RESULT_DIRS[name.to_sym].nil?
163
- Result.load self.path + "/data/" + @@RESULT_DIRS[name.to_sym] +
164
- "/miga-project.json"
165
- end
166
- def results
167
- @@RESULT_DIRS.keys.map{ |k| self.result k }.reject{ |r| r.nil? }
168
- end
169
- def add_result result_type
170
- return nil if @@RESULT_DIRS[result_type].nil?
171
- base = self.path + "/data/" + @@RESULT_DIRS[result_type] +
172
- "/miga-project"
173
- return nil unless File.exist? base + ".done"
174
- r = nil
175
- case result_type
176
- when :haai_distances, :aai_distances, :ani_distances, :ssu_distances
177
- return nil unless
178
- File.exist? base + ".Rdata" and
179
- File.exist? base + ".log" and
180
- (File.exist?(base + ".txt") or File.exist?(base + ".txt.gz"))
181
- r = Result.new base + ".json"
182
- r.add_file :rdata, "miga-project.Rdata"
183
- r.add_file :matrix, "miga-project.txt"
184
- r.add_file :log, "miga-project.log"
185
- r.add_file :hist, "miga-project.hist"
186
- r.data[:gz] = File.exist?(base + ".txt.gz")
187
- when :clade_finding
188
- return nil unless File.exist? base + ".proposed-clades"
189
- r = Result.new base + ".json"
190
- r.add_file :proposal, "miga-project.proposed-clades"
191
- r.add_file :rbm_aai90, "genome-genome.aai90.rbm"
192
- r.add_file :clades_aai90, "miga-project.ani-clades"
193
- r.add_file :rbm_ani95, "genome-genome.ani95.rbm"
194
- r.add_file :clades_ani95, "miga-project.ani95-clades"
195
- when :subclades
196
- return nil unless
197
- File.exist?(base+".pdf") and
198
- File.exist?(base+".1.classif") and
199
- File.exist?(base+".1.medoids") and
200
- File.exist?(base+".class.tsv") and
201
- File.exist?(base+".class.nwk")
202
- r = Result.new base + ".json"
203
- r.add_file :report, "miga-project.pdf"
204
- (1..6).each do |i|
205
- %w{classif medoids}.each do |m|
206
- r.add_file "#{m}_#{i}".to_sym, "miga-project.#{i}.#{m}"
207
- end
208
- end
209
- r.add_file :class_table, "miga-project.class.tsv"
210
- r.add_file :class_tree, "miga-project.class.nwk"
211
- r.add_file :ani_tree, "miga-project.ani.nwk"
212
- when :ogs
213
- return nil unless
214
- File.exist?(base+".ogs") and
215
- File.exist?(base+".stats")
216
- r = Result.new base + ".json"
217
- r.add_file :ogs, "miga-project.ogs"
218
- r.add_file :stats, "miga-project.stats"
219
- r.add_file :rbm, "miga-project.rbm"
220
- end
221
- r.save
222
- r
223
- end
224
- def next_distances
225
- @@DISTANCE_TASKS.find{ |t| self.add_result(t).nil? }
226
- end
227
- def next_inclade
228
- return nil unless self.metadata[:type]==:clade
229
- @@INCLADE_TASKS.find{ |t| self.add_result(t).nil? }
230
- end
231
- def unregistered_datasets
232
- datasets = []
233
- Dataset.RESULT_DIRS.each do |res, dir|
234
- Dir.entries(self.path + "/data/" + dir).each do |file|
235
- next unless
236
- file =~ %r{
237
- \.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
238
- }x
239
- m = /([^\.]+)/.match(file)
240
- datasets << m[1] unless m.nil? or m[1] == "miga-project"
241
- end
242
- end
243
- datasets.uniq - self.metadata[:datasets]
6
+ ##
7
+ # MiGA representation of a project.
8
+ class MiGA::Project < MiGA::MiGA
9
+
10
+ # Class-level
11
+
12
+ ##
13
+ # Top-level folders inside a project.
14
+ @@FOLDERS = %w[data metadata daemon]
15
+
16
+ ##
17
+ # Folders for results.
18
+ @@DATA_FOLDERS = %w[
19
+ 01.raw_reads 02.trimmed_reads 03.read_quality 04.trimmed_fasta
20
+ 05.assembly 06.cds
21
+ 07.annotation 07.annotation/01.function 07.annotation/02.taxonomy
22
+ 07.annotation/01.function/01.essential
23
+ 07.annotation/01.function/02.ssu
24
+ 07.annotation/02.taxonomy/01.mytaxa
25
+ 07.annotation/03.qa 07.annotation/03.qa/01.checkm
26
+ 07.annotation/03.qa/02.mytaxa_scan
27
+ 08.mapping 08.mapping/01.read-ctg 08.mapping/02.read-gene
28
+ 09.distances 09.distances/01.haai 09.distances/02.aai
29
+ 09.distances/03.ani 09.distances/04.ssu
30
+ 10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
31
+ 10.clades/04.phylogeny 10.clades/04.phylogeny/01.essential
32
+ 10.clades/04.phylogeny/02.core 10.clades/05.metadata
33
+ ]
34
+
35
+ ##
36
+ # Directories containing the results from project-wide tasks.
37
+ def self.RESULT_DIRS ; @@RESULT_DIRS ; end
38
+ @@RESULT_DIRS = {
39
+ # Distances
40
+ haai_distances: "09.distances/01.haai",
41
+ aai_distances: "09.distances/02.aai",
42
+ ani_distances: "09.distances/03.ani",
43
+ #ssu_distances: "09.distances/04.ssu",
44
+ # Clade identification
45
+ clade_finding: "10.clades/01.find",
46
+ # Clade analysis
47
+ subclades: "10.clades/02.ani",
48
+ ogs: "10.clades/03.ogs",
49
+ ess_phylogeny: "10.clades/04.phylogeny/01.essential",
50
+ core_phylogeny: "10.clades/04.phylogeny/02.core",
51
+ clade_metadata: "10.clades/05.metadata"
52
+ }
53
+
54
+ ##
55
+ # Supported types of projects.
56
+ def self.KNOWN_TYPES ; @@KNOWN_TYPES ; end
57
+ @@KNOWN_TYPES = {
58
+ mixed: {
59
+ description: "Mixed collection of genomes, metagenomes, and viromes.",
60
+ single: true, multi: true},
61
+ genomes: {description: "Collection of genomes.",
62
+ single: true, multi: false},
63
+ clade: {description: "Collection of closely-related genomes (ANI <= 90%).",
64
+ single: true, multi: false},
65
+ metagenomes: {description: "Collection of metagenomes and/or viromes.",
66
+ single: false, multi: true}
67
+ }
68
+
69
+ ##
70
+ # Project-wide distance estimations.
71
+ def self.DISTANCE_TASKS ; @@DISTANCE_TASKS ; end
72
+ @@DISTANCE_TASKS = [:haai_distances, :aai_distances, :ani_distances,
73
+ :clade_finding]
74
+
75
+ ##
76
+ # Project-wide tasks for :clade projects.
77
+ def self.INCLADE_TASKS ; @@INCLADE_TASKS ; end
78
+ @@INCLADE_TASKS = [:subclades, :ogs, :ess_phylogeny, :core_phylogeny,
79
+ :clade_metadata]
80
+
81
+ ##
82
+ # Does the project at +path+ exist?
83
+ def self.exist?(path)
84
+ Dir.exist?(path) and File.exist?(path + "/miga.project.json")
85
+ end
86
+
87
+ ##
88
+ # Load the project at +path+. Returns MiGA::Project if project exists, nil
89
+ # otherwise.
90
+ def self.load(path)
91
+ return nil unless Project.exist? path
92
+ Project.new path
93
+ end
94
+
95
+ # Instance-level
96
+
97
+ ##
98
+ # Absolute path to the project folder.
99
+ attr_reader :path
100
+
101
+ ##
102
+ # Information about the project as MiGA::Metadata.
103
+ attr_reader :metadata
104
+
105
+ ##
106
+ # Create a new MiGA::Project at +path+, if it doesn't exist and +update+ is
107
+ # false, or load an existing one.
108
+ def initialize(path, update=false)
109
+ @datasets = {}
110
+ @path = File.absolute_path(path)
111
+ self.create if update or not Project.exist? self.path
112
+ self.load if self.metadata.nil?
113
+ end
114
+
115
+ ##
116
+ # Create an empty project.
117
+ def create
118
+ unless MiGA::MiGA.initialized?
119
+ raise "Impossible to create project in uninitialized MiGA."
120
+ end
121
+ dirs = [path] + @@FOLDERS.map{|d| "#{path}/#{d}" } +
122
+ @@DATA_FOLDERS.map{ |d| "#{path}/data/#{d}"}
123
+ dirs.each{ |d| Dir.mkdir(d) unless Dir.exist? d }
124
+ @metadata = MiGA::Metadata.new(self.path + "/miga.project.json",
125
+ {datasets: [], name: File.basename(path)})
126
+ FileUtils.cp(ENV["MIGA_HOME"] + "/.miga_daemon.json",
127
+ "#{path}/daemon/daemon.json") unless
128
+ File.exist? "#{path}/daemon/daemon.json"
129
+ self.load
130
+ end
131
+
132
+ ##
133
+ # Save any changes persistently.
134
+ def save
135
+ metadata.save
136
+ self.load
137
+ end
138
+
139
+ ##
140
+ # (Re-)load project data and metadata.
141
+ def load
142
+ @metadata = MiGA::Metadata.load "#{path}/miga.project.json"
143
+ raise "Couldn't find project metadata at #{path}" if metadata.nil?
144
+ end
145
+
146
+ ##
147
+ # Name of the project.
148
+ def name ; metadata[:name] ; end
149
+
150
+ ##
151
+ # Returns Array of MiGA::Dataset.
152
+ def datasets
153
+ metadata[:datasets].map{ |name| dataset(name) }
154
+ end
155
+
156
+ ##
157
+ # Returns MiGA::Dataset.
158
+ def dataset(name)
159
+ name = name.miga_name
160
+ return nil unless MiGA::Dataset.exist?(self, name)
161
+ @datasets ||= {}
162
+ @datasets[name] ||= MiGA::Dataset.new(self, name)
163
+ @datasets[name]
164
+ end
165
+
166
+ ##
167
+ # Iterate through datasets, with a single variable MiGA::Dataset passed to
168
+ # +blk+.
169
+ def each_dataset(&blk)
170
+ metadata[:datasets].each{ |name| blk.call(dataset(name)) }
171
+ end
172
+
173
+ ##
174
+ # Add dataset identified by +name+ and return MiGA::Dataset.
175
+ def add_dataset(name)
176
+ unless metadata[:datasets].include? name
177
+ d = MiGA::Dataset.new(self, name)
178
+ @metadata[:datasets] << name
179
+ save
180
+ end
181
+ dataset(name)
182
+ end
183
+
184
+ ##
185
+ # Unlink dataset identified by +name+ and return MiGA::Dataset.
186
+ def unlink_dataset(name)
187
+ d = dataset(name)
188
+ return nil if d.nil?
189
+ self.metadata[:datasets].delete(name)
190
+ save
191
+ d
192
+ end
193
+
194
+ ##
195
+ # Import the dataset +ds+, a MiGA::Dataset, using +method+ which is any method
196
+ # supported by File#generic_transfer.
197
+ def import_dataset(ds, method=:hardlink)
198
+ raise "Impossible to import dataset, it already exists: #{ds.name}." if
199
+ MiGA::Dataset.exist?(self, ds.name)
200
+ # Import dataset results
201
+ ds.each_result do |task, result|
202
+ # import result files
203
+ result.each_file do |file|
204
+ File.generic_transfer("#{result.dir}/#{file}",
205
+ "#{path}/data/#{MiGA::Dataset.RESULT_DIRS[task]}/#{file}", method)
244
206
  end
245
- def done_preprocessing?
246
- self.datasets.map{|ds| (not ds.is_ref?) or ds.done_preprocessing?}.all?
207
+ # import result metadata
208
+ %w(json start done).each do |suffix|
209
+ if File.exist? "#{result.dir}/#{ds.name}.#{suffix}"
210
+ File.generic_transfer("#{result.dir}/#{ds.name}.#{suffix}",
211
+ "#{path}/data/#{MiGA::Dataset.RESULT_DIRS[task]}/" +
212
+ "#{ds.name}.#{suffix}", method)
213
+ end
247
214
  end
248
- ## Generates a two-dimensional matrix (array of arrays) where the first
249
- ## index corresponds to the dataset, the second index corresponds to the
250
- ## dataset task, and the value corresponds to:
251
- ## 0: Before execution.
252
- ## 1: Done (or not required).
253
- ## 2: To do.
254
- def profile_datasets_advance
255
- advance = []
256
- self.each_dataset_profile_advance do |ds_adv|
257
- advance << ds_adv
258
- end
259
- advance
215
+ end
216
+ # Import dataset metadata
217
+ File.generic_transfer("#{ds.project.path}/metadata/#{ds.name}.json",
218
+ "#{self.path}/metadata/#{ds.name}.json", method)
219
+ # Save dataset
220
+ self.add_dataset(ds.name)
221
+ end
222
+
223
+ ##
224
+ # Get result identified by Symbol +name+, returns MiGA::Result.
225
+ def result(name)
226
+ return nil if @@RESULT_DIRS[name.to_sym].nil?
227
+ MiGA::Result.load "#{path}/data/" + @@RESULT_DIRS[name.to_sym] +
228
+ "/miga-project.json"
229
+ end
230
+
231
+ ##
232
+ # Get all results, an Array of MiGA::Result.
233
+ def results
234
+ @@RESULT_DIRS.keys.map{ |k| result(k) }.reject{ |r| r.nil? }
235
+ end
236
+
237
+ ##
238
+ # Add the result identified by Symbol +name+, and return MiGA::Result. Save
239
+ # the result if +save+.
240
+ def add_result(name, save=true)
241
+ return nil if @@RESULT_DIRS[name].nil?
242
+ base = "#{path}/data/#{@@RESULT_DIRS[name]}/miga-project"
243
+ return MiGA::Result.load(base + ".json") unless save
244
+ return nil unless result_files_exist?(base, ".done")
245
+ r = send("add_result_#{name}", base)
246
+ r.save
247
+ r
248
+ end
249
+
250
+ ##
251
+ # Get the next distances task, saving intermediate results if +save+. Returns
252
+ # a Symbol.
253
+ def next_distances(save=true)
254
+ @@DISTANCE_TASKS.find{ |t| add_result(t, save).nil? }
255
+ end
256
+
257
+ ##
258
+ # Get the next inclade task, saving intermediate results if +save+. Returns a
259
+ # Symbol.
260
+ def next_inclade(save=true)
261
+ return nil unless metadata[:type]==:clade
262
+ @@INCLADE_TASKS.find{ |t| add_result(t, save).nil? }
263
+ end
264
+
265
+ ##
266
+ # Find all datasets with (potential) result files but are yet unregistered.
267
+ def unregistered_datasets
268
+ datasets = []
269
+ MiGA::Dataset.RESULT_DIRS.values.each do |dir|
270
+ Dir.entries("#{path}/data/#{dir}").each do |file|
271
+ next unless
272
+ file =~ %r{
273
+ \.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
274
+ }x
275
+ m = /([^\.]+)/.match(file)
276
+ datasets << m[1] unless m.nil? or m[1] == "miga-project"
260
277
  end
261
- def each_dataset_profile_advance(&blk)
262
- self.each_dataset do |ds|
263
- blk.call(ds.profile_advance)
264
- end
278
+ end
279
+ datasets.uniq - metadata[:datasets]
280
+ end
281
+
282
+ ##
283
+ # Are all the datasets in the project preprocessed? Save intermediate results
284
+ # if +save+.
285
+ def done_preprocessing?(save=true)
286
+ datasets.map{|ds| (not ds.is_ref?) or ds.done_preprocessing?(save) }.all?
287
+ end
288
+
289
+ ##
290
+ # Returns a two-dimensional matrix (Array of Array) where the first index
291
+ # corresponds to the dataset, the second index corresponds to the dataset
292
+ # task, and the value corresponds to:
293
+ # - 0: Before execution.
294
+ # - 1: Done (or not required).
295
+ # - 2: To do.
296
+ def profile_datasets_advance
297
+ advance = []
298
+ self.each_dataset_profile_advance do |ds_adv|
299
+ advance << ds_adv
300
+ end
301
+ advance
302
+ end
303
+
304
+ ##
305
+ # Call +blk+ passing the result of MiGA::Dataset#profile_advance for each
306
+ # registered dataset.
307
+ def each_dataset_profile_advance(&blk)
308
+ each_dataset { |ds| blk.call(ds.profile_advance) }
309
+ end
310
+
311
+ private
312
+
313
+ ##
314
+ # Internal alias for all add_result_*_distances.
315
+ def add_result_distances(base)
316
+ return nil unless result_files_exist?(base, %w[.Rdata .log .txt])
317
+ r = Result.new(base + ".json")
318
+ r.add_file(:rdata, "miga-project.Rdata")
319
+ r.add_file(:matrix, "miga-project.txt")
320
+ r.add_file(:log, "miga-project.log")
321
+ r.add_file(:hist, "miga-project.hist")
322
+ r
323
+ end
324
+
325
+ def add_result_clade_finding(base)
326
+ return nil unless result_files_exist?(base,
327
+ %w[.proposed-clades .pdf .1.classif .1.medoids .class.tsv .class.nwk])
328
+ r = add_result_iter_clades(base)
329
+ r.add_file(:aai_tree, "miga-project.aai.nwk")
330
+ r.add_file(:proposal, "miga-project.proposed-clades")
331
+ r.add_file(:clades_aai90, "miga-project.aai90-clades")
332
+ r.add_file(:clades_ani95, "miga-project.ani95-clades")
333
+ r
334
+ end
335
+
336
+ def add_result_subclades(base)
337
+ return nil unless result_files_exist?(base,
338
+ %w[.pdf .1.classif .1.medoids .class.tsv .class.nwk])
339
+ r = add_result_iter_clades(base)
340
+ r.add_file(:ani_tree, "miga-project.ani.nwk")
341
+ r
342
+ end
343
+
344
+ def add_result_iter_clades(base)
345
+ r = Result.new(base + ".json")
346
+ r.add_file(:report, "miga-project.pdf")
347
+ r.add_file(:class_table, "miga-project.class.tsv")
348
+ r.add_file(:class_tree, "miga-project.class.nwk")
349
+ (1..6).each do |i|
350
+ %w{classif medoids}.each do |m|
351
+ r.add_file("#{m}_#{i}".to_sym, "miga-project.#{i}.#{m}")
352
+ end
265
353
  end
266
- end
267
- end
354
+ r
355
+ end
268
356
 
357
+ def add_result_ogs(base)
358
+ return nil unless result_files_exist?(base, %w[.ogs .stats])
359
+ r = Result.new(base + ".json")
360
+ r.add_file(:ogs, "miga-project.ogs")
361
+ r.add_file(:stats, "miga-project.stats")
362
+ r.add_file(:rbm, "miga-project.rbm")
363
+ r
364
+ end
365
+
366
+ alias add_result_haai_distances add_result_distances
367
+ alias add_result_aai_distances add_result_distances
368
+ alias add_result_ani_distances add_result_distances
369
+ alias add_result_ssu_distances add_result_distances
370
+ end