miga-base 0.2.0.9 → 0.2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +3 -0
  3. data/actions/add_result.rb +37 -0
  4. data/actions/add_taxonomy.rb +63 -0
  5. data/actions/create_dataset.rb +49 -0
  6. data/actions/create_project.rb +46 -0
  7. data/actions/daemon.rb +50 -0
  8. data/actions/date.rb +14 -0
  9. data/actions/{download_dataset → download_dataset.rb} +5 -28
  10. data/actions/find_datasets.rb +41 -0
  11. data/actions/import_datasets.rb +47 -0
  12. data/actions/index_taxonomy.rb +46 -0
  13. data/actions/list_datasets.rb +50 -0
  14. data/actions/list_files.rb +43 -0
  15. data/actions/project_info.rb +40 -0
  16. data/actions/unlink_dataset.rb +28 -0
  17. data/bin/miga +129 -33
  18. data/lib/miga/daemon.rb +48 -34
  19. data/lib/miga/dataset.rb +7 -123
  20. data/lib/miga/dataset_result.rb +177 -0
  21. data/lib/miga/project.rb +32 -12
  22. data/lib/miga/version.rb +2 -2
  23. data/scripts/_distances_functions.bash +82 -0
  24. data/scripts/_distances_noref_nomulti.bash +96 -67
  25. data/scripts/_distances_ref_nomulti.bash +54 -85
  26. data/scripts/assembly.bash +16 -3
  27. data/scripts/clade_finding.bash +20 -18
  28. data/scripts/distances.bash +2 -1
  29. data/scripts/init.bash +2 -6
  30. data/scripts/subclades.bash +4 -5
  31. data/test/common_test.rb +2 -2
  32. data/test/daemon_test.rb +73 -1
  33. data/test/project_test.rb +26 -2
  34. data/test/taxonomy_test.rb +10 -0
  35. data/test/test_helper.rb +1 -1
  36. data/utils/subclades-compile.rb +4 -2
  37. data/utils/subclades.R +140 -158
  38. metadata +48 -44
  39. data/actions/add_result +0 -58
  40. data/actions/add_taxonomy +0 -83
  41. data/actions/create_dataset +0 -61
  42. data/actions/create_project +0 -67
  43. data/actions/daemon +0 -66
  44. data/actions/find_datasets +0 -61
  45. data/actions/import_datasets +0 -83
  46. data/actions/index_taxonomy +0 -68
  47. data/actions/list_datasets +0 -81
  48. data/actions/list_files +0 -63
  49. data/actions/unlink_dataset +0 -49
data/lib/miga/dataset.rb CHANGED
@@ -3,11 +3,14 @@
3
3
 
4
4
  require "miga/metadata"
5
5
  require "miga/result"
6
+ require "miga/dataset_result"
6
7
 
7
8
  ##
8
9
  # Dataset representation in MiGA.
9
10
  class MiGA::Dataset < MiGA::MiGA
10
11
 
12
+ include MiGA::DatasetResult
13
+
11
14
  # Class-level
12
15
 
13
16
  ##
@@ -52,7 +55,7 @@ class MiGA::Dataset < MiGA::MiGA
52
55
 
53
56
  ##
54
57
  # Tasks to be excluded from query datasets.
55
- @@EXCLUDE_NOREF_TASKS = [:essential_genes, :mytaxa_scan]
58
+ @@EXCLUDE_NOREF_TASKS = [:mytaxa_scan]
56
59
 
57
60
  ##
58
61
  # Tasks to be executed only in datasets that are not multi-organism. These
@@ -137,14 +140,14 @@ class MiGA::Dataset < MiGA::MiGA
137
140
  # Is this dataset known to be multi-organism?
138
141
  def is_multi?
139
142
  return false if self.metadata[:type].nil?
140
- return @@KNOWN_TYPES[self.metadata[:type]][:multi]
143
+ @@KNOWN_TYPES[self.metadata[:type]][:multi]
141
144
  end
142
145
 
143
146
  ##
144
147
  # Is this dataset known to be single-organism?
145
148
  def is_nonmulti?
146
149
  return false if self.metadata[:type].nil?
147
- return !@@KNOWN_TYPES[self.metadata[:type]][:multi]
150
+ !@@KNOWN_TYPES[self.metadata[:type]][:multi]
148
151
  end
149
152
 
150
153
  ##
@@ -176,7 +179,7 @@ class MiGA::Dataset < MiGA::MiGA
176
179
  return nil if @@RESULT_DIRS[result_type].nil?
177
180
  base = project.path + "/data/" + @@RESULT_DIRS[result_type] +
178
181
  "/" + name
179
- return MiGA::Result.load(base + ".json") unless save
182
+ return MiGA::Result.load("#{base}.json") unless save
180
183
  return nil unless result_files_exist?(base, ".done")
181
184
  r = self.send("add_result_#{result_type}", base)
182
185
  r.save unless r.nil?
@@ -241,123 +244,4 @@ class MiGA::Dataset < MiGA::MiGA
241
244
  adv
242
245
  end
243
246
 
244
- private
245
-
246
- def add_result_raw_reads(base)
247
- return nil unless result_files_exist?(base, ".1.fastq")
248
- r = MiGA::Result.new(base + ".json")
249
- add_files_to_ds_result(r, name,
250
- ( result_files_exist?(base, ".2.fastq") ?
251
- {:pair1=>".1.fastq", :pair2=>".2.fastq"} :
252
- {:single=>".1.fastq"} ))
253
- end
254
-
255
- def add_result_trimmed_reads(base)
256
- return nil unless result_files_exist?(base, ".1.clipped.fastq")
257
- r = MiGA::Result.new base + ".json"
258
- r = add_files_to_ds_result(r, name,
259
- {:pair1=>".1.clipped.fastq", :pair2=>".2.clipped.fastq"}) if
260
- result_files_exist?(base, ".2.clipped.fastq")
261
- r.add_file(:single, name + ".1.clipped.single.fastq")
262
- add_result(:raw_reads) #-> Post gunzip
263
- r
264
- end
265
-
266
- def add_result_read_quality(base)
267
- return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
268
- r = MiGA::Result.new(base + ".json")
269
- r = add_files_to_ds_result(r, name,
270
- {:solexaqa=>".solexaqa", :fastqc=>".fastqc"})
271
- add_result(:trimmed_reads) #-> Post cleaning
272
- r
273
- end
274
-
275
- def add_result_trimmed_fasta(base)
276
- return nil unless
277
- result_files_exist?(base, ".CoupledReads.fa") or
278
- result_files_exist?(base, ".SingleReads.fa")
279
- r = MiGA::Result.new base + ".json"
280
- r = add_files_to_ds_result(r, name, {:coupled=>".CoupledReads.fa",
281
- :pair1=>".1.fa", :pair2=>".2.fa"}) if
282
- result_files_exist?(base, ".CoupledReads.fa")
283
- r.add_file(:single, name + ".SingleReads.fa")
284
- add_result(:raw_reads) #-> Post gzip
285
- r
286
- end
287
-
288
- def add_result_assembly(base)
289
- return nil unless result_files_exist?(base, ".LargeContigs.fna")
290
- r = MiGA::Result.new(base + ".json")
291
- add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
292
- :allcontigs=>".AllContigs.fna"})
293
- end
294
-
295
- def add_result_cds(base)
296
- return nil unless result_files_exist?(base, %w[.faa .fna])
297
- r = MiGA::Result.new(base + ".json")
298
- add_files_to_ds_result(r, name, {:proteins=>".faa", :genes=>".fna",
299
- :gff2=>".gff2", :gff3=>".gff3", :tab=>".tab"})
300
- end
301
-
302
- def add_result_essential_genes(base)
303
- return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
304
- r = MiGA::Result.new(base + ".json")
305
- add_files_to_ds_result(r, name, {:ess_genes=>".ess.faa",
306
- :collection=>".ess", :report=>".ess/log"})
307
- end
308
-
309
- def add_result_ssu(base)
310
- return MiGA::Result.new(base + ".json") if result(:assembly).nil?
311
- return nil unless result_files_exist?(base, ".ssu.fa")
312
- r = MiGA::Result.new(base + ".json")
313
- add_files_to_ds_result(r, name, {:longest_ssu_gene=>".ssu.fa",
314
- :gff=>".ssu.gff", :all_ssu_genes=>".ssu.all.fa"})
315
- end
316
-
317
- def add_result_mytaxa(base)
318
- if is_multi?
319
- return nil unless result_files_exist?(base, ".mytaxa")
320
- r = MiGA::Result.new(base + ".json")
321
- add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :blast=>".blast",
322
- :mytaxain=>".mytaxain"})
323
- else
324
- MiGA::Result.new base + ".json"
325
- end
326
- end
327
-
328
- def add_result_mytaxa_scan(base)
329
- if is_nonmulti?
330
- return nil unless
331
- result_files_exist?(base, %w[.pdf .wintax .mytaxa .reg])
332
- r = MiGA::Result.new(base + ".json")
333
- add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :wintax=>".wintax",
334
- :blast=>".blast", :mytaxain=>".mytaxain", :report=>".pdf",
335
- :regions=>".reg", :gene_ids=>".wintax.genes",
336
- :region_ids=>".wintax.regions"})
337
- else
338
- MiGA::Result.new base + ".json"
339
- end
340
- end
341
-
342
- def add_result_distances(base)
343
- if is_nonmulti?
344
- pref = File.dirname(base)
345
- return nil unless
346
- File.exist?("#{pref}/#{is_ref? ? "01.haai" : "02.aai"}/#{name}.db")
347
- r = MiGA::Result.new(base + ".json")
348
- r.add_files({:haai_db=>"01.haai/#{name}.db",
349
- :aai_db=>"02.aai/#{name}.db", :ani_db=>"03.ani/#{name}.db"})
350
- else
351
- r = MiGA::Result.new "#{base}.json"
352
- end
353
- r
354
- end
355
-
356
- def add_files_to_ds_result(r, name, rel_files)
357
- files = {}
358
- rel_files.each{ |k,v| files[k] = name + v }
359
- r.add_files(files)
360
- r
361
- end
362
-
363
247
  end # class MiGA::Dataset
@@ -0,0 +1,177 @@
1
+
2
+ ##
3
+ # Helper module including specific functions to add dataset results.
4
+ module MiGA::DatasetResult
5
+
6
+ private
7
+
8
+ ##
9
+ # Add result type +:raw_reads+ at +base+.
10
+ def add_result_raw_reads(base)
11
+ return nil unless result_files_exist?(base, ".1.fastq")
12
+ r = MiGA::Result.new(base + ".json")
13
+ add_files_to_ds_result(r, name,
14
+ ( result_files_exist?(base, ".2.fastq") ?
15
+ {:pair1=>".1.fastq", :pair2=>".2.fastq"} :
16
+ {:single=>".1.fastq"} ))
17
+ end
18
+
19
+ ##
20
+ # Add result type +:trimmed_reads+ at +base+.
21
+ def add_result_trimmed_reads(base)
22
+ return nil unless result_files_exist?(base, ".1.clipped.fastq")
23
+ r = MiGA::Result.new base + ".json"
24
+ r = add_files_to_ds_result(r, name,
25
+ {:pair1=>".1.clipped.fastq", :pair2=>".2.clipped.fastq"}) if
26
+ result_files_exist?(base, ".2.clipped.fastq")
27
+ r.add_file(:single, name + ".1.clipped.single.fastq")
28
+ add_result(:raw_reads) #-> Post gunzip
29
+ r
30
+ end
31
+
32
+ ##
33
+ # Add result type +:read_quality+ at +base+.
34
+ def add_result_read_quality(base)
35
+ return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
36
+ r = MiGA::Result.new(base + ".json")
37
+ r = add_files_to_ds_result(r, name,
38
+ {:solexaqa=>".solexaqa", :fastqc=>".fastqc"})
39
+ add_result(:trimmed_reads) #-> Post cleaning
40
+ r
41
+ end
42
+
43
+ ##
44
+ # Add result type +:trimmed_fasta+ at +base+.
45
+ def add_result_trimmed_fasta(base)
46
+ return nil unless
47
+ result_files_exist?(base, ".CoupledReads.fa") or
48
+ result_files_exist?(base, ".SingleReads.fa") or
49
+ result_files_exist?(base, %w[.1.fasta .2.fasta])
50
+ r = MiGA::Result.new base + ".json"
51
+ r = add_files_to_ds_result(r, name, {:coupled=>".CoupledReads.fa",
52
+ :single=>".SingleReads.fa", :pair1=>".1.fasta", :pair2=>".2.fasta"})
53
+ add_result(:raw_reads) #-> Post gzip
54
+ r
55
+ end
56
+
57
+ ##
58
+ # Add result type +:assembly+ at +base+.
59
+ def add_result_assembly(base)
60
+ return nil unless result_files_exist?(base, ".LargeContigs.fna")
61
+ r = MiGA::Result.new(base + ".json")
62
+ add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
63
+ :allcontigs=>".AllContigs.fna"})
64
+ end
65
+
66
+ ##
67
+ # Add result type +:cds+ at +base+.
68
+ def add_result_cds(base)
69
+ return nil unless result_files_exist?(base, %w[.faa .fna])
70
+ r = MiGA::Result.new(base + ".json")
71
+ add_files_to_ds_result(r, name, {:proteins=>".faa", :genes=>".fna",
72
+ :gff2=>".gff2", :gff3=>".gff3", :tab=>".tab"})
73
+ end
74
+
75
+ ##
76
+ # Add result type +:essential_genes+ at +base+.
77
+ def add_result_essential_genes(base)
78
+ return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
79
+ r = MiGA::Result.new(base + ".json")
80
+ add_files_to_ds_result(r, name, {:ess_genes=>".ess.faa",
81
+ :collection=>".ess", :report=>".ess/log"})
82
+ end
83
+
84
+ ##
85
+ # Add result type +:ssu+ at +base+.
86
+ def add_result_ssu(base)
87
+ return MiGA::Result.new(base + ".json") if result(:assembly).nil?
88
+ return nil unless result_files_exist?(base, ".ssu.fa")
89
+ r = MiGA::Result.new(base + ".json")
90
+ add_files_to_ds_result(r, name, {:longest_ssu_gene=>".ssu.fa",
91
+ :gff=>".ssu.gff", :all_ssu_genes=>".ssu.all.fa"})
92
+ end
93
+
94
+ ##
95
+ # Add result type +:mytaxa+ at +base+.
96
+ def add_result_mytaxa(base)
97
+ if is_multi?
98
+ return nil unless result_files_exist?(base, ".mytaxa")
99
+ r = MiGA::Result.new(base + ".json")
100
+ add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :blast=>".blast",
101
+ :mytaxain=>".mytaxain"})
102
+ else
103
+ MiGA::Result.new base + ".json"
104
+ end
105
+ end
106
+
107
+ ##
108
+ # Add result type +:mytaxa_scan+ at +base+.
109
+ def add_result_mytaxa_scan(base)
110
+ if is_nonmulti?
111
+ return nil unless
112
+ result_files_exist?(base, %w[.pdf .wintax .mytaxa .reg])
113
+ r = MiGA::Result.new(base + ".json")
114
+ add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :wintax=>".wintax",
115
+ :blast=>".blast", :mytaxain=>".mytaxain", :report=>".pdf",
116
+ :regions=>".reg", :gene_ids=>".wintax.genes",
117
+ :region_ids=>".wintax.regions"})
118
+ else
119
+ MiGA::Result.new base + ".json"
120
+ end
121
+ end
122
+
123
+ ##
124
+ # Add result type +:distances+ at +base+.
125
+ def add_result_distances(base)
126
+ if is_nonmulti?
127
+ if is_ref?
128
+ add_result_distances_ref(base)
129
+ else
130
+ add_result_distances_nonref(base)
131
+ end
132
+ else
133
+ add_result_distances_multi(base)
134
+ end
135
+ end
136
+
137
+ ##
138
+ # Add result type +:distances+ for _multi_ datasets at +base+.
139
+ def add_result_distances_multi(base)
140
+ MiGA::Result.new "#{base}.json"
141
+ end
142
+
143
+ ##
144
+ # Add result type +:distances+ for _nonmulti_ reference datasets at +base+.
145
+ def add_result_distances_ref(base)
146
+ pref = File.dirname(base)
147
+ return nil unless
148
+ File.exist?("#{pref}/01.haai/#{name}.db")
149
+ r = MiGA::Result.new(base + ".json")
150
+ r.add_files({:haai_db=>"01.haai/#{name}.db",
151
+ :aai_db=>"02.aai/#{name}.db", :ani_db=>"03.ani/#{name}.db"})
152
+ r
153
+ end
154
+
155
+ ##
156
+ # Add result type +:distances+ for _nonmulti_ query datasets at +base+.
157
+ def add_result_distances_nonref(base)
158
+ return nil unless
159
+ result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
160
+ result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
161
+ r = MiGA::Result.new(base + ".json")
162
+ add_files_to_ds_result(r, name, {
163
+ :aai_medoids=>".aai-medoids.tsv",
164
+ :haai_db=>".haai.db", :aai_db=>".aai.db",
165
+ :ani_medoids=>".ani-medoids.tsv", :ani_db=>".ani.db"})
166
+ end
167
+
168
+ ##
169
+ # Add files in +rel_files+ Hash to the result +r+ with dataset name +name+.
170
+ def add_files_to_ds_result(r, name, rel_files)
171
+ files = {}
172
+ rel_files.each{ |k,v| files[k] = name + v }
173
+ r.add_files(files)
174
+ r
175
+ end
176
+
177
+ end
data/lib/miga/project.rb CHANGED
@@ -146,12 +146,26 @@ class MiGA::Project < MiGA::MiGA
146
146
  ##
147
147
  # Name of the project.
148
148
  def name ; metadata[:name] ; end
149
+
150
+ ##
151
+ # Type of project.
152
+ def type ; metadata[:type] ; end
153
+
154
+ ##
155
+ # Is this a clade project?
156
+ def is_clade? ; type==:clade ; end
149
157
 
150
158
  ##
151
159
  # Returns Array of MiGA::Dataset.
152
160
  def datasets
153
161
  metadata[:datasets].map{ |name| dataset(name) }
154
162
  end
163
+
164
+ ##
165
+ # Returns Array of String (without evaluating dataset objects).
166
+ def dataset_names
167
+ metadata[:datasets]
168
+ end
155
169
 
156
170
  ##
157
171
  # Returns MiGA::Dataset.
@@ -164,17 +178,24 @@ class MiGA::Project < MiGA::MiGA
164
178
  end
165
179
 
166
180
  ##
167
- # Iterate through datasets, with a single variable MiGA::Dataset passed to
168
- # +blk+.
181
+ # Iterate through datasets, with one or two variables passed to +blk+.
182
+ # If one, the dataset MiGA::Dataset object is passed. If two, the name and
183
+ # the dataset object are passed.
169
184
  def each_dataset(&blk)
170
- metadata[:datasets].each{ |name| blk.call(dataset(name)) }
185
+ metadata[:datasets].each do |name|
186
+ if blk.arity == 1
187
+ blk.call(dataset(name))
188
+ else
189
+ blk.call(name, dataset(name))
190
+ end
191
+ end
171
192
  end
172
193
 
173
194
  ##
174
195
  # Add dataset identified by +name+ and return MiGA::Dataset.
175
196
  def add_dataset(name)
176
197
  unless metadata[:datasets].include? name
177
- d = MiGA::Dataset.new(self, name)
198
+ MiGA::Dataset.new(self, name)
178
199
  @metadata[:datasets] << name
179
200
  save
180
201
  end
@@ -324,7 +345,9 @@ class MiGA::Project < MiGA::MiGA
324
345
 
325
346
  def add_result_clade_finding(base)
326
347
  return nil unless result_files_exist?(base,
327
- %w[.proposed-clades .pdf .1.classif .1.medoids .class.tsv .class.nwk])
348
+ %w[.proposed-clades])
349
+ return nil unless is_clade? or result_files_exist?(base,
350
+ %w[.pdf .classif .medoids .class.tsv .class.nwk])
328
351
  r = add_result_iter_clades(base)
329
352
  r.add_file(:aai_tree, "miga-project.aai.nwk")
330
353
  r.add_file(:proposal, "miga-project.proposed-clades")
@@ -335,9 +358,9 @@ class MiGA::Project < MiGA::MiGA
335
358
 
336
359
  def add_result_subclades(base)
337
360
  return nil unless result_files_exist?(base,
338
- %w[.pdf .1.classif .1.medoids .class.tsv .class.nwk])
361
+ %w[.pdf .classif .medoids .class.tsv .class.nwk])
339
362
  r = add_result_iter_clades(base)
340
- r.add_file(:ani_tree, "miga-project.ani.nwk")
363
+ r.add_file(:ani_tree, "miga-project.ani.nwk")
341
364
  r
342
365
  end
343
366
 
@@ -346,11 +369,8 @@ class MiGA::Project < MiGA::MiGA
346
369
  r.add_file(:report, "miga-project.pdf")
347
370
  r.add_file(:class_table, "miga-project.class.tsv")
348
371
  r.add_file(:class_tree, "miga-project.class.nwk")
349
- (1..6).each do |i|
350
- %w{classif medoids}.each do |m|
351
- r.add_file("#{m}_#{i}".to_sym, "miga-project.#{i}.#{m}")
352
- end
353
- end
372
+ r.add_file(:classif, "miga-project.classif")
373
+ r.add_file(:medoids, "miga-project.medoids")
354
374
  r
355
375
  end
356
376
 
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.2, 0, 9]
13
+ VERSION = [0.2, 1, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2016, 04, 13)
21
+ VERSION_DATE = Date.new(2016, 04, 20)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.