miga-base 0.2.0.9 → 0.2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/Rakefile +3 -0
  3. data/actions/add_result.rb +37 -0
  4. data/actions/add_taxonomy.rb +63 -0
  5. data/actions/create_dataset.rb +49 -0
  6. data/actions/create_project.rb +46 -0
  7. data/actions/daemon.rb +50 -0
  8. data/actions/date.rb +14 -0
  9. data/actions/{download_dataset → download_dataset.rb} +5 -28
  10. data/actions/find_datasets.rb +41 -0
  11. data/actions/import_datasets.rb +47 -0
  12. data/actions/index_taxonomy.rb +46 -0
  13. data/actions/list_datasets.rb +50 -0
  14. data/actions/list_files.rb +43 -0
  15. data/actions/project_info.rb +40 -0
  16. data/actions/unlink_dataset.rb +28 -0
  17. data/bin/miga +129 -33
  18. data/lib/miga/daemon.rb +48 -34
  19. data/lib/miga/dataset.rb +7 -123
  20. data/lib/miga/dataset_result.rb +177 -0
  21. data/lib/miga/project.rb +32 -12
  22. data/lib/miga/version.rb +2 -2
  23. data/scripts/_distances_functions.bash +82 -0
  24. data/scripts/_distances_noref_nomulti.bash +96 -67
  25. data/scripts/_distances_ref_nomulti.bash +54 -85
  26. data/scripts/assembly.bash +16 -3
  27. data/scripts/clade_finding.bash +20 -18
  28. data/scripts/distances.bash +2 -1
  29. data/scripts/init.bash +2 -6
  30. data/scripts/subclades.bash +4 -5
  31. data/test/common_test.rb +2 -2
  32. data/test/daemon_test.rb +73 -1
  33. data/test/project_test.rb +26 -2
  34. data/test/taxonomy_test.rb +10 -0
  35. data/test/test_helper.rb +1 -1
  36. data/utils/subclades-compile.rb +4 -2
  37. data/utils/subclades.R +140 -158
  38. metadata +48 -44
  39. data/actions/add_result +0 -58
  40. data/actions/add_taxonomy +0 -83
  41. data/actions/create_dataset +0 -61
  42. data/actions/create_project +0 -67
  43. data/actions/daemon +0 -66
  44. data/actions/find_datasets +0 -61
  45. data/actions/import_datasets +0 -83
  46. data/actions/index_taxonomy +0 -68
  47. data/actions/list_datasets +0 -81
  48. data/actions/list_files +0 -63
  49. data/actions/unlink_dataset +0 -49
data/lib/miga/dataset.rb CHANGED
@@ -3,11 +3,14 @@
3
3
 
4
4
  require "miga/metadata"
5
5
  require "miga/result"
6
+ require "miga/dataset_result"
6
7
 
7
8
  ##
8
9
  # Dataset representation in MiGA.
9
10
  class MiGA::Dataset < MiGA::MiGA
10
11
 
12
+ include MiGA::DatasetResult
13
+
11
14
  # Class-level
12
15
 
13
16
  ##
@@ -52,7 +55,7 @@ class MiGA::Dataset < MiGA::MiGA
52
55
 
53
56
  ##
54
57
  # Tasks to be excluded from query datasets.
55
- @@EXCLUDE_NOREF_TASKS = [:essential_genes, :mytaxa_scan]
58
+ @@EXCLUDE_NOREF_TASKS = [:mytaxa_scan]
56
59
 
57
60
  ##
58
61
  # Tasks to be executed only in datasets that are not multi-organism. These
@@ -137,14 +140,14 @@ class MiGA::Dataset < MiGA::MiGA
137
140
  # Is this dataset known to be multi-organism?
138
141
  def is_multi?
139
142
  return false if self.metadata[:type].nil?
140
- return @@KNOWN_TYPES[self.metadata[:type]][:multi]
143
+ @@KNOWN_TYPES[self.metadata[:type]][:multi]
141
144
  end
142
145
 
143
146
  ##
144
147
  # Is this dataset known to be single-organism?
145
148
  def is_nonmulti?
146
149
  return false if self.metadata[:type].nil?
147
- return !@@KNOWN_TYPES[self.metadata[:type]][:multi]
150
+ !@@KNOWN_TYPES[self.metadata[:type]][:multi]
148
151
  end
149
152
 
150
153
  ##
@@ -176,7 +179,7 @@ class MiGA::Dataset < MiGA::MiGA
176
179
  return nil if @@RESULT_DIRS[result_type].nil?
177
180
  base = project.path + "/data/" + @@RESULT_DIRS[result_type] +
178
181
  "/" + name
179
- return MiGA::Result.load(base + ".json") unless save
182
+ return MiGA::Result.load("#{base}.json") unless save
180
183
  return nil unless result_files_exist?(base, ".done")
181
184
  r = self.send("add_result_#{result_type}", base)
182
185
  r.save unless r.nil?
@@ -241,123 +244,4 @@ class MiGA::Dataset < MiGA::MiGA
241
244
  adv
242
245
  end
243
246
 
244
- private
245
-
246
- def add_result_raw_reads(base)
247
- return nil unless result_files_exist?(base, ".1.fastq")
248
- r = MiGA::Result.new(base + ".json")
249
- add_files_to_ds_result(r, name,
250
- ( result_files_exist?(base, ".2.fastq") ?
251
- {:pair1=>".1.fastq", :pair2=>".2.fastq"} :
252
- {:single=>".1.fastq"} ))
253
- end
254
-
255
- def add_result_trimmed_reads(base)
256
- return nil unless result_files_exist?(base, ".1.clipped.fastq")
257
- r = MiGA::Result.new base + ".json"
258
- r = add_files_to_ds_result(r, name,
259
- {:pair1=>".1.clipped.fastq", :pair2=>".2.clipped.fastq"}) if
260
- result_files_exist?(base, ".2.clipped.fastq")
261
- r.add_file(:single, name + ".1.clipped.single.fastq")
262
- add_result(:raw_reads) #-> Post gunzip
263
- r
264
- end
265
-
266
- def add_result_read_quality(base)
267
- return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
268
- r = MiGA::Result.new(base + ".json")
269
- r = add_files_to_ds_result(r, name,
270
- {:solexaqa=>".solexaqa", :fastqc=>".fastqc"})
271
- add_result(:trimmed_reads) #-> Post cleaning
272
- r
273
- end
274
-
275
- def add_result_trimmed_fasta(base)
276
- return nil unless
277
- result_files_exist?(base, ".CoupledReads.fa") or
278
- result_files_exist?(base, ".SingleReads.fa")
279
- r = MiGA::Result.new base + ".json"
280
- r = add_files_to_ds_result(r, name, {:coupled=>".CoupledReads.fa",
281
- :pair1=>".1.fa", :pair2=>".2.fa"}) if
282
- result_files_exist?(base, ".CoupledReads.fa")
283
- r.add_file(:single, name + ".SingleReads.fa")
284
- add_result(:raw_reads) #-> Post gzip
285
- r
286
- end
287
-
288
- def add_result_assembly(base)
289
- return nil unless result_files_exist?(base, ".LargeContigs.fna")
290
- r = MiGA::Result.new(base + ".json")
291
- add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
292
- :allcontigs=>".AllContigs.fna"})
293
- end
294
-
295
- def add_result_cds(base)
296
- return nil unless result_files_exist?(base, %w[.faa .fna])
297
- r = MiGA::Result.new(base + ".json")
298
- add_files_to_ds_result(r, name, {:proteins=>".faa", :genes=>".fna",
299
- :gff2=>".gff2", :gff3=>".gff3", :tab=>".tab"})
300
- end
301
-
302
- def add_result_essential_genes(base)
303
- return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
304
- r = MiGA::Result.new(base + ".json")
305
- add_files_to_ds_result(r, name, {:ess_genes=>".ess.faa",
306
- :collection=>".ess", :report=>".ess/log"})
307
- end
308
-
309
- def add_result_ssu(base)
310
- return MiGA::Result.new(base + ".json") if result(:assembly).nil?
311
- return nil unless result_files_exist?(base, ".ssu.fa")
312
- r = MiGA::Result.new(base + ".json")
313
- add_files_to_ds_result(r, name, {:longest_ssu_gene=>".ssu.fa",
314
- :gff=>".ssu.gff", :all_ssu_genes=>".ssu.all.fa"})
315
- end
316
-
317
- def add_result_mytaxa(base)
318
- if is_multi?
319
- return nil unless result_files_exist?(base, ".mytaxa")
320
- r = MiGA::Result.new(base + ".json")
321
- add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :blast=>".blast",
322
- :mytaxain=>".mytaxain"})
323
- else
324
- MiGA::Result.new base + ".json"
325
- end
326
- end
327
-
328
- def add_result_mytaxa_scan(base)
329
- if is_nonmulti?
330
- return nil unless
331
- result_files_exist?(base, %w[.pdf .wintax .mytaxa .reg])
332
- r = MiGA::Result.new(base + ".json")
333
- add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :wintax=>".wintax",
334
- :blast=>".blast", :mytaxain=>".mytaxain", :report=>".pdf",
335
- :regions=>".reg", :gene_ids=>".wintax.genes",
336
- :region_ids=>".wintax.regions"})
337
- else
338
- MiGA::Result.new base + ".json"
339
- end
340
- end
341
-
342
- def add_result_distances(base)
343
- if is_nonmulti?
344
- pref = File.dirname(base)
345
- return nil unless
346
- File.exist?("#{pref}/#{is_ref? ? "01.haai" : "02.aai"}/#{name}.db")
347
- r = MiGA::Result.new(base + ".json")
348
- r.add_files({:haai_db=>"01.haai/#{name}.db",
349
- :aai_db=>"02.aai/#{name}.db", :ani_db=>"03.ani/#{name}.db"})
350
- else
351
- r = MiGA::Result.new "#{base}.json"
352
- end
353
- r
354
- end
355
-
356
- def add_files_to_ds_result(r, name, rel_files)
357
- files = {}
358
- rel_files.each{ |k,v| files[k] = name + v }
359
- r.add_files(files)
360
- r
361
- end
362
-
363
247
  end # class MiGA::Dataset
@@ -0,0 +1,177 @@
1
+
2
+ ##
3
+ # Helper module including specific functions to add dataset results.
4
+ module MiGA::DatasetResult
5
+
6
+ private
7
+
8
+ ##
9
+ # Add result type +:raw_reads+ at +base+.
10
+ def add_result_raw_reads(base)
11
+ return nil unless result_files_exist?(base, ".1.fastq")
12
+ r = MiGA::Result.new(base + ".json")
13
+ add_files_to_ds_result(r, name,
14
+ ( result_files_exist?(base, ".2.fastq") ?
15
+ {:pair1=>".1.fastq", :pair2=>".2.fastq"} :
16
+ {:single=>".1.fastq"} ))
17
+ end
18
+
19
+ ##
20
+ # Add result type +:trimmed_reads+ at +base+.
21
+ def add_result_trimmed_reads(base)
22
+ return nil unless result_files_exist?(base, ".1.clipped.fastq")
23
+ r = MiGA::Result.new base + ".json"
24
+ r = add_files_to_ds_result(r, name,
25
+ {:pair1=>".1.clipped.fastq", :pair2=>".2.clipped.fastq"}) if
26
+ result_files_exist?(base, ".2.clipped.fastq")
27
+ r.add_file(:single, name + ".1.clipped.single.fastq")
28
+ add_result(:raw_reads) #-> Post gunzip
29
+ r
30
+ end
31
+
32
+ ##
33
+ # Add result type +:read_quality+ at +base+.
34
+ def add_result_read_quality(base)
35
+ return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
36
+ r = MiGA::Result.new(base + ".json")
37
+ r = add_files_to_ds_result(r, name,
38
+ {:solexaqa=>".solexaqa", :fastqc=>".fastqc"})
39
+ add_result(:trimmed_reads) #-> Post cleaning
40
+ r
41
+ end
42
+
43
+ ##
44
+ # Add result type +:trimmed_fasta+ at +base+.
45
+ def add_result_trimmed_fasta(base)
46
+ return nil unless
47
+ result_files_exist?(base, ".CoupledReads.fa") or
48
+ result_files_exist?(base, ".SingleReads.fa") or
49
+ result_files_exist?(base, %w[.1.fasta .2.fasta])
50
+ r = MiGA::Result.new base + ".json"
51
+ r = add_files_to_ds_result(r, name, {:coupled=>".CoupledReads.fa",
52
+ :single=>".SingleReads.fa", :pair1=>".1.fasta", :pair2=>".2.fasta"})
53
+ add_result(:raw_reads) #-> Post gzip
54
+ r
55
+ end
56
+
57
+ ##
58
+ # Add result type +:assembly+ at +base+.
59
+ def add_result_assembly(base)
60
+ return nil unless result_files_exist?(base, ".LargeContigs.fna")
61
+ r = MiGA::Result.new(base + ".json")
62
+ add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
63
+ :allcontigs=>".AllContigs.fna"})
64
+ end
65
+
66
+ ##
67
+ # Add result type +:cds+ at +base+.
68
+ def add_result_cds(base)
69
+ return nil unless result_files_exist?(base, %w[.faa .fna])
70
+ r = MiGA::Result.new(base + ".json")
71
+ add_files_to_ds_result(r, name, {:proteins=>".faa", :genes=>".fna",
72
+ :gff2=>".gff2", :gff3=>".gff3", :tab=>".tab"})
73
+ end
74
+
75
+ ##
76
+ # Add result type +:essential_genes+ at +base+.
77
+ def add_result_essential_genes(base)
78
+ return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
79
+ r = MiGA::Result.new(base + ".json")
80
+ add_files_to_ds_result(r, name, {:ess_genes=>".ess.faa",
81
+ :collection=>".ess", :report=>".ess/log"})
82
+ end
83
+
84
+ ##
85
+ # Add result type +:ssu+ at +base+.
86
+ def add_result_ssu(base)
87
+ return MiGA::Result.new(base + ".json") if result(:assembly).nil?
88
+ return nil unless result_files_exist?(base, ".ssu.fa")
89
+ r = MiGA::Result.new(base + ".json")
90
+ add_files_to_ds_result(r, name, {:longest_ssu_gene=>".ssu.fa",
91
+ :gff=>".ssu.gff", :all_ssu_genes=>".ssu.all.fa"})
92
+ end
93
+
94
+ ##
95
+ # Add result type +:mytaxa+ at +base+.
96
+ def add_result_mytaxa(base)
97
+ if is_multi?
98
+ return nil unless result_files_exist?(base, ".mytaxa")
99
+ r = MiGA::Result.new(base + ".json")
100
+ add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :blast=>".blast",
101
+ :mytaxain=>".mytaxain"})
102
+ else
103
+ MiGA::Result.new base + ".json"
104
+ end
105
+ end
106
+
107
+ ##
108
+ # Add result type +:mytaxa_scan+ at +base+.
109
+ def add_result_mytaxa_scan(base)
110
+ if is_nonmulti?
111
+ return nil unless
112
+ result_files_exist?(base, %w[.pdf .wintax .mytaxa .reg])
113
+ r = MiGA::Result.new(base + ".json")
114
+ add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :wintax=>".wintax",
115
+ :blast=>".blast", :mytaxain=>".mytaxain", :report=>".pdf",
116
+ :regions=>".reg", :gene_ids=>".wintax.genes",
117
+ :region_ids=>".wintax.regions"})
118
+ else
119
+ MiGA::Result.new base + ".json"
120
+ end
121
+ end
122
+
123
+ ##
124
+ # Add result type +:distances+ at +base+.
125
+ def add_result_distances(base)
126
+ if is_nonmulti?
127
+ if is_ref?
128
+ add_result_distances_ref(base)
129
+ else
130
+ add_result_distances_nonref(base)
131
+ end
132
+ else
133
+ add_result_distances_multi(base)
134
+ end
135
+ end
136
+
137
+ ##
138
+ # Add result type +:distances+ for _multi_ datasets at +base+.
139
+ def add_result_distances_multi(base)
140
+ MiGA::Result.new "#{base}.json"
141
+ end
142
+
143
+ ##
144
+ # Add result type +:distances+ for _nonmulti_ reference datasets at +base+.
145
+ def add_result_distances_ref(base)
146
+ pref = File.dirname(base)
147
+ return nil unless
148
+ File.exist?("#{pref}/01.haai/#{name}.db")
149
+ r = MiGA::Result.new(base + ".json")
150
+ r.add_files({:haai_db=>"01.haai/#{name}.db",
151
+ :aai_db=>"02.aai/#{name}.db", :ani_db=>"03.ani/#{name}.db"})
152
+ r
153
+ end
154
+
155
+ ##
156
+ # Add result type +:distances+ for _nonmulti_ query datasets at +base+.
157
+ def add_result_distances_nonref(base)
158
+ return nil unless
159
+ result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
160
+ result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
161
+ r = MiGA::Result.new(base + ".json")
162
+ add_files_to_ds_result(r, name, {
163
+ :aai_medoids=>".aai-medoids.tsv",
164
+ :haai_db=>".haai.db", :aai_db=>".aai.db",
165
+ :ani_medoids=>".ani-medoids.tsv", :ani_db=>".ani.db"})
166
+ end
167
+
168
+ ##
169
+ # Add files in +rel_files+ Hash to the result +r+ with dataset name +name+.
170
+ def add_files_to_ds_result(r, name, rel_files)
171
+ files = {}
172
+ rel_files.each{ |k,v| files[k] = name + v }
173
+ r.add_files(files)
174
+ r
175
+ end
176
+
177
+ end
data/lib/miga/project.rb CHANGED
@@ -146,12 +146,26 @@ class MiGA::Project < MiGA::MiGA
146
146
  ##
147
147
  # Name of the project.
148
148
  def name ; metadata[:name] ; end
149
+
150
+ ##
151
+ # Type of project.
152
+ def type ; metadata[:type] ; end
153
+
154
+ ##
155
+ # Is this a clade project?
156
+ def is_clade? ; type==:clade ; end
149
157
 
150
158
  ##
151
159
  # Returns Array of MiGA::Dataset.
152
160
  def datasets
153
161
  metadata[:datasets].map{ |name| dataset(name) }
154
162
  end
163
+
164
+ ##
165
+ # Returns Array of String (without evaluating dataset objects).
166
+ def dataset_names
167
+ metadata[:datasets]
168
+ end
155
169
 
156
170
  ##
157
171
  # Returns MiGA::Dataset.
@@ -164,17 +178,24 @@ class MiGA::Project < MiGA::MiGA
164
178
  end
165
179
 
166
180
  ##
167
- # Iterate through datasets, with a single variable MiGA::Dataset passed to
168
- # +blk+.
181
+ # Iterate through datasets, with one or two variables passed to +blk+.
182
+ # If one, the dataset MiGA::Dataset object is passed. If two, the name and
183
+ # the dataset object are passed.
169
184
  def each_dataset(&blk)
170
- metadata[:datasets].each{ |name| blk.call(dataset(name)) }
185
+ metadata[:datasets].each do |name|
186
+ if blk.arity == 1
187
+ blk.call(dataset(name))
188
+ else
189
+ blk.call(name, dataset(name))
190
+ end
191
+ end
171
192
  end
172
193
 
173
194
  ##
174
195
  # Add dataset identified by +name+ and return MiGA::Dataset.
175
196
  def add_dataset(name)
176
197
  unless metadata[:datasets].include? name
177
- d = MiGA::Dataset.new(self, name)
198
+ MiGA::Dataset.new(self, name)
178
199
  @metadata[:datasets] << name
179
200
  save
180
201
  end
@@ -324,7 +345,9 @@ class MiGA::Project < MiGA::MiGA
324
345
 
325
346
  def add_result_clade_finding(base)
326
347
  return nil unless result_files_exist?(base,
327
- %w[.proposed-clades .pdf .1.classif .1.medoids .class.tsv .class.nwk])
348
+ %w[.proposed-clades])
349
+ return nil unless is_clade? or result_files_exist?(base,
350
+ %w[.pdf .classif .medoids .class.tsv .class.nwk])
328
351
  r = add_result_iter_clades(base)
329
352
  r.add_file(:aai_tree, "miga-project.aai.nwk")
330
353
  r.add_file(:proposal, "miga-project.proposed-clades")
@@ -335,9 +358,9 @@ class MiGA::Project < MiGA::MiGA
335
358
 
336
359
  def add_result_subclades(base)
337
360
  return nil unless result_files_exist?(base,
338
- %w[.pdf .1.classif .1.medoids .class.tsv .class.nwk])
361
+ %w[.pdf .classif .medoids .class.tsv .class.nwk])
339
362
  r = add_result_iter_clades(base)
340
- r.add_file(:ani_tree, "miga-project.ani.nwk")
363
+ r.add_file(:ani_tree, "miga-project.ani.nwk")
341
364
  r
342
365
  end
343
366
 
@@ -346,11 +369,8 @@ class MiGA::Project < MiGA::MiGA
346
369
  r.add_file(:report, "miga-project.pdf")
347
370
  r.add_file(:class_table, "miga-project.class.tsv")
348
371
  r.add_file(:class_tree, "miga-project.class.nwk")
349
- (1..6).each do |i|
350
- %w{classif medoids}.each do |m|
351
- r.add_file("#{m}_#{i}".to_sym, "miga-project.#{i}.#{m}")
352
- end
353
- end
372
+ r.add_file(:classif, "miga-project.classif")
373
+ r.add_file(:medoids, "miga-project.medoids")
354
374
  r
355
375
  end
356
376
 
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.2, 0, 9]
13
+ VERSION = [0.2, 1, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2016, 04, 13)
21
+ VERSION_DATE = Date.new(2016, 04, 20)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.