miga-base 0.6.2.0 → 0.6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2c317237c4cd2f049b56af6021272208c206f1810052dde4b1b5b41394f37139
4
- data.tar.gz: '066558242d9bcc1a96f166349815398296cdb976190853a18ca52bdafe84c263'
3
+ metadata.gz: 8b225951f374bcd267560e5bd8234fb88bcd6b0c11b0561fb4b3b479af39c4b3
4
+ data.tar.gz: 9b32d40ea94ceb526fe0ba732c77fce978b0cba5decffd4e1c0d701594670dbd
5
5
  SHA512:
6
- metadata.gz: 50b395740f68543e7aa79bf0e3631cb86248fffbcd9aac942e2d30681579a42e528d552638701f153605f68ea9da389762b6d068455e9b67cbfd9b66cde879c0
7
- data.tar.gz: d20fd84e71bd4d7a8ebf5ff1060e12096d78477b662098e013b88ea9a19e54b4665a71bbfa0e49fd57df47a61b3a0780ed469fcb004b1384dd4e4f2304320666
6
+ metadata.gz: b7fe9b2cbb09b6612b762c7c9202b4b27ece7a0b6f4dd23eecee9bddc835c130f15a63772011106a1f0f1425e5445fa1541a8ebc81661ef341dcacec3ae22193
7
+ data.tar.gz: 501fa797aa6726ac5cdc6c043a3073d03a7ba3ed81d63ebac9ef989a76aa1947806c9052ab50c089567001ad1b1b92ad04cc05ea4ca33691de36c4dcb2e34b52
@@ -11,20 +11,25 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
11
11
  opt.on(
12
12
  '-n', '--name STRING',
13
13
  'Name of the project'
14
- ){ |v| cli[:name] = v }
14
+ ) { |v| cli[:name] = v }
15
15
  opt.on(
16
16
  '-d', '--description STRING',
17
17
  'Description of the project'
18
- ){ |v| cli[:description] = v }
18
+ ) { |v| cli[:description] = v }
19
19
  opt.on(
20
20
  '-c', '--comments STRING',
21
21
  'Comments on the project'
22
- ){ |v| cli[:comments] = v }
22
+ ) { |v| cli[:comments] = v }
23
+ opt.on(
24
+ '--fast',
25
+ 'Use faster identity engines (Diamond-AAI and FastANI)',
26
+ 'Equivalent to: -m aai_p=diamond,ani_p=fastani'
27
+ ) { |v| cli[:fast] = v }
23
28
  opt.on(
24
29
  '-m', '--metadata STRING',
25
30
  'Metadata as key-value pairs separated by = and delimited by comma',
26
31
  'Values are saved as strings except for booleans (true / false) or nil'
27
- ){ |v| cli[:metadata] = v }
32
+ ) { |v| cli[:metadata] = v }
28
33
  end
29
34
  end
30
35
 
@@ -40,6 +45,10 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
40
45
  raise 'Project already exists, aborting.' if Project.exist? cli[:project]
41
46
  p = Project.new(cli[:project], false)
42
47
  p = cli.add_metadata(p)
48
+ if cli[:fast]
49
+ p.metadata[:aai_p] = 'diamond'
50
+ p.metadata[:ani_p] = 'fastani'
51
+ end
43
52
  p.save
44
53
  end
45
54
  end
@@ -9,10 +9,9 @@ require 'sqlite3'
9
9
  ##
10
10
  # Dataset representation in MiGA.
11
11
  class MiGA::Dataset < MiGA::MiGA
12
-
13
12
  include MiGA::Dataset::Result
14
13
  include MiGA::Dataset::Hooks
15
-
14
+
16
15
  # Class-level
17
16
  class << self
18
17
 
@@ -27,7 +26,7 @@ class MiGA::Dataset < MiGA::MiGA
27
26
  def INFO_FIELDS
28
27
  %w(name created updated type ref user description comments)
29
28
  end
30
-
29
+
31
30
  end
32
31
 
33
32
  # Instance-level
@@ -35,11 +34,11 @@ class MiGA::Dataset < MiGA::MiGA
35
34
  ##
36
35
  # MiGA::Project that contains the dataset.
37
36
  attr_reader :project
38
-
37
+
39
38
  ##
40
39
  # Datasets are uniquely identified by +name+ in a project.
41
40
  attr_reader :name
42
-
41
+
43
42
  ##
44
43
  # Create a MiGA::Dataset object in a +project+ MiGA::Project with a
45
44
  # uniquely identifying +name+. +is_ref+ indicates if the dataset is to
@@ -52,21 +51,25 @@ class MiGA::Dataset < MiGA::MiGA
52
51
  end
53
52
  @project = project
54
53
  @name = name
54
+ @metadata = nil
55
55
  metadata[:ref] = is_ref
56
56
  @metadata_future = [
57
57
  File.expand_path("metadata/#{name}.json", project.path),
58
58
  metadata
59
59
  ]
60
60
  save unless File.exist? @metadata_future[0]
61
- pull_hook :on_load
62
61
  end
63
62
 
64
63
  ##
65
- # MiGA::Metadata with information about the dataset.
64
+ # MiGA::Metadata with information about the dataset
66
65
  def metadata
67
- @metadata ||= MiGA::Metadata.new(*@metadata_future)
66
+ if @metadata.nil?
67
+ @metadata = MiGA::Metadata.new(*@metadata_future)
68
+ pull_hook :on_load
69
+ end
70
+ @metadata
68
71
  end
69
-
72
+
70
73
  ##
71
74
  # Save any changes you've made in the dataset.
72
75
  def save
@@ -74,11 +77,11 @@ class MiGA::Dataset < MiGA::MiGA
74
77
  metadata.save
75
78
  pull_hook :on_save
76
79
  end
77
-
80
+
78
81
  ##
79
82
  # Get the type of dataset as Symbol.
80
83
  def type ; metadata[:type] ; end
81
-
84
+
82
85
  ##
83
86
  # Delete the dataset with all its contents (including results) and returns
84
87
  # nil.
@@ -103,7 +106,7 @@ class MiGA::Dataset < MiGA::MiGA
103
106
  self.metadata.save
104
107
  pull_hook :on_activate
105
108
  end
106
-
109
+
107
110
  ##
108
111
  # Get standard metadata values for the dataset as Array.
109
112
  def info
@@ -111,7 +114,7 @@ class MiGA::Dataset < MiGA::MiGA
111
114
  (k == 'name') ? self.name : metadata[k.to_sym]
112
115
  end
113
116
  end
114
-
117
+
115
118
  ##
116
119
  # Is this dataset a reference?
117
120
  def is_ref? ; !!metadata[:ref] ; end
@@ -119,14 +122,14 @@ class MiGA::Dataset < MiGA::MiGA
119
122
  ##
120
123
  # Is this dataset a query (non-reference)?
121
124
  def is_query? ; !metadata[:ref] ; end
122
-
125
+
123
126
  ##
124
127
  # Is this dataset known to be multi-organism?
125
128
  def is_multi?
126
129
  return false if metadata[:type].nil? or @@KNOWN_TYPES[type].nil?
127
130
  @@KNOWN_TYPES[type][:multi]
128
131
  end
129
-
132
+
130
133
  ##
131
134
  # Is this dataset known to be single-organism?
132
135
  def is_nonmulti?
@@ -139,7 +142,7 @@ class MiGA::Dataset < MiGA::MiGA
139
142
  def is_active?
140
143
  metadata[:inactive].nil? or !metadata[:inactive]
141
144
  end
142
-
145
+
143
146
  ##
144
147
  # Should I ignore +task+ for this dataset?
145
148
  def ignore_task?(task)
@@ -24,10 +24,10 @@ module MiGA::Dataset::Result
24
24
  end
25
25
 
26
26
  ##
27
- # For each result executes the 2-ary +blk+ block: key symbol and MiGA::Result
28
- def each_result(&blk)
29
- @@RESULT_DIRS.keys.each do |k|
30
- blk.call(k, result(k)) unless result(k).nil?
27
+ # For each result executes the 2-ary block: key symbol and MiGA::Result
28
+ def each_result
29
+ @@RESULT_DIRS.each_key do |k|
30
+ yield(k, result(k)) unless result(k).nil?
31
31
  end
32
32
  end
33
33
 
@@ -50,20 +50,20 @@ module MiGA::Dataset::Result
50
50
  r_pre = MiGA::Result.load("#{base}.json")
51
51
  return r_pre if (r_pre.nil? && !save) || !r_pre.nil?
52
52
  end
53
- r = if File.exist?("#{base}.done")
54
- self.send("add_result_#{result_type}", base, opts)
55
- end
56
- unless r.nil?
57
- r.save
58
- pull_hook(:on_result_ready, result_type)
59
- end
53
+ fun = "add_result_#{result_type}"
54
+ r = send(fun, base, opts) if File.exist?("#{base}.done")
55
+ return if r.nil?
56
+ r.save
57
+ pull_hook(:on_result_ready, result_type)
60
58
  r
61
59
  end
62
60
 
63
61
  ##
64
62
  # Gets a result as MiGA::Result for the datasets with +result_type+. This is
65
63
  # equivalent to +add_result(result_type, false)+.
66
- def get_result(result_type) ; add_result(result_type, false) ; end
64
+ def get_result(result_type)
65
+ add_result(result_type, false)
66
+ end
67
67
 
68
68
  ##
69
69
  # Returns the key symbol of the first registered result (sorted by the
@@ -179,7 +179,7 @@ module MiGA::Dataset::Result
179
179
  r = get_result(:distances)
180
180
  ref = project.datasets.select(&:is_ref?).select(&:is_active?).map(&:name)
181
181
  return if r.nil?
182
- [:haai_db, :aai_db, :ani_db].each do |db_type|
182
+ %i[haai_db aai_db ani_db].each do |db_type|
183
183
  db = r.file_path(db_type)
184
184
  next if db.nil? || !File.size?(db)
185
185
  sqlite_db = SQLite3::Database.new db
@@ -198,9 +198,8 @@ module MiGA::Dataset::Result
198
198
  # Add result type +:raw_reads+ at +base+ (no +_opts+ supported)
199
199
  def add_result_raw_reads(base, _opts)
200
200
  return nil unless result_files_exist?(base, '.1.fastq')
201
- r = MiGA::Result.new("#{base}.json")
202
201
  add_files_to_ds_result(
203
- r, name,
202
+ MiGA::Result.new("#{base}.json"), name,
204
203
  if result_files_exist?(base, '.2.fastq')
205
204
  { pair1: '.1.fastq', pair2: '.2.fastq' }
206
205
  else
@@ -213,25 +212,30 @@ module MiGA::Dataset::Result
213
212
  # Add result type +:trimmed_reads+ at +base+ (no +_opts+ supported)
214
213
  def add_result_trimmed_reads(base, _opts)
215
214
  return nil unless result_files_exist?(base, '.1.clipped.fastq')
216
- r = MiGA::Result.new("#{base}.json")
217
- if result_files_exist?(base, '.2.clipped.fastq')
218
- r = add_files_to_ds_result(r, name,
219
- pair1: '.1.clipped.fastq',
220
- pair2: '.2.clipped.fastq',
221
- single: '.1.clipped.single.fastq')
222
- else
223
- r = add_files_to_ds_result(r, name, single: '.1.clipped.fastq')
215
+ add_files_to_ds_result(
216
+ MiGA::Result.new("#{base}.json"), name,
217
+ if result_files_exist?(base, '.2.clipped.fastq')
218
+ {
219
+ pair1: '.1.clipped.fastq',
220
+ pair2: '.2.clipped.fastq',
221
+ single: '.1.clipped.single.fastq'
222
+ }
223
+ else
224
+ { single: '.1.clipped.fastq' }
225
+ end
226
+ ).tap do |r|
227
+ r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
224
228
  end
225
- r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
226
- r
227
229
  end
228
230
 
229
231
  ##
230
232
  # Add result type +:read_quality+ at +base+ (no +_opts+ supported)
231
233
  def add_result_read_quality(base, _opts)
232
234
  return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
233
- r = MiGA::Result.new("#{base}.json")
234
- add_files_to_ds_result(r, name, solexaqa: '.solexaqa', fastqc: '.fastqc')
235
+ add_files_to_ds_result(
236
+ MiGA::Result.new("#{base}.json"), name,
237
+ solexaqa: '.solexaqa', fastqc: '.fastqc'
238
+ )
235
239
  end
236
240
 
237
241
  ##
@@ -241,13 +245,13 @@ module MiGA::Dataset::Result
241
245
  result_files_exist?(base, '.CoupledReads.fa') ||
242
246
  result_files_exist?(base, '.SingleReads.fa') ||
243
247
  result_files_exist?(base, %w[.1.fasta .2.fasta])
244
- r = MiGA::Result.new("#{base}.json")
245
248
  add_files_to_ds_result(
246
- r, name,
249
+ MiGA::Result.new("#{base}.json"), name,
247
250
  coupled: '.CoupledReads.fa',
248
251
  single: '.SingleReads.fa',
249
252
  pair1: '.1.fasta',
250
- pair2: '.2.fasta')
253
+ pair2: '.2.fasta'
254
+ )
251
255
  end
252
256
 
253
257
  ##
@@ -255,12 +259,12 @@ module MiGA::Dataset::Result
255
259
  # +is_clean: Boolean+.
256
260
  def add_result_assembly(base, opts)
257
261
  return nil unless result_files_exist?(base, '.LargeContigs.fna')
258
- r = MiGA::Result.new("#{base}.json")
259
262
  r = add_files_to_ds_result(
260
- r, name,
263
+ MiGA::Result.new("#{base}.json"), name,
261
264
  largecontigs: '.LargeContigs.fna',
262
265
  allcontigs: '.AllContigs.fna',
263
- assembly_data: '')
266
+ assembly_data: ''
267
+ )
264
268
  opts[:is_clean] ||= false
265
269
  r.clean! if opts[:is_clean]
266
270
  unless r.clean?
@@ -274,14 +278,14 @@ module MiGA::Dataset::Result
274
278
  # Add result type +:cds+ at +base+. Hash +opts+ supports +is_clean: Boolean+
275
279
  def add_result_cds(base, opts)
276
280
  return nil unless result_files_exist?(base, %w[.faa])
277
- r = MiGA::Result.new("#{base}.json")
278
281
  r = add_files_to_ds_result(
279
- r, name,
282
+ MiGA::Result.new("#{base}.json"), name,
280
283
  proteins: '.faa',
281
284
  genes: '.fna',
282
285
  gff2: '.gff2',
283
286
  gff3: '.gff3',
284
- tab: '.tab')
287
+ tab: '.tab'
288
+ )
285
289
  opts[:is_clean] ||= false
286
290
  r.clean! if opts[:is_clean]
287
291
  unless r.clean?
@@ -296,13 +300,13 @@ module MiGA::Dataset::Result
296
300
  # Add result type +:essential_genes+ at +base+ (no +_opts+ supported).
297
301
  def add_result_essential_genes(base, _opts)
298
302
  return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
299
- r = MiGA::Result.new("#{base}.json")
300
303
  add_files_to_ds_result(
301
- r, name,
304
+ MiGA::Result.new("#{base}.json"), name,
302
305
  ess_genes: '.ess.faa',
303
306
  collection: '.ess',
304
307
  report: '.ess/log',
305
- alignments: '.ess/proteins.aln')
308
+ alignments: '.ess/proteins.aln'
309
+ )
306
310
  end
307
311
 
308
312
  ##
@@ -310,12 +314,12 @@ module MiGA::Dataset::Result
310
314
  def add_result_ssu(base, opts)
311
315
  return MiGA::Result.new("#{base}.json") if result(:assembly).nil?
312
316
  return nil unless result_files_exist?(base, '.ssu.fa')
313
- r = MiGA::Result.new("#{base}.json")
314
317
  r = add_files_to_ds_result(
315
- r, name,
318
+ MiGA::Result.new("#{base}.json"), name,
316
319
  longest_ssu_gene: '.ssu.fa',
317
320
  gff: '.ssu.gff',
318
- all_ssu_genes: '.ssu.all.fa')
321
+ all_ssu_genes: '.ssu.all.fa'
322
+ )
319
323
  opts[:is_clean] ||= false
320
324
  r.clean! if opts[:is_clean]
321
325
  unless r.clean?
@@ -332,9 +336,8 @@ module MiGA::Dataset::Result
332
336
  return nil unless
333
337
  result_files_exist?(base, '.mytaxa') ||
334
338
  result_files_exist?(base, '.nomytaxa.txt')
335
- r = MiGA::Result.new("#{base}.json")
336
339
  add_files_to_ds_result(
337
- r, name,
340
+ MiGA::Result.new("#{base}.json"), name,
338
341
  mytaxa: '.mytaxa',
339
342
  blast: '.blast',
340
343
  mytaxain: '.mytaxain',
@@ -344,7 +347,8 @@ module MiGA::Dataset::Result
344
347
  phylum: '.mytaxa.Phylum.txt',
345
348
  innominate: '.mytaxa.innominate',
346
349
  kronain: '.mytaxa.krona',
347
- krona: '.html')
350
+ krona: '.html'
351
+ )
348
352
  else
349
353
  MiGA::Result.new("#{base}.json")
350
354
  end
@@ -357,9 +361,8 @@ module MiGA::Dataset::Result
357
361
  return nil unless
358
362
  result_files_exist?(base, %w[.pdf .mytaxa]) ||
359
363
  result_files_exist?(base, '.nomytaxa.txt')
360
- r = MiGA::Result.new("#{base}.json")
361
364
  add_files_to_ds_result(
362
- r, name,
365
+ MiGA::Result.new("#{base}.json"), name,
363
366
  nomytaxa: '.nomytaxa.txt',
364
367
  mytaxa: '.mytaxa',
365
368
  report: '.pdf',
@@ -370,7 +373,8 @@ module MiGA::Dataset::Result
370
373
  wintax: '.wintax',
371
374
  gene_ids: '.wintax.genes',
372
375
  region_ids: '.wintax.regions',
373
- regions: '.reg')
376
+ regions: '.reg'
377
+ )
374
378
  else
375
379
  MiGA::Result.new("#{base}.json")
376
380
  end
@@ -428,9 +432,8 @@ module MiGA::Dataset::Result
428
432
  return nil unless
429
433
  result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) ||
430
434
  result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
431
- r = MiGA::Result.new("#{base}.json")
432
435
  add_files_to_ds_result(
433
- r, name,
436
+ MiGA::Result.new("#{base}.json"), name,
434
437
  aai_medoids: '.aai-medoids.tsv',
435
438
  haai_db: '.haai.db',
436
439
  aai_db: '.aai.db',
@@ -438,7 +441,8 @@ module MiGA::Dataset::Result
438
441
  ani_db: '.ani.db',
439
442
  ref_tree: '.nwk',
440
443
  ref_tree_pdf: '.nwk.pdf',
441
- intax_test: '.intax.txt')
444
+ intax_test: '.intax.txt'
445
+ )
442
446
  end
443
447
 
444
448
  ##
@@ -27,7 +27,7 @@ class MiGA::Metadata < MiGA::MiGA
27
27
 
28
28
  ##
29
29
  # Initiate a MiGA::Metadata object with description in +path+.
30
- # It will create it if it doesn't exist
30
+ # It will create it if it doesn't exist.
31
31
  def initialize(path, defaults = {})
32
32
  @data = nil
33
33
  @path = File.absolute_path(path)
@@ -39,21 +39,21 @@ class MiGA::Metadata < MiGA::MiGA
39
39
  end
40
40
 
41
41
  ##
42
- # Parsed data as a Hash.
42
+ # Parsed data as a Hash
43
43
  def data
44
44
  self.load if @data.nil?
45
45
  @data
46
46
  end
47
47
 
48
48
  ##
49
- # Reset :created field and save the current data.
49
+ # Reset :created field and save the current data
50
50
  def create
51
51
  self[:created] = Time.now.to_s
52
52
  save
53
53
  end
54
54
 
55
55
  ##
56
- # Save the metadata into #path.
56
+ # Save the metadata into #path
57
57
  def save
58
58
  MiGA.DEBUG "Metadata.save #{path}"
59
59
  self[:updated] = Time.now.to_s
@@ -78,7 +78,7 @@ class MiGA::Metadata < MiGA::MiGA
78
78
  end
79
79
 
80
80
  ##
81
- # (Re-)load metadata stored in #path.
81
+ # (Re-)load metadata stored in #path
82
82
  def load
83
83
  sleeper = 0.0
84
84
  while File.exist? lock_file
@@ -87,11 +87,11 @@ class MiGA::Metadata < MiGA::MiGA
87
87
  end
88
88
  tmp = MiGA::Json.parse(path, additions: true)
89
89
  @data = {}
90
- tmp.each_pair{ |k,v| self[k] = v }
90
+ tmp.each { |k, v| self[k] = v }
91
91
  end
92
92
 
93
93
  ##
94
- # Delete file at #path.
94
+ # Delete file at #path
95
95
  def remove!
96
96
  MiGA.DEBUG "Metadata.remove! #{path}"
97
97
  File.unlink(path)
@@ -99,29 +99,41 @@ class MiGA::Metadata < MiGA::MiGA
99
99
  end
100
100
 
101
101
  ##
102
- # Lock file for the metadata.
103
- def lock_file ; "#{path}.lock" ; end
102
+ # Lock file for the metadata
103
+ def lock_file
104
+ "#{path}.lock"
105
+ end
104
106
 
105
107
  ##
106
- # Return the value of +k+ in #data.
107
- def [](k) data[k.to_sym] end
108
+ # Return the value of +k+ in #data
109
+ def [](k)
110
+ data[k.to_sym]
111
+ end
108
112
 
109
113
  ##
110
- # Set the value of +k+ to +v+.
111
- def []=(k,v)
114
+ # Set the value of +k+ to +v+
115
+ def []=(k, v)
112
116
  self.load if @data.nil?
113
117
  k = k.to_sym
114
- # Protect the special field :name
115
- v=v.miga_name if k==:name
116
- # Symbolize the special field :type
117
- v=v.to_sym if k==:type
118
- # Delete if nil, register, and return
119
- v.nil? ? @data.delete(k) : (@data[k]=v)
118
+ return @data.delete(k) if v.nil?
119
+
120
+ case k
121
+ when :name
122
+ # Protect the special field :name
123
+ v = v.miga_name
124
+ when :type
125
+ # Symbolize the special field :type
126
+ v = v.to_sym if k == :type
127
+ end
128
+
129
+ @data[k] = v
120
130
  end
121
131
 
122
132
  ##
123
- # Iterate +blk+ for each data with 2 arguments key and value.
124
- def each(&blk) data.each{ |k,v| blk.call(k,v) } ; end
133
+ # Iterate +blk+ for each data with 2 arguments: key and value
134
+ def each(&blk)
135
+ data.each { |k, v| blk.call(k, v) }
136
+ end
125
137
 
126
138
  ##
127
139
  # Show contents in JSON format as a String
@@ -30,16 +30,16 @@ module MiGA::Result::Dates
30
30
 
31
31
  private
32
32
 
33
- ##
34
- # Internal function to detect start and end dates
35
- def date_at(event)
36
- date = self[event]
37
- date ||= self[:started] if event == :start
38
- if date.nil?
39
- f = path event
40
- date = File.read(f) if File.size? f
41
- end
42
- date.nil? ? nil : Time.parse(date)
33
+ ##
34
+ # Internal function to detect start and end dates
35
+ def date_at(event)
36
+ date = self[event]
37
+ date ||= self[:started] if event == :start
38
+ if date.nil?
39
+ f = path event
40
+ date = File.read(f) if File.size? f
43
41
  end
42
+ Time.parse(date) unless date.nil?
43
+ end
44
44
  end
45
45
 
@@ -1,66 +1,82 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require "miga/common"
5
- require "miga/taxonomy"
6
- require "zlib"
4
+ require 'miga/common'
5
+ require 'miga/taxonomy'
6
+ require 'zlib'
7
7
 
8
8
  ##
9
9
  # Methods for taxonomy identification based on AAI/ANI values.
10
10
  module MiGA::TaxDist
11
-
12
- ##
13
- # Absolute path to the :intax or :novel data file (determined by +test+) for
14
- # AAI.
15
- def self.aai_path(test)
16
- test = test.downcase.to_sym
17
- return nil unless [:intax, :novel].include? test
18
- File.expand_path("../_data/aai-#{test}.tsv.gz", __FILE__)
19
- end
11
+ # Class-level
12
+ class << self
13
+
14
+ ##
15
+ # Absolute path to the :intax or :novel data file (determined by +test+) for
16
+ # AAI, determined for options +opts+. Supported options:
17
+ # - +:engine+: The search engine for AAI: +:blast+ (default) or +:diamond+
18
+ def aai_path(test, opts = {})
19
+ opts[:engine] ||= :blast
20
+ engine = opts[:engine].to_s.downcase.to_sym
21
+ test = test.to_s.downcase.to_sym
22
+ return nil unless %i[intax novel].include? test
23
+ engine = :blast if %i[blast+ blat].include? engine
24
+ return nil unless %i[blast diamond].include? engine
25
+ File.expand_path("../_data/aai-#{test}-#{engine}.tsv.gz", __FILE__)
26
+ end
20
27
 
21
- # Returns a Hash, where the keys correspond to the taxonomic level
22
- # (see MiGA::Taxonomy.LONG_RANKS for the meanings), and the values correspond
23
- # to the p-values of being :intax or :novel, as determined by +test+.
24
- def self.aai_pvalues(aai, test)
25
- Zlib::GzipReader.open(aai_path(test)) do |fh|
26
- keys = nil
27
- fh.each_line do |ln|
28
- row = ln.chomp.split(/\t/)
29
- if fh.lineno==1
30
- keys = row[1, row.size-1].map{ |i| i.to_i }
31
- elsif row.shift.to_f >= aai
32
- vals = {}
33
- keys.each do |i|
34
- v = row.shift
35
- next if v=="NA"
36
- rank = i==0 ? :root : MiGA::Taxonomy.KNOWN_RANKS[i-1]
37
- vals[rank] = v.to_f
28
+ ##
29
+ # Returns a Hash, where the keys correspond to the taxonomic level
30
+ # (see MiGA::Taxonomy.LONG_RANKS for the meanings), and the values
31
+ # correspond to the p-values of +test+ (one of +:intax+ or +:novel+)
32
+ # with options +opts+. See +aai_path+ for supported options.
33
+ def aai_pvalues(aai, test, opts = {})
34
+ Zlib::GzipReader.open(aai_path(test, opts)) do |fh|
35
+ keys = nil
36
+ fh.each_line do |ln|
37
+ row = ln.chomp.split(/\t/)
38
+ if fh.lineno == 1
39
+ keys = row[1, row.size - 1].map(&:to_i)
40
+ elsif row.shift.to_f >= aai
41
+ vals = {}
42
+ keys.each do |i|
43
+ v = row.shift
44
+ next if v == 'NA' # <- missing data
45
+ next if i == 1 # <- namespace, not a taxonomic rank
46
+ rank = i.zero? ? :root : MiGA::Taxonomy.KNOWN_RANKS[i]
47
+ vals[rank] = v.to_f
48
+ end
49
+ return vals
38
50
  end
39
- return vals
40
51
  end
41
- end # each_line ln
42
- end # open fh
43
- {}
44
- end
52
+ end
53
+ {}
54
+ end
45
55
 
46
- # Determines the degree to which a Float +aai+ value indicates similar
47
- # taxonomy (with +test+ :intax) or a novel taxon (with +test+ :novel). Returns
48
- # a Hash with "likelihood" phrases as keys and values as an array with
49
- # cannonical rank (as in MiGA::Taxonomy) and estimated p-value.
50
- def self.aai_taxtest(aai, test)
51
- meaning = {most_likely:[0,0.01],probably:[0.01,0.1],possibly_even:[0.1,0.5]}
52
- pv = aai_pvalues(aai, test)
53
- out = {}
54
- meaning.each do |phrase, thresholds|
55
- lwr, upr = thresholds
56
- min = pv.values.select{ |v| v < upr }.max
57
- return out if min.nil?
58
- if min >= lwr
59
- v = pv.select{ |_,vj| vj==min }
60
- out[phrase] = (test==:intax ? v.reverse_each : v).first
56
+ ##
57
+ # Determines the degree to which a Float +aai+ value indicates similar
58
+ # taxonomy (with +test+ :intax) or a novel taxon (with +test+ :novel) with
59
+ # options +opts+. See +aai_path+ for supported options.
60
+ # Returns a Hash with "likelihood" phrases as keys and values as an array
61
+ # with canonical rank (as in MiGA::Taxonomy) and estimated p-value.
62
+ def aai_taxtest(aai, test, opts = {})
63
+ meaning = {
64
+ most_likely: [0.00, 0.01],
65
+ probably: [0.01, 0.10],
66
+ possibly_even: [0.10, 0.50]
67
+ }
68
+ pvalues = aai_pvalues(aai, test, opts)
69
+ out = {}
70
+ meaning.each do |phrase, thresholds|
71
+ lwr, upr = thresholds
72
+ min = pvalues.values.select { |v| v < upr }.max
73
+ return out if min.nil?
74
+ if min >= lwr
75
+ v = pvalues.select { |_, vj| vj == min }
76
+ out[phrase] = (test == :intax ? v.reverse_each : v).first
77
+ end
61
78
  end
79
+ out
62
80
  end
63
- out
64
81
  end
65
-
66
82
  end
@@ -35,7 +35,6 @@ class MiGA::Taxonomy < MiGA::MiGA
35
35
  else
36
36
  initialize_by_ranks(str, ranks)
37
37
  end
38
- initialize_by_str(str)
39
38
  end
40
39
 
41
40
  ##
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.6, 2, 0]
13
+ VERSION = [0.6, 3, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2020, 3, 11)
21
+ VERSION_DATE = Date.new(2020, 3, 27)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -0,0 +1,119 @@
1
+ require 'test_helper'
2
+ require 'miga/project'
3
+
4
+ class ResultStatsTest < Test::Unit::TestCase
5
+
6
+ def setup
7
+ $tmp = Dir.mktmpdir
8
+ ENV['MIGA_HOME'] = $tmp
9
+ FileUtils.touch(File.expand_path('.miga_rc', ENV['MIGA_HOME']))
10
+ FileUtils.touch(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
11
+ $p = MiGA::Project.new(File.expand_path('project1', $tmp))
12
+ $d = $p.add_dataset('dataset1')
13
+ end
14
+
15
+ def teardown
16
+ FileUtils.rm_rf $tmp
17
+ ENV['MIGA_HOME'] = nil
18
+ end
19
+
20
+ def file_path(dir, ext)
21
+ File.join($p.path, dir, "#{$d.name}#{ext}")
22
+ end
23
+
24
+ def touch_done(dir)
25
+ FileUtils.touch(file_path(dir, '.done'))
26
+ end
27
+
28
+ def test_single_raw_reads
29
+ dir = 'data/01.raw_reads'
30
+ fq = file_path(dir, '.1.fastq')
31
+ File.open(fq, 'w') { |fh| fh.puts '@1','ACTAC','+','####' }
32
+ touch_done(dir)
33
+ r = $d.add_result(:raw_reads)
34
+ assert_equal({}, r[:stats])
35
+ r.compute_stats
36
+ assert(!r[:stats].empty?)
37
+ assert_equal(Hash, r[:stats].class)
38
+ assert_equal(1, r[:stats][:reads])
39
+ assert_equal([40.0, '%'], r[:stats][:g_c_content])
40
+ end
41
+
42
+ def test_coupled_raw_reads
43
+ dir = 'data/01.raw_reads'
44
+ fq = file_path(dir, '.1.fastq')
45
+ File.open(fq, 'w') { |fh| fh.puts '@1','ACTAC','+','####' }
46
+ fq = file_path(dir, '.2.fastq')
47
+ File.open(fq, 'w') { |fh| fh.puts '@1','ACTAC','+','####' }
48
+ touch_done(dir)
49
+ r = $d.add_result(:raw_reads)
50
+ r.compute_stats
51
+ assert(!r[:stats].empty?)
52
+ assert_nil(r[:stats][:reads])
53
+ assert_equal(1, r[:stats][:read_pairs])
54
+ assert_equal([40.0, '%'], r[:stats][:reverse_g_c_content])
55
+ end
56
+
57
+ def test_trimmed_reads
58
+ dir = 'data/02.trimmed_reads'
59
+ FileUtils.touch(file_path(dir, '.1.clipped.fastq'))
60
+ touch_done(dir)
61
+ r = $d.add_result(:trimmed_reads)
62
+ assert_equal({}, r[:stats])
63
+ r.compute_stats
64
+ assert_equal({}, r[:stats])
65
+ end
66
+
67
+ def test_read_quality
68
+ dir = 'data/03.read_quality'
69
+ Dir.mkdir(file_path(dir, '.solexaqa'))
70
+ Dir.mkdir(file_path(dir, '.fastqc'))
71
+ touch_done(dir)
72
+ r = $d.add_result(:read_quality)
73
+ assert_equal({}, r[:stats])
74
+ r.compute_stats
75
+ assert_equal({}, r[:stats])
76
+ end
77
+
78
+ def test_trimmed_fasta
79
+ dir = 'data/04.trimmed_fasta'
80
+ fa = file_path(dir, '.CoupledReads.fa')
81
+ File.open(fa, 'w') { |fh| fh.puts '>1','ACTAC' }
82
+ touch_done(dir)
83
+ r = $d.add_result(:trimmed_fasta)
84
+ assert_equal({}, r[:stats])
85
+ r.compute_stats
86
+ assert_equal(1, r[:stats][:reads])
87
+ assert_equal([40.0, '%'], r[:stats][:g_c_content])
88
+ end
89
+
90
+ def test_assembly
91
+ dir = 'data/05.assembly'
92
+ fa = file_path(dir, '.LargeContigs.fna')
93
+ File.open(fa, 'w') { |fh| fh.puts '>1','ACTAC' }
94
+ touch_done(dir)
95
+ r = $d.add_result(:assembly)
96
+ assert_equal({}, r[:stats])
97
+ r.compute_stats
98
+ assert_equal(1, r[:stats][:contigs])
99
+ assert_equal([5, 'bp'], r[:stats][:total_length])
100
+ assert_equal([40.0, '%'], r[:stats][:g_c_content])
101
+ end
102
+
103
+ def test_cds
104
+ dir = 'data/06.cds'
105
+ fa = file_path(dir, '.faa')
106
+ File.open(fa, 'w') { |fh| fh.puts '>1','M' }
107
+ touch_done(dir)
108
+ r = $d.add_result(:cds)
109
+ assert_equal({}, r[:stats])
110
+ r.compute_stats
111
+ assert_equal(1, r[:stats][:predicted_proteins])
112
+ assert_equal([1.0, 'aa'], r[:stats][:average_length])
113
+ assert_nil(r[:stats][:coding_density])
114
+ test_assembly
115
+ r.compute_stats
116
+ assert_equal([60.0, '%'], r[:stats][:coding_density])
117
+ end
118
+
119
+ end
@@ -10,14 +10,14 @@ class ResultTest < Test::Unit::TestCase
10
10
  FileUtils.touch(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
11
11
  $p1 = MiGA::Project.new(File.expand_path('project1', $tmp))
12
12
  $d1 = $p1.add_dataset('dataset1')
13
- FileUtils.touch(File.expand_path(
14
- "data/02.trimmed_reads/#{$d1.name}.1.clipped.fastq", $p1.path))
15
- FileUtils.touch(File.expand_path(
16
- "data/02.trimmed_reads/#{$d1.name}.done", $p1.path))
17
- FileUtils.touch(File.expand_path(
18
- 'data/10.clades/01.find/miga-project.empty', $p1.path))
19
- FileUtils.touch(File.expand_path(
20
- 'data/10.clades/01.find/miga-project.done', $p1.path))
13
+ FileUtils.touch(
14
+ File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.1.clipped.fastq"))
15
+ FileUtils.touch(
16
+ File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.done"))
17
+ FileUtils.touch(
18
+ File.join($p1.path, 'data/10.clades/01.find/miga-project.empty'))
19
+ FileUtils.touch(
20
+ File.join($p1.path, 'data/10.clades/01.find/miga-project.done'))
21
21
  end
22
22
 
23
23
  def teardown
@@ -46,4 +46,18 @@ class ResultTest < Test::Unit::TestCase
46
46
  assert_equal($p1.path, r.source.path)
47
47
  end
48
48
 
49
+ def test_dates
50
+ r = $d1.add_result(:trimmed_reads)
51
+ assert_nil(r.done_at)
52
+ assert_nil(r.started_at)
53
+ tf = File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.done")
54
+ File.open(tf, 'w') { |fh| fh.puts Time.new(1,2,3,4,5) }
55
+ assert_equal(Time, r.done_at.class)
56
+ assert_nil(r.running_time)
57
+ tf = File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.start")
58
+ File.open(tf, 'w') { |fh| fh.puts Time.new(1,2,3,4,0) }
59
+ r = $d1.add_result(:trimmed_reads)
60
+ assert_equal(5.0, r.running_time)
61
+ end
62
+
49
63
  end
@@ -0,0 +1,59 @@
1
+ require 'test_helper'
2
+ require 'miga/tax_dist'
3
+
4
+ class TaxDistTest < Test::Unit::TestCase
5
+
6
+ def test_aai_path
7
+ assert(File.size? MiGA::TaxDist.aai_path(:intax))
8
+ assert(File.size? MiGA::TaxDist.aai_path(:novel))
9
+ assert(File.size? MiGA::TaxDist.aai_path(:intax, engine: :diamond))
10
+ assert(File.size? MiGA::TaxDist.aai_path(:novel, engine: :blast))
11
+ assert(File.size? MiGA::TaxDist.aai_path(:novel, engine: :'blast+'))
12
+ end
13
+
14
+ def test_aai_pvalues
15
+ distant_intax = MiGA::TaxDist.aai_pvalues(35.0, :intax)
16
+ assert_lt(distant_intax[:root], 0.05)
17
+ assert_gt(distant_intax[:g], 0.05)
18
+ assert_nil(distant_intax[:ns])
19
+
20
+ close_intax = MiGA::TaxDist.aai_pvalues(99.0, :intax, engine: :blast)
21
+ assert_lt(close_intax[:root], 0.05)
22
+ assert_lt(close_intax[:s], 0.05)
23
+
24
+ close_intax = MiGA::TaxDist.aai_pvalues(99.0, :intax, engine: :diamond)
25
+ assert_lt(close_intax[:root], 0.05)
26
+ assert_lt(close_intax[:s], 0.05)
27
+
28
+ distant_novel = MiGA::TaxDist.aai_pvalues(35.0, :novel, engine: :diamond)
29
+ $stderr.puts distant_novel
30
+ assert_gt(distant_novel[:root], 0.05)
31
+ assert_lt(distant_novel[:g], 0.05)
32
+ assert_nil(distant_novel[:ns])
33
+
34
+ close_novel = MiGA::TaxDist.aai_pvalues(99.0, :novel)
35
+ assert_gt(close_novel[:root], 0.05)
36
+ assert_gt(close_novel[:f], 0.05)
37
+
38
+ assert_equal({}, MiGA::TaxDist.aai_pvalues(101.0, :intax))
39
+ end
40
+
41
+ def test_aai_taxtest
42
+ distant_intax = MiGA::TaxDist.aai_taxtest(35.0, :intax, engine: :diamond)
43
+ assert_equal(:root, distant_intax[:most_likely][0])
44
+ assert_nil(distant_intax[:probably])
45
+ assert_nil(distant_intax[:possibly_even])
46
+
47
+ distant_intax = MiGA::TaxDist.aai_taxtest(35.0, :intax, engine: :blast)
48
+ assert_equal(:root, distant_intax[:most_likely][0])
49
+ assert_nil(distant_intax[:probably])
50
+ assert_nil(distant_intax[:possibly_even])
51
+
52
+ close_intax = MiGA::TaxDist.aai_taxtest(99.0, :intax, engine: :diamond)
53
+ assert_equal(:s, close_intax[:probably][0])
54
+
55
+ close_intax = MiGA::TaxDist.aai_taxtest(99.0, :intax, engine: :blast)
56
+ assert_equal(:s, close_intax[:probably][0])
57
+ end
58
+
59
+ end
@@ -3,6 +3,7 @@ SimpleCov.start
3
3
 
4
4
  require 'rubygems'
5
5
  require 'test/unit'
6
+ require 'assertions'
6
7
  require 'miga/common'
7
8
  require 'stringio'
8
9
 
@@ -63,12 +63,12 @@ module MiGA::DistanceRunner::Commands
63
63
 
64
64
  ##
65
65
  # Execute an AAI command
66
- def aai_cmd(f1, f2, n1, n2, db, o={})
66
+ def aai_cmd(f1, f2, n1, n2, db, o = {})
67
67
  o = opts.merge(o)
68
68
  v = `aai.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
69
69
  --name1 "#{n1}" --name2 "#{n2}" \
70
70
  -t "#{o[:thr]}" -a --lookup-first "--#{o[:aai_save_rbm]}" \
71
- -p "#{o[:aai_p] || "blast+"}"`.chomp
71
+ -p "#{o[:aai_p] || 'blast+'}"`.chomp
72
72
  (v.nil? || v.empty?) ? 0 : v.to_f
73
73
  end
74
74
 
@@ -91,7 +91,7 @@ module MiGA::DistanceRunner::Commands
91
91
  v = `ani.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
92
92
  --name1 "#{n1}" --name2 "#{n2}" \
93
93
  -t "#{opts[:thr]}" -a --no-save-regions --no-save-rbm \
94
- --lookup-first -p "#{o[:ani_p] || "blast+"}"`.chomp
94
+ --lookup-first -p "#{o[:ani_p] || 'blast+'}"`.chomp
95
95
  end
96
96
  v.nil? || v.empty? ? 0 : v.to_f
97
97
  end
@@ -73,14 +73,17 @@ module MiGA::DistanceRunner::Pipeline
73
73
  cr = dataset.closest_relatives(1, from_ref_project)
74
74
  return if cr.nil? or cr.empty?
75
75
  tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
76
+
76
77
  # Run the test for each rank
77
- r = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax).map do |k,v|
78
+ tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
79
+ r = tax_test.map do |k,v|
78
80
  sig = ''
79
- [0.5,0.1,0.05,0.01].each{ |i| sig << '*' if v<i }
81
+ [0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
80
82
  [MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
81
83
  end
84
+
82
85
  # Save test
83
- File.open(File.expand_path("#{dataset.name}.intax.txt", home), "w") do |fh|
86
+ File.open(File.expand_path("#{dataset.name}.intax.txt", home), 'w') do |fh|
84
87
  fh.puts "Closest relative: #{cr[0][0]} with AAI: #{cr[0][1]}."
85
88
  fh.puts ''
86
89
  fh.puts MiGA::MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
@@ -22,6 +22,7 @@ class MiGA::DistanceRunner
22
22
  raise "No project at #{project_path}"
23
23
  @dataset = project.dataset(dataset_name)
24
24
  @home = File.expand_path('data/09.distances', project.path)
25
+
25
26
  # Default opts
26
27
  if project.metadata[:aai_save_rbm] == false
27
28
  @opts[:aai_save_rbm] ||= 'no-save-rbm'
@@ -48,6 +49,8 @@ class MiGA::DistanceRunner
48
49
  [:haai_p, :aai_p, :ani_p, :distances_checkpoint].each do |m|
49
50
  @opts[m] ||= ref_project.metadata[m]
50
51
  end
52
+ @opts[:aai_p] ||= 'blast+'
53
+ @opts[:ani_p] ||= 'blast+'
51
54
  @opts[:distances_checkpoint] ||= 10
52
55
  @opts[:distances_checkpoint] = @opts[:distances_checkpoint].to_i
53
56
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.2.0
4
+ version: 0.6.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-11 00:00:00.000000000 Z
11
+ date: 2020-03-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: assertions
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1'
83
97
  description: Microbial Genomes Atlas
84
98
  email: lmrodriguezr@gmail.com
85
99
  executables:
@@ -94,8 +108,10 @@ files:
94
108
  - Rakefile
95
109
  - bin/miga
96
110
  - lib/miga.rb
97
- - lib/miga/_data/aai-intax.tsv.gz
98
- - lib/miga/_data/aai-novel.tsv.gz
111
+ - lib/miga/_data/aai-intax-blast.tsv.gz
112
+ - lib/miga/_data/aai-intax-diamond.tsv.gz
113
+ - lib/miga/_data/aai-novel-blast.tsv.gz
114
+ - lib/miga/_data/aai-novel-diamond.tsv.gz
99
115
  - lib/miga/cli.rb
100
116
  - lib/miga/cli/action.rb
101
117
  - lib/miga/cli/action/about.rb
@@ -199,7 +215,9 @@ files:
199
215
  - test/metadata_test.rb
200
216
  - test/project_test.rb
201
217
  - test/remote_dataset_test.rb
218
+ - test/result_stats_test.rb
202
219
  - test/result_test.rb
220
+ - test/tax_dist_test.rb
203
221
  - test/tax_index_test.rb
204
222
  - test/taxonomy_test.rb
205
223
  - test/test_helper.rb