miga-base 0.6.2.0 → 0.6.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2c317237c4cd2f049b56af6021272208c206f1810052dde4b1b5b41394f37139
4
- data.tar.gz: '066558242d9bcc1a96f166349815398296cdb976190853a18ca52bdafe84c263'
3
+ metadata.gz: 8b225951f374bcd267560e5bd8234fb88bcd6b0c11b0561fb4b3b479af39c4b3
4
+ data.tar.gz: 9b32d40ea94ceb526fe0ba732c77fce978b0cba5decffd4e1c0d701594670dbd
5
5
  SHA512:
6
- metadata.gz: 50b395740f68543e7aa79bf0e3631cb86248fffbcd9aac942e2d30681579a42e528d552638701f153605f68ea9da389762b6d068455e9b67cbfd9b66cde879c0
7
- data.tar.gz: d20fd84e71bd4d7a8ebf5ff1060e12096d78477b662098e013b88ea9a19e54b4665a71bbfa0e49fd57df47a61b3a0780ed469fcb004b1384dd4e4f2304320666
6
+ metadata.gz: b7fe9b2cbb09b6612b762c7c9202b4b27ece7a0b6f4dd23eecee9bddc835c130f15a63772011106a1f0f1425e5445fa1541a8ebc81661ef341dcacec3ae22193
7
+ data.tar.gz: 501fa797aa6726ac5cdc6c043a3073d03a7ba3ed81d63ebac9ef989a76aa1947806c9052ab50c089567001ad1b1b92ad04cc05ea4ca33691de36c4dcb2e34b52
@@ -11,20 +11,25 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
11
11
  opt.on(
12
12
  '-n', '--name STRING',
13
13
  'Name of the project'
14
- ){ |v| cli[:name] = v }
14
+ ) { |v| cli[:name] = v }
15
15
  opt.on(
16
16
  '-d', '--description STRING',
17
17
  'Description of the project'
18
- ){ |v| cli[:description] = v }
18
+ ) { |v| cli[:description] = v }
19
19
  opt.on(
20
20
  '-c', '--comments STRING',
21
21
  'Comments on the project'
22
- ){ |v| cli[:comments] = v }
22
+ ) { |v| cli[:comments] = v }
23
+ opt.on(
24
+ '--fast',
25
+ 'Use faster identity engines (Diamond-AAI and FastANI)',
26
+ 'Equivalent to: -m aai_p=diamond,ani_p=fastani'
27
+ ) { |v| cli[:fast] = v }
23
28
  opt.on(
24
29
  '-m', '--metadata STRING',
25
30
  'Metadata as key-value pairs separated by = and delimited by comma',
26
31
  'Values are saved as strings except for booleans (true / false) or nil'
27
- ){ |v| cli[:metadata] = v }
32
+ ) { |v| cli[:metadata] = v }
28
33
  end
29
34
  end
30
35
 
@@ -40,6 +45,10 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
40
45
  raise 'Project already exists, aborting.' if Project.exist? cli[:project]
41
46
  p = Project.new(cli[:project], false)
42
47
  p = cli.add_metadata(p)
48
+ if cli[:fast]
49
+ p.metadata[:aai_p] = 'diamond'
50
+ p.metadata[:ani_p] = 'fastani'
51
+ end
43
52
  p.save
44
53
  end
45
54
  end
@@ -9,10 +9,9 @@ require 'sqlite3'
9
9
  ##
10
10
  # Dataset representation in MiGA.
11
11
  class MiGA::Dataset < MiGA::MiGA
12
-
13
12
  include MiGA::Dataset::Result
14
13
  include MiGA::Dataset::Hooks
15
-
14
+
16
15
  # Class-level
17
16
  class << self
18
17
 
@@ -27,7 +26,7 @@ class MiGA::Dataset < MiGA::MiGA
27
26
  def INFO_FIELDS
28
27
  %w(name created updated type ref user description comments)
29
28
  end
30
-
29
+
31
30
  end
32
31
 
33
32
  # Instance-level
@@ -35,11 +34,11 @@ class MiGA::Dataset < MiGA::MiGA
35
34
  ##
36
35
  # MiGA::Project that contains the dataset.
37
36
  attr_reader :project
38
-
37
+
39
38
  ##
40
39
  # Datasets are uniquely identified by +name+ in a project.
41
40
  attr_reader :name
42
-
41
+
43
42
  ##
44
43
  # Create a MiGA::Dataset object in a +project+ MiGA::Project with a
45
44
  # uniquely identifying +name+. +is_ref+ indicates if the dataset is to
@@ -52,21 +51,25 @@ class MiGA::Dataset < MiGA::MiGA
52
51
  end
53
52
  @project = project
54
53
  @name = name
54
+ @metadata = nil
55
55
  metadata[:ref] = is_ref
56
56
  @metadata_future = [
57
57
  File.expand_path("metadata/#{name}.json", project.path),
58
58
  metadata
59
59
  ]
60
60
  save unless File.exist? @metadata_future[0]
61
- pull_hook :on_load
62
61
  end
63
62
 
64
63
  ##
65
- # MiGA::Metadata with information about the dataset.
64
+ # MiGA::Metadata with information about the dataset
66
65
  def metadata
67
- @metadata ||= MiGA::Metadata.new(*@metadata_future)
66
+ if @metadata.nil?
67
+ @metadata = MiGA::Metadata.new(*@metadata_future)
68
+ pull_hook :on_load
69
+ end
70
+ @metadata
68
71
  end
69
-
72
+
70
73
  ##
71
74
  # Save any changes you've made in the dataset.
72
75
  def save
@@ -74,11 +77,11 @@ class MiGA::Dataset < MiGA::MiGA
74
77
  metadata.save
75
78
  pull_hook :on_save
76
79
  end
77
-
80
+
78
81
  ##
79
82
  # Get the type of dataset as Symbol.
80
83
  def type ; metadata[:type] ; end
81
-
84
+
82
85
  ##
83
86
  # Delete the dataset with all its contents (including results) and returns
84
87
  # nil.
@@ -103,7 +106,7 @@ class MiGA::Dataset < MiGA::MiGA
103
106
  self.metadata.save
104
107
  pull_hook :on_activate
105
108
  end
106
-
109
+
107
110
  ##
108
111
  # Get standard metadata values for the dataset as Array.
109
112
  def info
@@ -111,7 +114,7 @@ class MiGA::Dataset < MiGA::MiGA
111
114
  (k == 'name') ? self.name : metadata[k.to_sym]
112
115
  end
113
116
  end
114
-
117
+
115
118
  ##
116
119
  # Is this dataset a reference?
117
120
  def is_ref? ; !!metadata[:ref] ; end
@@ -119,14 +122,14 @@ class MiGA::Dataset < MiGA::MiGA
119
122
  ##
120
123
  # Is this dataset a query (non-reference)?
121
124
  def is_query? ; !metadata[:ref] ; end
122
-
125
+
123
126
  ##
124
127
  # Is this dataset known to be multi-organism?
125
128
  def is_multi?
126
129
  return false if metadata[:type].nil? or @@KNOWN_TYPES[type].nil?
127
130
  @@KNOWN_TYPES[type][:multi]
128
131
  end
129
-
132
+
130
133
  ##
131
134
  # Is this dataset known to be single-organism?
132
135
  def is_nonmulti?
@@ -139,7 +142,7 @@ class MiGA::Dataset < MiGA::MiGA
139
142
  def is_active?
140
143
  metadata[:inactive].nil? or !metadata[:inactive]
141
144
  end
142
-
145
+
143
146
  ##
144
147
  # Should I ignore +task+ for this dataset?
145
148
  def ignore_task?(task)
@@ -24,10 +24,10 @@ module MiGA::Dataset::Result
24
24
  end
25
25
 
26
26
  ##
27
- # For each result executes the 2-ary +blk+ block: key symbol and MiGA::Result
28
- def each_result(&blk)
29
- @@RESULT_DIRS.keys.each do |k|
30
- blk.call(k, result(k)) unless result(k).nil?
27
+ # For each result executes the 2-ary block: key symbol and MiGA::Result
28
+ def each_result
29
+ @@RESULT_DIRS.each_key do |k|
30
+ yield(k, result(k)) unless result(k).nil?
31
31
  end
32
32
  end
33
33
 
@@ -50,20 +50,20 @@ module MiGA::Dataset::Result
50
50
  r_pre = MiGA::Result.load("#{base}.json")
51
51
  return r_pre if (r_pre.nil? && !save) || !r_pre.nil?
52
52
  end
53
- r = if File.exist?("#{base}.done")
54
- self.send("add_result_#{result_type}", base, opts)
55
- end
56
- unless r.nil?
57
- r.save
58
- pull_hook(:on_result_ready, result_type)
59
- end
53
+ fun = "add_result_#{result_type}"
54
+ r = send(fun, base, opts) if File.exist?("#{base}.done")
55
+ return if r.nil?
56
+ r.save
57
+ pull_hook(:on_result_ready, result_type)
60
58
  r
61
59
  end
62
60
 
63
61
  ##
64
62
  # Gets a result as MiGA::Result for the datasets with +result_type+. This is
65
63
  # equivalent to +add_result(result_type, false)+.
66
- def get_result(result_type) ; add_result(result_type, false) ; end
64
+ def get_result(result_type)
65
+ add_result(result_type, false)
66
+ end
67
67
 
68
68
  ##
69
69
  # Returns the key symbol of the first registered result (sorted by the
@@ -179,7 +179,7 @@ module MiGA::Dataset::Result
179
179
  r = get_result(:distances)
180
180
  ref = project.datasets.select(&:is_ref?).select(&:is_active?).map(&:name)
181
181
  return if r.nil?
182
- [:haai_db, :aai_db, :ani_db].each do |db_type|
182
+ %i[haai_db aai_db ani_db].each do |db_type|
183
183
  db = r.file_path(db_type)
184
184
  next if db.nil? || !File.size?(db)
185
185
  sqlite_db = SQLite3::Database.new db
@@ -198,9 +198,8 @@ module MiGA::Dataset::Result
198
198
  # Add result type +:raw_reads+ at +base+ (no +_opts+ supported)
199
199
  def add_result_raw_reads(base, _opts)
200
200
  return nil unless result_files_exist?(base, '.1.fastq')
201
- r = MiGA::Result.new("#{base}.json")
202
201
  add_files_to_ds_result(
203
- r, name,
202
+ MiGA::Result.new("#{base}.json"), name,
204
203
  if result_files_exist?(base, '.2.fastq')
205
204
  { pair1: '.1.fastq', pair2: '.2.fastq' }
206
205
  else
@@ -213,25 +212,30 @@ module MiGA::Dataset::Result
213
212
  # Add result type +:trimmed_reads+ at +base+ (no +_opts+ supported)
214
213
  def add_result_trimmed_reads(base, _opts)
215
214
  return nil unless result_files_exist?(base, '.1.clipped.fastq')
216
- r = MiGA::Result.new("#{base}.json")
217
- if result_files_exist?(base, '.2.clipped.fastq')
218
- r = add_files_to_ds_result(r, name,
219
- pair1: '.1.clipped.fastq',
220
- pair2: '.2.clipped.fastq',
221
- single: '.1.clipped.single.fastq')
222
- else
223
- r = add_files_to_ds_result(r, name, single: '.1.clipped.fastq')
215
+ add_files_to_ds_result(
216
+ MiGA::Result.new("#{base}.json"), name,
217
+ if result_files_exist?(base, '.2.clipped.fastq')
218
+ {
219
+ pair1: '.1.clipped.fastq',
220
+ pair2: '.2.clipped.fastq',
221
+ single: '.1.clipped.single.fastq'
222
+ }
223
+ else
224
+ { single: '.1.clipped.fastq' }
225
+ end
226
+ ).tap do |r|
227
+ r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
224
228
  end
225
- r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
226
- r
227
229
  end
228
230
 
229
231
  ##
230
232
  # Add result type +:read_quality+ at +base+ (no +_opts+ supported)
231
233
  def add_result_read_quality(base, _opts)
232
234
  return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
233
- r = MiGA::Result.new("#{base}.json")
234
- add_files_to_ds_result(r, name, solexaqa: '.solexaqa', fastqc: '.fastqc')
235
+ add_files_to_ds_result(
236
+ MiGA::Result.new("#{base}.json"), name,
237
+ solexaqa: '.solexaqa', fastqc: '.fastqc'
238
+ )
235
239
  end
236
240
 
237
241
  ##
@@ -241,13 +245,13 @@ module MiGA::Dataset::Result
241
245
  result_files_exist?(base, '.CoupledReads.fa') ||
242
246
  result_files_exist?(base, '.SingleReads.fa') ||
243
247
  result_files_exist?(base, %w[.1.fasta .2.fasta])
244
- r = MiGA::Result.new("#{base}.json")
245
248
  add_files_to_ds_result(
246
- r, name,
249
+ MiGA::Result.new("#{base}.json"), name,
247
250
  coupled: '.CoupledReads.fa',
248
251
  single: '.SingleReads.fa',
249
252
  pair1: '.1.fasta',
250
- pair2: '.2.fasta')
253
+ pair2: '.2.fasta'
254
+ )
251
255
  end
252
256
 
253
257
  ##
@@ -255,12 +259,12 @@ module MiGA::Dataset::Result
255
259
  # +is_clean: Boolean+.
256
260
  def add_result_assembly(base, opts)
257
261
  return nil unless result_files_exist?(base, '.LargeContigs.fna')
258
- r = MiGA::Result.new("#{base}.json")
259
262
  r = add_files_to_ds_result(
260
- r, name,
263
+ MiGA::Result.new("#{base}.json"), name,
261
264
  largecontigs: '.LargeContigs.fna',
262
265
  allcontigs: '.AllContigs.fna',
263
- assembly_data: '')
266
+ assembly_data: ''
267
+ )
264
268
  opts[:is_clean] ||= false
265
269
  r.clean! if opts[:is_clean]
266
270
  unless r.clean?
@@ -274,14 +278,14 @@ module MiGA::Dataset::Result
274
278
  # Add result type +:cds+ at +base+. Hash +opts+ supports +is_clean: Boolean+
275
279
  def add_result_cds(base, opts)
276
280
  return nil unless result_files_exist?(base, %w[.faa])
277
- r = MiGA::Result.new("#{base}.json")
278
281
  r = add_files_to_ds_result(
279
- r, name,
282
+ MiGA::Result.new("#{base}.json"), name,
280
283
  proteins: '.faa',
281
284
  genes: '.fna',
282
285
  gff2: '.gff2',
283
286
  gff3: '.gff3',
284
- tab: '.tab')
287
+ tab: '.tab'
288
+ )
285
289
  opts[:is_clean] ||= false
286
290
  r.clean! if opts[:is_clean]
287
291
  unless r.clean?
@@ -296,13 +300,13 @@ module MiGA::Dataset::Result
296
300
  # Add result type +:essential_genes+ at +base+ (no +_opts+ supported).
297
301
  def add_result_essential_genes(base, _opts)
298
302
  return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
299
- r = MiGA::Result.new("#{base}.json")
300
303
  add_files_to_ds_result(
301
- r, name,
304
+ MiGA::Result.new("#{base}.json"), name,
302
305
  ess_genes: '.ess.faa',
303
306
  collection: '.ess',
304
307
  report: '.ess/log',
305
- alignments: '.ess/proteins.aln')
308
+ alignments: '.ess/proteins.aln'
309
+ )
306
310
  end
307
311
 
308
312
  ##
@@ -310,12 +314,12 @@ module MiGA::Dataset::Result
310
314
  def add_result_ssu(base, opts)
311
315
  return MiGA::Result.new("#{base}.json") if result(:assembly).nil?
312
316
  return nil unless result_files_exist?(base, '.ssu.fa')
313
- r = MiGA::Result.new("#{base}.json")
314
317
  r = add_files_to_ds_result(
315
- r, name,
318
+ MiGA::Result.new("#{base}.json"), name,
316
319
  longest_ssu_gene: '.ssu.fa',
317
320
  gff: '.ssu.gff',
318
- all_ssu_genes: '.ssu.all.fa')
321
+ all_ssu_genes: '.ssu.all.fa'
322
+ )
319
323
  opts[:is_clean] ||= false
320
324
  r.clean! if opts[:is_clean]
321
325
  unless r.clean?
@@ -332,9 +336,8 @@ module MiGA::Dataset::Result
332
336
  return nil unless
333
337
  result_files_exist?(base, '.mytaxa') ||
334
338
  result_files_exist?(base, '.nomytaxa.txt')
335
- r = MiGA::Result.new("#{base}.json")
336
339
  add_files_to_ds_result(
337
- r, name,
340
+ MiGA::Result.new("#{base}.json"), name,
338
341
  mytaxa: '.mytaxa',
339
342
  blast: '.blast',
340
343
  mytaxain: '.mytaxain',
@@ -344,7 +347,8 @@ module MiGA::Dataset::Result
344
347
  phylum: '.mytaxa.Phylum.txt',
345
348
  innominate: '.mytaxa.innominate',
346
349
  kronain: '.mytaxa.krona',
347
- krona: '.html')
350
+ krona: '.html'
351
+ )
348
352
  else
349
353
  MiGA::Result.new("#{base}.json")
350
354
  end
@@ -357,9 +361,8 @@ module MiGA::Dataset::Result
357
361
  return nil unless
358
362
  result_files_exist?(base, %w[.pdf .mytaxa]) ||
359
363
  result_files_exist?(base, '.nomytaxa.txt')
360
- r = MiGA::Result.new("#{base}.json")
361
364
  add_files_to_ds_result(
362
- r, name,
365
+ MiGA::Result.new("#{base}.json"), name,
363
366
  nomytaxa: '.nomytaxa.txt',
364
367
  mytaxa: '.mytaxa',
365
368
  report: '.pdf',
@@ -370,7 +373,8 @@ module MiGA::Dataset::Result
370
373
  wintax: '.wintax',
371
374
  gene_ids: '.wintax.genes',
372
375
  region_ids: '.wintax.regions',
373
- regions: '.reg')
376
+ regions: '.reg'
377
+ )
374
378
  else
375
379
  MiGA::Result.new("#{base}.json")
376
380
  end
@@ -428,9 +432,8 @@ module MiGA::Dataset::Result
428
432
  return nil unless
429
433
  result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) ||
430
434
  result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
431
- r = MiGA::Result.new("#{base}.json")
432
435
  add_files_to_ds_result(
433
- r, name,
436
+ MiGA::Result.new("#{base}.json"), name,
434
437
  aai_medoids: '.aai-medoids.tsv',
435
438
  haai_db: '.haai.db',
436
439
  aai_db: '.aai.db',
@@ -438,7 +441,8 @@ module MiGA::Dataset::Result
438
441
  ani_db: '.ani.db',
439
442
  ref_tree: '.nwk',
440
443
  ref_tree_pdf: '.nwk.pdf',
441
- intax_test: '.intax.txt')
444
+ intax_test: '.intax.txt'
445
+ )
442
446
  end
443
447
 
444
448
  ##
@@ -27,7 +27,7 @@ class MiGA::Metadata < MiGA::MiGA
27
27
 
28
28
  ##
29
29
  # Initiate a MiGA::Metadata object with description in +path+.
30
- # It will create it if it doesn't exist
30
+ # It will create it if it doesn't exist.
31
31
  def initialize(path, defaults = {})
32
32
  @data = nil
33
33
  @path = File.absolute_path(path)
@@ -39,21 +39,21 @@ class MiGA::Metadata < MiGA::MiGA
39
39
  end
40
40
 
41
41
  ##
42
- # Parsed data as a Hash.
42
+ # Parsed data as a Hash
43
43
  def data
44
44
  self.load if @data.nil?
45
45
  @data
46
46
  end
47
47
 
48
48
  ##
49
- # Reset :created field and save the current data.
49
+ # Reset :created field and save the current data
50
50
  def create
51
51
  self[:created] = Time.now.to_s
52
52
  save
53
53
  end
54
54
 
55
55
  ##
56
- # Save the metadata into #path.
56
+ # Save the metadata into #path
57
57
  def save
58
58
  MiGA.DEBUG "Metadata.save #{path}"
59
59
  self[:updated] = Time.now.to_s
@@ -78,7 +78,7 @@ class MiGA::Metadata < MiGA::MiGA
78
78
  end
79
79
 
80
80
  ##
81
- # (Re-)load metadata stored in #path.
81
+ # (Re-)load metadata stored in #path
82
82
  def load
83
83
  sleeper = 0.0
84
84
  while File.exist? lock_file
@@ -87,11 +87,11 @@ class MiGA::Metadata < MiGA::MiGA
87
87
  end
88
88
  tmp = MiGA::Json.parse(path, additions: true)
89
89
  @data = {}
90
- tmp.each_pair{ |k,v| self[k] = v }
90
+ tmp.each { |k, v| self[k] = v }
91
91
  end
92
92
 
93
93
  ##
94
- # Delete file at #path.
94
+ # Delete file at #path
95
95
  def remove!
96
96
  MiGA.DEBUG "Metadata.remove! #{path}"
97
97
  File.unlink(path)
@@ -99,29 +99,41 @@ class MiGA::Metadata < MiGA::MiGA
99
99
  end
100
100
 
101
101
  ##
102
- # Lock file for the metadata.
103
- def lock_file ; "#{path}.lock" ; end
102
+ # Lock file for the metadata
103
+ def lock_file
104
+ "#{path}.lock"
105
+ end
104
106
 
105
107
  ##
106
- # Return the value of +k+ in #data.
107
- def [](k) data[k.to_sym] end
108
+ # Return the value of +k+ in #data
109
+ def [](k)
110
+ data[k.to_sym]
111
+ end
108
112
 
109
113
  ##
110
- # Set the value of +k+ to +v+.
111
- def []=(k,v)
114
+ # Set the value of +k+ to +v+
115
+ def []=(k, v)
112
116
  self.load if @data.nil?
113
117
  k = k.to_sym
114
- # Protect the special field :name
115
- v=v.miga_name if k==:name
116
- # Symbolize the special field :type
117
- v=v.to_sym if k==:type
118
- # Delete if nil, register, and return
119
- v.nil? ? @data.delete(k) : (@data[k]=v)
118
+ return @data.delete(k) if v.nil?
119
+
120
+ case k
121
+ when :name
122
+ # Protect the special field :name
123
+ v = v.miga_name
124
+ when :type
125
+ # Symbolize the special field :type
126
+ v = v.to_sym if k == :type
127
+ end
128
+
129
+ @data[k] = v
120
130
  end
121
131
 
122
132
  ##
123
- # Iterate +blk+ for each data with 2 arguments key and value.
124
- def each(&blk) data.each{ |k,v| blk.call(k,v) } ; end
133
+ # Iterate +blk+ for each data with 2 arguments: key and value
134
+ def each(&blk)
135
+ data.each { |k, v| blk.call(k, v) }
136
+ end
125
137
 
126
138
  ##
127
139
  # Show contents in JSON format as a String
@@ -30,16 +30,16 @@ module MiGA::Result::Dates
30
30
 
31
31
  private
32
32
 
33
- ##
34
- # Internal function to detect start and end dates
35
- def date_at(event)
36
- date = self[event]
37
- date ||= self[:started] if event == :start
38
- if date.nil?
39
- f = path event
40
- date = File.read(f) if File.size? f
41
- end
42
- date.nil? ? nil : Time.parse(date)
33
+ ##
34
+ # Internal function to detect start and end dates
35
+ def date_at(event)
36
+ date = self[event]
37
+ date ||= self[:started] if event == :start
38
+ if date.nil?
39
+ f = path event
40
+ date = File.read(f) if File.size? f
43
41
  end
42
+ Time.parse(date) unless date.nil?
43
+ end
44
44
  end
45
45
 
@@ -1,66 +1,82 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require "miga/common"
5
- require "miga/taxonomy"
6
- require "zlib"
4
+ require 'miga/common'
5
+ require 'miga/taxonomy'
6
+ require 'zlib'
7
7
 
8
8
  ##
9
9
  # Methods for taxonomy identification based on AAI/ANI values.
10
10
  module MiGA::TaxDist
11
-
12
- ##
13
- # Absolute path to the :intax or :novel data file (determined by +test+) for
14
- # AAI.
15
- def self.aai_path(test)
16
- test = test.downcase.to_sym
17
- return nil unless [:intax, :novel].include? test
18
- File.expand_path("../_data/aai-#{test}.tsv.gz", __FILE__)
19
- end
11
+ # Class-level
12
+ class << self
13
+
14
+ ##
15
+ # Absolute path to the :intax or :novel data file (determined by +test+) for
16
+ # AAI, determined for options +opts+. Supported options:
17
+ # - +:engine+: The search engine for AAI: +:blast+ (default) or +:diamond+
18
+ def aai_path(test, opts = {})
19
+ opts[:engine] ||= :blast
20
+ engine = opts[:engine].to_s.downcase.to_sym
21
+ test = test.to_s.downcase.to_sym
22
+ return nil unless %i[intax novel].include? test
23
+ engine = :blast if %i[blast+ blat].include? engine
24
+ return nil unless %i[blast diamond].include? engine
25
+ File.expand_path("../_data/aai-#{test}-#{engine}.tsv.gz", __FILE__)
26
+ end
20
27
 
21
- # Returns a Hash, where the keys correspond to the taxonomic level
22
- # (see MiGA::Taxonomy.LONG_RANKS for the meanings), and the values correspond
23
- # to the p-values of being :intax or :novel, as determined by +test+.
24
- def self.aai_pvalues(aai, test)
25
- Zlib::GzipReader.open(aai_path(test)) do |fh|
26
- keys = nil
27
- fh.each_line do |ln|
28
- row = ln.chomp.split(/\t/)
29
- if fh.lineno==1
30
- keys = row[1, row.size-1].map{ |i| i.to_i }
31
- elsif row.shift.to_f >= aai
32
- vals = {}
33
- keys.each do |i|
34
- v = row.shift
35
- next if v=="NA"
36
- rank = i==0 ? :root : MiGA::Taxonomy.KNOWN_RANKS[i-1]
37
- vals[rank] = v.to_f
28
+ ##
29
+ # Returns a Hash, where the keys correspond to the taxonomic level
30
+ # (see MiGA::Taxonomy.LONG_RANKS for the meanings), and the values
31
+ # correspond to the p-values of +test+ (one of +:intax+ or +:novel+)
32
+ # with options +opts+. See +aai_path+ for supported options.
33
+ def aai_pvalues(aai, test, opts = {})
34
+ Zlib::GzipReader.open(aai_path(test, opts)) do |fh|
35
+ keys = nil
36
+ fh.each_line do |ln|
37
+ row = ln.chomp.split(/\t/)
38
+ if fh.lineno == 1
39
+ keys = row[1, row.size - 1].map(&:to_i)
40
+ elsif row.shift.to_f >= aai
41
+ vals = {}
42
+ keys.each do |i|
43
+ v = row.shift
44
+ next if v == 'NA' # <- missing data
45
+ next if i == 1 # <- namespace, not a taxonomic rank
46
+ rank = i.zero? ? :root : MiGA::Taxonomy.KNOWN_RANKS[i]
47
+ vals[rank] = v.to_f
48
+ end
49
+ return vals
38
50
  end
39
- return vals
40
51
  end
41
- end # each_line ln
42
- end # open fh
43
- {}
44
- end
52
+ end
53
+ {}
54
+ end
45
55
 
46
- # Determines the degree to which a Float +aai+ value indicates similar
47
- # taxonomy (with +test+ :intax) or a novel taxon (with +test+ :novel). Returns
48
- # a Hash with "likelihood" phrases as keys and values as an array with
49
- # cannonical rank (as in MiGA::Taxonomy) and estimated p-value.
50
- def self.aai_taxtest(aai, test)
51
- meaning = {most_likely:[0,0.01],probably:[0.01,0.1],possibly_even:[0.1,0.5]}
52
- pv = aai_pvalues(aai, test)
53
- out = {}
54
- meaning.each do |phrase, thresholds|
55
- lwr, upr = thresholds
56
- min = pv.values.select{ |v| v < upr }.max
57
- return out if min.nil?
58
- if min >= lwr
59
- v = pv.select{ |_,vj| vj==min }
60
- out[phrase] = (test==:intax ? v.reverse_each : v).first
56
+ ##
57
+ # Determines the degree to which a Float +aai+ value indicates similar
58
+ # taxonomy (with +test+ :intax) or a novel taxon (with +test+ :novel) with
59
+ # options +opts+. See +aai_path+ for supported options.
60
+ # Returns a Hash with "likelihood" phrases as keys and values as an array
61
+ # with canonical rank (as in MiGA::Taxonomy) and estimated p-value.
62
+ def aai_taxtest(aai, test, opts = {})
63
+ meaning = {
64
+ most_likely: [0.00, 0.01],
65
+ probably: [0.01, 0.10],
66
+ possibly_even: [0.10, 0.50]
67
+ }
68
+ pvalues = aai_pvalues(aai, test, opts)
69
+ out = {}
70
+ meaning.each do |phrase, thresholds|
71
+ lwr, upr = thresholds
72
+ min = pvalues.values.select { |v| v < upr }.max
73
+ return out if min.nil?
74
+ if min >= lwr
75
+ v = pvalues.select { |_, vj| vj == min }
76
+ out[phrase] = (test == :intax ? v.reverse_each : v).first
77
+ end
61
78
  end
79
+ out
62
80
  end
63
- out
64
81
  end
65
-
66
82
  end
@@ -35,7 +35,6 @@ class MiGA::Taxonomy < MiGA::MiGA
35
35
  else
36
36
  initialize_by_ranks(str, ranks)
37
37
  end
38
- initialize_by_str(str)
39
38
  end
40
39
 
41
40
  ##
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.6, 2, 0]
13
+ VERSION = [0.6, 3, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2020, 3, 11)
21
+ VERSION_DATE = Date.new(2020, 3, 27)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -0,0 +1,119 @@
1
+ require 'test_helper'
2
+ require 'miga/project'
3
+
4
+ class ResultStatsTest < Test::Unit::TestCase
5
+
6
+ def setup
7
+ $tmp = Dir.mktmpdir
8
+ ENV['MIGA_HOME'] = $tmp
9
+ FileUtils.touch(File.expand_path('.miga_rc', ENV['MIGA_HOME']))
10
+ FileUtils.touch(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
11
+ $p = MiGA::Project.new(File.expand_path('project1', $tmp))
12
+ $d = $p.add_dataset('dataset1')
13
+ end
14
+
15
+ def teardown
16
+ FileUtils.rm_rf $tmp
17
+ ENV['MIGA_HOME'] = nil
18
+ end
19
+
20
+ def file_path(dir, ext)
21
+ File.join($p.path, dir, "#{$d.name}#{ext}")
22
+ end
23
+
24
+ def touch_done(dir)
25
+ FileUtils.touch(file_path(dir, '.done'))
26
+ end
27
+
28
+ def test_single_raw_reads
29
+ dir = 'data/01.raw_reads'
30
+ fq = file_path(dir, '.1.fastq')
31
+ File.open(fq, 'w') { |fh| fh.puts '@1','ACTAC','+','####' }
32
+ touch_done(dir)
33
+ r = $d.add_result(:raw_reads)
34
+ assert_equal({}, r[:stats])
35
+ r.compute_stats
36
+ assert(!r[:stats].empty?)
37
+ assert_equal(Hash, r[:stats].class)
38
+ assert_equal(1, r[:stats][:reads])
39
+ assert_equal([40.0, '%'], r[:stats][:g_c_content])
40
+ end
41
+
42
+ def test_coupled_raw_reads
43
+ dir = 'data/01.raw_reads'
44
+ fq = file_path(dir, '.1.fastq')
45
+ File.open(fq, 'w') { |fh| fh.puts '@1','ACTAC','+','####' }
46
+ fq = file_path(dir, '.2.fastq')
47
+ File.open(fq, 'w') { |fh| fh.puts '@1','ACTAC','+','####' }
48
+ touch_done(dir)
49
+ r = $d.add_result(:raw_reads)
50
+ r.compute_stats
51
+ assert(!r[:stats].empty?)
52
+ assert_nil(r[:stats][:reads])
53
+ assert_equal(1, r[:stats][:read_pairs])
54
+ assert_equal([40.0, '%'], r[:stats][:reverse_g_c_content])
55
+ end
56
+
57
+ def test_trimmed_reads
58
+ dir = 'data/02.trimmed_reads'
59
+ FileUtils.touch(file_path(dir, '.1.clipped.fastq'))
60
+ touch_done(dir)
61
+ r = $d.add_result(:trimmed_reads)
62
+ assert_equal({}, r[:stats])
63
+ r.compute_stats
64
+ assert_equal({}, r[:stats])
65
+ end
66
+
67
+ def test_read_quality
68
+ dir = 'data/03.read_quality'
69
+ Dir.mkdir(file_path(dir, '.solexaqa'))
70
+ Dir.mkdir(file_path(dir, '.fastqc'))
71
+ touch_done(dir)
72
+ r = $d.add_result(:read_quality)
73
+ assert_equal({}, r[:stats])
74
+ r.compute_stats
75
+ assert_equal({}, r[:stats])
76
+ end
77
+
78
+ def test_trimmed_fasta
79
+ dir = 'data/04.trimmed_fasta'
80
+ fa = file_path(dir, '.CoupledReads.fa')
81
+ File.open(fa, 'w') { |fh| fh.puts '>1','ACTAC' }
82
+ touch_done(dir)
83
+ r = $d.add_result(:trimmed_fasta)
84
+ assert_equal({}, r[:stats])
85
+ r.compute_stats
86
+ assert_equal(1, r[:stats][:reads])
87
+ assert_equal([40.0, '%'], r[:stats][:g_c_content])
88
+ end
89
+
90
+ def test_assembly
91
+ dir = 'data/05.assembly'
92
+ fa = file_path(dir, '.LargeContigs.fna')
93
+ File.open(fa, 'w') { |fh| fh.puts '>1','ACTAC' }
94
+ touch_done(dir)
95
+ r = $d.add_result(:assembly)
96
+ assert_equal({}, r[:stats])
97
+ r.compute_stats
98
+ assert_equal(1, r[:stats][:contigs])
99
+ assert_equal([5, 'bp'], r[:stats][:total_length])
100
+ assert_equal([40.0, '%'], r[:stats][:g_c_content])
101
+ end
102
+
103
+ def test_cds
104
+ dir = 'data/06.cds'
105
+ fa = file_path(dir, '.faa')
106
+ File.open(fa, 'w') { |fh| fh.puts '>1','M' }
107
+ touch_done(dir)
108
+ r = $d.add_result(:cds)
109
+ assert_equal({}, r[:stats])
110
+ r.compute_stats
111
+ assert_equal(1, r[:stats][:predicted_proteins])
112
+ assert_equal([1.0, 'aa'], r[:stats][:average_length])
113
+ assert_nil(r[:stats][:coding_density])
114
+ test_assembly
115
+ r.compute_stats
116
+ assert_equal([60.0, '%'], r[:stats][:coding_density])
117
+ end
118
+
119
+ end
@@ -10,14 +10,14 @@ class ResultTest < Test::Unit::TestCase
10
10
  FileUtils.touch(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
11
11
  $p1 = MiGA::Project.new(File.expand_path('project1', $tmp))
12
12
  $d1 = $p1.add_dataset('dataset1')
13
- FileUtils.touch(File.expand_path(
14
- "data/02.trimmed_reads/#{$d1.name}.1.clipped.fastq", $p1.path))
15
- FileUtils.touch(File.expand_path(
16
- "data/02.trimmed_reads/#{$d1.name}.done", $p1.path))
17
- FileUtils.touch(File.expand_path(
18
- 'data/10.clades/01.find/miga-project.empty', $p1.path))
19
- FileUtils.touch(File.expand_path(
20
- 'data/10.clades/01.find/miga-project.done', $p1.path))
13
+ FileUtils.touch(
14
+ File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.1.clipped.fastq"))
15
+ FileUtils.touch(
16
+ File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.done"))
17
+ FileUtils.touch(
18
+ File.join($p1.path, 'data/10.clades/01.find/miga-project.empty'))
19
+ FileUtils.touch(
20
+ File.join($p1.path, 'data/10.clades/01.find/miga-project.done'))
21
21
  end
22
22
 
23
23
  def teardown
@@ -46,4 +46,18 @@ class ResultTest < Test::Unit::TestCase
46
46
  assert_equal($p1.path, r.source.path)
47
47
  end
48
48
 
49
+ def test_dates
50
+ r = $d1.add_result(:trimmed_reads)
51
+ assert_nil(r.done_at)
52
+ assert_nil(r.started_at)
53
+ tf = File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.done")
54
+ File.open(tf, 'w') { |fh| fh.puts Time.new(1,2,3,4,5) }
55
+ assert_equal(Time, r.done_at.class)
56
+ assert_nil(r.running_time)
57
+ tf = File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.start")
58
+ File.open(tf, 'w') { |fh| fh.puts Time.new(1,2,3,4,0) }
59
+ r = $d1.add_result(:trimmed_reads)
60
+ assert_equal(5.0, r.running_time)
61
+ end
62
+
49
63
  end
@@ -0,0 +1,59 @@
1
+ require 'test_helper'
2
+ require 'miga/tax_dist'
3
+
4
+ class TaxDistTest < Test::Unit::TestCase
5
+
6
+ def test_aai_path
7
+ assert(File.size? MiGA::TaxDist.aai_path(:intax))
8
+ assert(File.size? MiGA::TaxDist.aai_path(:novel))
9
+ assert(File.size? MiGA::TaxDist.aai_path(:intax, engine: :diamond))
10
+ assert(File.size? MiGA::TaxDist.aai_path(:novel, engine: :blast))
11
+ assert(File.size? MiGA::TaxDist.aai_path(:novel, engine: :'blast+'))
12
+ end
13
+
14
+ def test_aai_pvalues
15
+ distant_intax = MiGA::TaxDist.aai_pvalues(35.0, :intax)
16
+ assert_lt(distant_intax[:root], 0.05)
17
+ assert_gt(distant_intax[:g], 0.05)
18
+ assert_nil(distant_intax[:ns])
19
+
20
+ close_intax = MiGA::TaxDist.aai_pvalues(99.0, :intax, engine: :blast)
21
+ assert_lt(close_intax[:root], 0.05)
22
+ assert_lt(close_intax[:s], 0.05)
23
+
24
+ close_intax = MiGA::TaxDist.aai_pvalues(99.0, :intax, engine: :diamond)
25
+ assert_lt(close_intax[:root], 0.05)
26
+ assert_lt(close_intax[:s], 0.05)
27
+
28
+ distant_novel = MiGA::TaxDist.aai_pvalues(35.0, :novel, engine: :diamond)
29
+ $stderr.puts distant_novel
30
+ assert_gt(distant_novel[:root], 0.05)
31
+ assert_lt(distant_novel[:g], 0.05)
32
+ assert_nil(distant_novel[:ns])
33
+
34
+ close_novel = MiGA::TaxDist.aai_pvalues(99.0, :novel)
35
+ assert_gt(close_novel[:root], 0.05)
36
+ assert_gt(close_novel[:f], 0.05)
37
+
38
+ assert_equal({}, MiGA::TaxDist.aai_pvalues(101.0, :intax))
39
+ end
40
+
41
+ def test_aai_taxtest
42
+ distant_intax = MiGA::TaxDist.aai_taxtest(35.0, :intax, engine: :diamond)
43
+ assert_equal(:root, distant_intax[:most_likely][0])
44
+ assert_nil(distant_intax[:probably])
45
+ assert_nil(distant_intax[:possibly_even])
46
+
47
+ distant_intax = MiGA::TaxDist.aai_taxtest(35.0, :intax, engine: :blast)
48
+ assert_equal(:root, distant_intax[:most_likely][0])
49
+ assert_nil(distant_intax[:probably])
50
+ assert_nil(distant_intax[:possibly_even])
51
+
52
+ close_intax = MiGA::TaxDist.aai_taxtest(99.0, :intax, engine: :diamond)
53
+ assert_equal(:s, close_intax[:probably][0])
54
+
55
+ close_intax = MiGA::TaxDist.aai_taxtest(99.0, :intax, engine: :blast)
56
+ assert_equal(:s, close_intax[:probably][0])
57
+ end
58
+
59
+ end
@@ -3,6 +3,7 @@ SimpleCov.start
3
3
 
4
4
  require 'rubygems'
5
5
  require 'test/unit'
6
+ require 'assertions'
6
7
  require 'miga/common'
7
8
  require 'stringio'
8
9
 
@@ -63,12 +63,12 @@ module MiGA::DistanceRunner::Commands
63
63
 
64
64
  ##
65
65
  # Execute an AAI command
66
- def aai_cmd(f1, f2, n1, n2, db, o={})
66
+ def aai_cmd(f1, f2, n1, n2, db, o = {})
67
67
  o = opts.merge(o)
68
68
  v = `aai.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
69
69
  --name1 "#{n1}" --name2 "#{n2}" \
70
70
  -t "#{o[:thr]}" -a --lookup-first "--#{o[:aai_save_rbm]}" \
71
- -p "#{o[:aai_p] || "blast+"}"`.chomp
71
+ -p "#{o[:aai_p] || 'blast+'}"`.chomp
72
72
  (v.nil? || v.empty?) ? 0 : v.to_f
73
73
  end
74
74
 
@@ -91,7 +91,7 @@ module MiGA::DistanceRunner::Commands
91
91
  v = `ani.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
92
92
  --name1 "#{n1}" --name2 "#{n2}" \
93
93
  -t "#{opts[:thr]}" -a --no-save-regions --no-save-rbm \
94
- --lookup-first -p "#{o[:ani_p] || "blast+"}"`.chomp
94
+ --lookup-first -p "#{o[:ani_p] || 'blast+'}"`.chomp
95
95
  end
96
96
  v.nil? || v.empty? ? 0 : v.to_f
97
97
  end
@@ -73,14 +73,17 @@ module MiGA::DistanceRunner::Pipeline
73
73
  cr = dataset.closest_relatives(1, from_ref_project)
74
74
  return if cr.nil? or cr.empty?
75
75
  tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
76
+
76
77
  # Run the test for each rank
77
- r = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax).map do |k,v|
78
+ tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
79
+ r = tax_test.map do |k,v|
78
80
  sig = ''
79
- [0.5,0.1,0.05,0.01].each{ |i| sig << '*' if v<i }
81
+ [0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
80
82
  [MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
81
83
  end
84
+
82
85
  # Save test
83
- File.open(File.expand_path("#{dataset.name}.intax.txt", home), "w") do |fh|
86
+ File.open(File.expand_path("#{dataset.name}.intax.txt", home), 'w') do |fh|
84
87
  fh.puts "Closest relative: #{cr[0][0]} with AAI: #{cr[0][1]}."
85
88
  fh.puts ''
86
89
  fh.puts MiGA::MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
@@ -22,6 +22,7 @@ class MiGA::DistanceRunner
22
22
  raise "No project at #{project_path}"
23
23
  @dataset = project.dataset(dataset_name)
24
24
  @home = File.expand_path('data/09.distances', project.path)
25
+
25
26
  # Default opts
26
27
  if project.metadata[:aai_save_rbm] == false
27
28
  @opts[:aai_save_rbm] ||= 'no-save-rbm'
@@ -48,6 +49,8 @@ class MiGA::DistanceRunner
48
49
  [:haai_p, :aai_p, :ani_p, :distances_checkpoint].each do |m|
49
50
  @opts[m] ||= ref_project.metadata[m]
50
51
  end
52
+ @opts[:aai_p] ||= 'blast+'
53
+ @opts[:ani_p] ||= 'blast+'
51
54
  @opts[:distances_checkpoint] ||= 10
52
55
  @opts[:distances_checkpoint] = @opts[:distances_checkpoint].to_i
53
56
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.2.0
4
+ version: 0.6.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-11 00:00:00.000000000 Z
11
+ date: 2020-03-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: assertions
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1'
83
97
  description: Microbial Genomes Atlas
84
98
  email: lmrodriguezr@gmail.com
85
99
  executables:
@@ -94,8 +108,10 @@ files:
94
108
  - Rakefile
95
109
  - bin/miga
96
110
  - lib/miga.rb
97
- - lib/miga/_data/aai-intax.tsv.gz
98
- - lib/miga/_data/aai-novel.tsv.gz
111
+ - lib/miga/_data/aai-intax-blast.tsv.gz
112
+ - lib/miga/_data/aai-intax-diamond.tsv.gz
113
+ - lib/miga/_data/aai-novel-blast.tsv.gz
114
+ - lib/miga/_data/aai-novel-diamond.tsv.gz
99
115
  - lib/miga/cli.rb
100
116
  - lib/miga/cli/action.rb
101
117
  - lib/miga/cli/action/about.rb
@@ -199,7 +215,9 @@ files:
199
215
  - test/metadata_test.rb
200
216
  - test/project_test.rb
201
217
  - test/remote_dataset_test.rb
218
+ - test/result_stats_test.rb
202
219
  - test/result_test.rb
220
+ - test/tax_dist_test.rb
203
221
  - test/tax_index_test.rb
204
222
  - test/taxonomy_test.rb
205
223
  - test/test_helper.rb