miga-base 0.6.2.0 → 0.6.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax-blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax-diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel-blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel-diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/new.rb +13 -4
- data/lib/miga/dataset.rb +19 -16
- data/lib/miga/dataset/result.rb +55 -51
- data/lib/miga/metadata.rb +33 -21
- data/lib/miga/result/dates.rb +10 -10
- data/lib/miga/tax_dist.rb +67 -51
- data/lib/miga/taxonomy.rb +0 -1
- data/lib/miga/version.rb +2 -2
- data/test/result_stats_test.rb +119 -0
- data/test/result_test.rb +22 -8
- data/test/tax_dist_test.rb +59 -0
- data/test/test_helper.rb +1 -0
- data/utils/distance/commands.rb +3 -3
- data/utils/distance/pipeline.rb +6 -3
- data/utils/distance/runner.rb +3 -0
- metadata +22 -4
- data/lib/miga/_data/aai-intax.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.tsv.gz +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b225951f374bcd267560e5bd8234fb88bcd6b0c11b0561fb4b3b479af39c4b3
|
4
|
+
data.tar.gz: 9b32d40ea94ceb526fe0ba732c77fce978b0cba5decffd4e1c0d701594670dbd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b7fe9b2cbb09b6612b762c7c9202b4b27ece7a0b6f4dd23eecee9bddc835c130f15a63772011106a1f0f1425e5445fa1541a8ebc81661ef341dcacec3ae22193
|
7
|
+
data.tar.gz: 501fa797aa6726ac5cdc6c043a3073d03a7ba3ed81d63ebac9ef989a76aa1947806c9052ab50c089567001ad1b1b92ad04cc05ea4ca33691de36c4dcb2e34b52
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/miga/cli/action/new.rb
CHANGED
@@ -11,20 +11,25 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
|
|
11
11
|
opt.on(
|
12
12
|
'-n', '--name STRING',
|
13
13
|
'Name of the project'
|
14
|
-
|
14
|
+
) { |v| cli[:name] = v }
|
15
15
|
opt.on(
|
16
16
|
'-d', '--description STRING',
|
17
17
|
'Description of the project'
|
18
|
-
|
18
|
+
) { |v| cli[:description] = v }
|
19
19
|
opt.on(
|
20
20
|
'-c', '--comments STRING',
|
21
21
|
'Comments on the project'
|
22
|
-
|
22
|
+
) { |v| cli[:comments] = v }
|
23
|
+
opt.on(
|
24
|
+
'--fast',
|
25
|
+
'Use faster identity engines (Diamond-AAI and FastANI)',
|
26
|
+
'Equivalent to: -m aai_p=diamond,ani_p=fastani'
|
27
|
+
) { |v| cli[:fast] = v }
|
23
28
|
opt.on(
|
24
29
|
'-m', '--metadata STRING',
|
25
30
|
'Metadata as key-value pairs separated by = and delimited by comma',
|
26
31
|
'Values are saved as strings except for booleans (true / false) or nil'
|
27
|
-
|
32
|
+
) { |v| cli[:metadata] = v }
|
28
33
|
end
|
29
34
|
end
|
30
35
|
|
@@ -40,6 +45,10 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
|
|
40
45
|
raise 'Project already exists, aborting.' if Project.exist? cli[:project]
|
41
46
|
p = Project.new(cli[:project], false)
|
42
47
|
p = cli.add_metadata(p)
|
48
|
+
if cli[:fast]
|
49
|
+
p.metadata[:aai_p] = 'diamond'
|
50
|
+
p.metadata[:ani_p] = 'fastani'
|
51
|
+
end
|
43
52
|
p.save
|
44
53
|
end
|
45
54
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -9,10 +9,9 @@ require 'sqlite3'
|
|
9
9
|
##
|
10
10
|
# Dataset representation in MiGA.
|
11
11
|
class MiGA::Dataset < MiGA::MiGA
|
12
|
-
|
13
12
|
include MiGA::Dataset::Result
|
14
13
|
include MiGA::Dataset::Hooks
|
15
|
-
|
14
|
+
|
16
15
|
# Class-level
|
17
16
|
class << self
|
18
17
|
|
@@ -27,7 +26,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
27
26
|
def INFO_FIELDS
|
28
27
|
%w(name created updated type ref user description comments)
|
29
28
|
end
|
30
|
-
|
29
|
+
|
31
30
|
end
|
32
31
|
|
33
32
|
# Instance-level
|
@@ -35,11 +34,11 @@ class MiGA::Dataset < MiGA::MiGA
|
|
35
34
|
##
|
36
35
|
# MiGA::Project that contains the dataset.
|
37
36
|
attr_reader :project
|
38
|
-
|
37
|
+
|
39
38
|
##
|
40
39
|
# Datasets are uniquely identified by +name+ in a project.
|
41
40
|
attr_reader :name
|
42
|
-
|
41
|
+
|
43
42
|
##
|
44
43
|
# Create a MiGA::Dataset object in a +project+ MiGA::Project with a
|
45
44
|
# uniquely identifying +name+. +is_ref+ indicates if the dataset is to
|
@@ -52,21 +51,25 @@ class MiGA::Dataset < MiGA::MiGA
|
|
52
51
|
end
|
53
52
|
@project = project
|
54
53
|
@name = name
|
54
|
+
@metadata = nil
|
55
55
|
metadata[:ref] = is_ref
|
56
56
|
@metadata_future = [
|
57
57
|
File.expand_path("metadata/#{name}.json", project.path),
|
58
58
|
metadata
|
59
59
|
]
|
60
60
|
save unless File.exist? @metadata_future[0]
|
61
|
-
pull_hook :on_load
|
62
61
|
end
|
63
62
|
|
64
63
|
##
|
65
|
-
# MiGA::Metadata with information about the dataset
|
64
|
+
# MiGA::Metadata with information about the dataset
|
66
65
|
def metadata
|
67
|
-
@metadata
|
66
|
+
if @metadata.nil?
|
67
|
+
@metadata = MiGA::Metadata.new(*@metadata_future)
|
68
|
+
pull_hook :on_load
|
69
|
+
end
|
70
|
+
@metadata
|
68
71
|
end
|
69
|
-
|
72
|
+
|
70
73
|
##
|
71
74
|
# Save any changes you've made in the dataset.
|
72
75
|
def save
|
@@ -74,11 +77,11 @@ class MiGA::Dataset < MiGA::MiGA
|
|
74
77
|
metadata.save
|
75
78
|
pull_hook :on_save
|
76
79
|
end
|
77
|
-
|
80
|
+
|
78
81
|
##
|
79
82
|
# Get the type of dataset as Symbol.
|
80
83
|
def type ; metadata[:type] ; end
|
81
|
-
|
84
|
+
|
82
85
|
##
|
83
86
|
# Delete the dataset with all its contents (including results) and returns
|
84
87
|
# nil.
|
@@ -103,7 +106,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
103
106
|
self.metadata.save
|
104
107
|
pull_hook :on_activate
|
105
108
|
end
|
106
|
-
|
109
|
+
|
107
110
|
##
|
108
111
|
# Get standard metadata values for the dataset as Array.
|
109
112
|
def info
|
@@ -111,7 +114,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
111
114
|
(k == 'name') ? self.name : metadata[k.to_sym]
|
112
115
|
end
|
113
116
|
end
|
114
|
-
|
117
|
+
|
115
118
|
##
|
116
119
|
# Is this dataset a reference?
|
117
120
|
def is_ref? ; !!metadata[:ref] ; end
|
@@ -119,14 +122,14 @@ class MiGA::Dataset < MiGA::MiGA
|
|
119
122
|
##
|
120
123
|
# Is this dataset a query (non-reference)?
|
121
124
|
def is_query? ; !metadata[:ref] ; end
|
122
|
-
|
125
|
+
|
123
126
|
##
|
124
127
|
# Is this dataset known to be multi-organism?
|
125
128
|
def is_multi?
|
126
129
|
return false if metadata[:type].nil? or @@KNOWN_TYPES[type].nil?
|
127
130
|
@@KNOWN_TYPES[type][:multi]
|
128
131
|
end
|
129
|
-
|
132
|
+
|
130
133
|
##
|
131
134
|
# Is this dataset known to be single-organism?
|
132
135
|
def is_nonmulti?
|
@@ -139,7 +142,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
139
142
|
def is_active?
|
140
143
|
metadata[:inactive].nil? or !metadata[:inactive]
|
141
144
|
end
|
142
|
-
|
145
|
+
|
143
146
|
##
|
144
147
|
# Should I ignore +task+ for this dataset?
|
145
148
|
def ignore_task?(task)
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -24,10 +24,10 @@ module MiGA::Dataset::Result
|
|
24
24
|
end
|
25
25
|
|
26
26
|
##
|
27
|
-
# For each result executes the 2-ary
|
28
|
-
def each_result
|
29
|
-
@@RESULT_DIRS.
|
30
|
-
|
27
|
+
# For each result executes the 2-ary block: key symbol and MiGA::Result
|
28
|
+
def each_result
|
29
|
+
@@RESULT_DIRS.each_key do |k|
|
30
|
+
yield(k, result(k)) unless result(k).nil?
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
@@ -50,20 +50,20 @@ module MiGA::Dataset::Result
|
|
50
50
|
r_pre = MiGA::Result.load("#{base}.json")
|
51
51
|
return r_pre if (r_pre.nil? && !save) || !r_pre.nil?
|
52
52
|
end
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
pull_hook(:on_result_ready, result_type)
|
59
|
-
end
|
53
|
+
fun = "add_result_#{result_type}"
|
54
|
+
r = send(fun, base, opts) if File.exist?("#{base}.done")
|
55
|
+
return if r.nil?
|
56
|
+
r.save
|
57
|
+
pull_hook(:on_result_ready, result_type)
|
60
58
|
r
|
61
59
|
end
|
62
60
|
|
63
61
|
##
|
64
62
|
# Gets a result as MiGA::Result for the datasets with +result_type+. This is
|
65
63
|
# equivalent to +add_result(result_type, false)+.
|
66
|
-
def get_result(result_type)
|
64
|
+
def get_result(result_type)
|
65
|
+
add_result(result_type, false)
|
66
|
+
end
|
67
67
|
|
68
68
|
##
|
69
69
|
# Returns the key symbol of the first registered result (sorted by the
|
@@ -179,7 +179,7 @@ module MiGA::Dataset::Result
|
|
179
179
|
r = get_result(:distances)
|
180
180
|
ref = project.datasets.select(&:is_ref?).select(&:is_active?).map(&:name)
|
181
181
|
return if r.nil?
|
182
|
-
[
|
182
|
+
%i[haai_db aai_db ani_db].each do |db_type|
|
183
183
|
db = r.file_path(db_type)
|
184
184
|
next if db.nil? || !File.size?(db)
|
185
185
|
sqlite_db = SQLite3::Database.new db
|
@@ -198,9 +198,8 @@ module MiGA::Dataset::Result
|
|
198
198
|
# Add result type +:raw_reads+ at +base+ (no +_opts+ supported)
|
199
199
|
def add_result_raw_reads(base, _opts)
|
200
200
|
return nil unless result_files_exist?(base, '.1.fastq')
|
201
|
-
r = MiGA::Result.new("#{base}.json")
|
202
201
|
add_files_to_ds_result(
|
203
|
-
|
202
|
+
MiGA::Result.new("#{base}.json"), name,
|
204
203
|
if result_files_exist?(base, '.2.fastq')
|
205
204
|
{ pair1: '.1.fastq', pair2: '.2.fastq' }
|
206
205
|
else
|
@@ -213,25 +212,30 @@ module MiGA::Dataset::Result
|
|
213
212
|
# Add result type +:trimmed_reads+ at +base+ (no +_opts+ supported)
|
214
213
|
def add_result_trimmed_reads(base, _opts)
|
215
214
|
return nil unless result_files_exist?(base, '.1.clipped.fastq')
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
215
|
+
add_files_to_ds_result(
|
216
|
+
MiGA::Result.new("#{base}.json"), name,
|
217
|
+
if result_files_exist?(base, '.2.clipped.fastq')
|
218
|
+
{
|
219
|
+
pair1: '.1.clipped.fastq',
|
220
|
+
pair2: '.2.clipped.fastq',
|
221
|
+
single: '.1.clipped.single.fastq'
|
222
|
+
}
|
223
|
+
else
|
224
|
+
{ single: '.1.clipped.fastq' }
|
225
|
+
end
|
226
|
+
).tap do |r|
|
227
|
+
r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
|
224
228
|
end
|
225
|
-
r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
|
226
|
-
r
|
227
229
|
end
|
228
230
|
|
229
231
|
##
|
230
232
|
# Add result type +:read_quality+ at +base+ (no +_opts+ supported)
|
231
233
|
def add_result_read_quality(base, _opts)
|
232
234
|
return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
|
233
|
-
|
234
|
-
|
235
|
+
add_files_to_ds_result(
|
236
|
+
MiGA::Result.new("#{base}.json"), name,
|
237
|
+
solexaqa: '.solexaqa', fastqc: '.fastqc'
|
238
|
+
)
|
235
239
|
end
|
236
240
|
|
237
241
|
##
|
@@ -241,13 +245,13 @@ module MiGA::Dataset::Result
|
|
241
245
|
result_files_exist?(base, '.CoupledReads.fa') ||
|
242
246
|
result_files_exist?(base, '.SingleReads.fa') ||
|
243
247
|
result_files_exist?(base, %w[.1.fasta .2.fasta])
|
244
|
-
r = MiGA::Result.new("#{base}.json")
|
245
248
|
add_files_to_ds_result(
|
246
|
-
|
249
|
+
MiGA::Result.new("#{base}.json"), name,
|
247
250
|
coupled: '.CoupledReads.fa',
|
248
251
|
single: '.SingleReads.fa',
|
249
252
|
pair1: '.1.fasta',
|
250
|
-
pair2: '.2.fasta'
|
253
|
+
pair2: '.2.fasta'
|
254
|
+
)
|
251
255
|
end
|
252
256
|
|
253
257
|
##
|
@@ -255,12 +259,12 @@ module MiGA::Dataset::Result
|
|
255
259
|
# +is_clean: Boolean+.
|
256
260
|
def add_result_assembly(base, opts)
|
257
261
|
return nil unless result_files_exist?(base, '.LargeContigs.fna')
|
258
|
-
r = MiGA::Result.new("#{base}.json")
|
259
262
|
r = add_files_to_ds_result(
|
260
|
-
|
263
|
+
MiGA::Result.new("#{base}.json"), name,
|
261
264
|
largecontigs: '.LargeContigs.fna',
|
262
265
|
allcontigs: '.AllContigs.fna',
|
263
|
-
assembly_data: ''
|
266
|
+
assembly_data: ''
|
267
|
+
)
|
264
268
|
opts[:is_clean] ||= false
|
265
269
|
r.clean! if opts[:is_clean]
|
266
270
|
unless r.clean?
|
@@ -274,14 +278,14 @@ module MiGA::Dataset::Result
|
|
274
278
|
# Add result type +:cds+ at +base+. Hash +opts+ supports +is_clean: Boolean+
|
275
279
|
def add_result_cds(base, opts)
|
276
280
|
return nil unless result_files_exist?(base, %w[.faa])
|
277
|
-
r = MiGA::Result.new("#{base}.json")
|
278
281
|
r = add_files_to_ds_result(
|
279
|
-
|
282
|
+
MiGA::Result.new("#{base}.json"), name,
|
280
283
|
proteins: '.faa',
|
281
284
|
genes: '.fna',
|
282
285
|
gff2: '.gff2',
|
283
286
|
gff3: '.gff3',
|
284
|
-
tab: '.tab'
|
287
|
+
tab: '.tab'
|
288
|
+
)
|
285
289
|
opts[:is_clean] ||= false
|
286
290
|
r.clean! if opts[:is_clean]
|
287
291
|
unless r.clean?
|
@@ -296,13 +300,13 @@ module MiGA::Dataset::Result
|
|
296
300
|
# Add result type +:essential_genes+ at +base+ (no +_opts+ supported).
|
297
301
|
def add_result_essential_genes(base, _opts)
|
298
302
|
return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
|
299
|
-
r = MiGA::Result.new("#{base}.json")
|
300
303
|
add_files_to_ds_result(
|
301
|
-
|
304
|
+
MiGA::Result.new("#{base}.json"), name,
|
302
305
|
ess_genes: '.ess.faa',
|
303
306
|
collection: '.ess',
|
304
307
|
report: '.ess/log',
|
305
|
-
alignments: '.ess/proteins.aln'
|
308
|
+
alignments: '.ess/proteins.aln'
|
309
|
+
)
|
306
310
|
end
|
307
311
|
|
308
312
|
##
|
@@ -310,12 +314,12 @@ module MiGA::Dataset::Result
|
|
310
314
|
def add_result_ssu(base, opts)
|
311
315
|
return MiGA::Result.new("#{base}.json") if result(:assembly).nil?
|
312
316
|
return nil unless result_files_exist?(base, '.ssu.fa')
|
313
|
-
r = MiGA::Result.new("#{base}.json")
|
314
317
|
r = add_files_to_ds_result(
|
315
|
-
|
318
|
+
MiGA::Result.new("#{base}.json"), name,
|
316
319
|
longest_ssu_gene: '.ssu.fa',
|
317
320
|
gff: '.ssu.gff',
|
318
|
-
all_ssu_genes: '.ssu.all.fa'
|
321
|
+
all_ssu_genes: '.ssu.all.fa'
|
322
|
+
)
|
319
323
|
opts[:is_clean] ||= false
|
320
324
|
r.clean! if opts[:is_clean]
|
321
325
|
unless r.clean?
|
@@ -332,9 +336,8 @@ module MiGA::Dataset::Result
|
|
332
336
|
return nil unless
|
333
337
|
result_files_exist?(base, '.mytaxa') ||
|
334
338
|
result_files_exist?(base, '.nomytaxa.txt')
|
335
|
-
r = MiGA::Result.new("#{base}.json")
|
336
339
|
add_files_to_ds_result(
|
337
|
-
|
340
|
+
MiGA::Result.new("#{base}.json"), name,
|
338
341
|
mytaxa: '.mytaxa',
|
339
342
|
blast: '.blast',
|
340
343
|
mytaxain: '.mytaxain',
|
@@ -344,7 +347,8 @@ module MiGA::Dataset::Result
|
|
344
347
|
phylum: '.mytaxa.Phylum.txt',
|
345
348
|
innominate: '.mytaxa.innominate',
|
346
349
|
kronain: '.mytaxa.krona',
|
347
|
-
krona: '.html'
|
350
|
+
krona: '.html'
|
351
|
+
)
|
348
352
|
else
|
349
353
|
MiGA::Result.new("#{base}.json")
|
350
354
|
end
|
@@ -357,9 +361,8 @@ module MiGA::Dataset::Result
|
|
357
361
|
return nil unless
|
358
362
|
result_files_exist?(base, %w[.pdf .mytaxa]) ||
|
359
363
|
result_files_exist?(base, '.nomytaxa.txt')
|
360
|
-
r = MiGA::Result.new("#{base}.json")
|
361
364
|
add_files_to_ds_result(
|
362
|
-
|
365
|
+
MiGA::Result.new("#{base}.json"), name,
|
363
366
|
nomytaxa: '.nomytaxa.txt',
|
364
367
|
mytaxa: '.mytaxa',
|
365
368
|
report: '.pdf',
|
@@ -370,7 +373,8 @@ module MiGA::Dataset::Result
|
|
370
373
|
wintax: '.wintax',
|
371
374
|
gene_ids: '.wintax.genes',
|
372
375
|
region_ids: '.wintax.regions',
|
373
|
-
regions: '.reg'
|
376
|
+
regions: '.reg'
|
377
|
+
)
|
374
378
|
else
|
375
379
|
MiGA::Result.new("#{base}.json")
|
376
380
|
end
|
@@ -428,9 +432,8 @@ module MiGA::Dataset::Result
|
|
428
432
|
return nil unless
|
429
433
|
result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) ||
|
430
434
|
result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
|
431
|
-
r = MiGA::Result.new("#{base}.json")
|
432
435
|
add_files_to_ds_result(
|
433
|
-
|
436
|
+
MiGA::Result.new("#{base}.json"), name,
|
434
437
|
aai_medoids: '.aai-medoids.tsv',
|
435
438
|
haai_db: '.haai.db',
|
436
439
|
aai_db: '.aai.db',
|
@@ -438,7 +441,8 @@ module MiGA::Dataset::Result
|
|
438
441
|
ani_db: '.ani.db',
|
439
442
|
ref_tree: '.nwk',
|
440
443
|
ref_tree_pdf: '.nwk.pdf',
|
441
|
-
intax_test: '.intax.txt'
|
444
|
+
intax_test: '.intax.txt'
|
445
|
+
)
|
442
446
|
end
|
443
447
|
|
444
448
|
##
|
data/lib/miga/metadata.rb
CHANGED
@@ -27,7 +27,7 @@ class MiGA::Metadata < MiGA::MiGA
|
|
27
27
|
|
28
28
|
##
|
29
29
|
# Initiate a MiGA::Metadata object with description in +path+.
|
30
|
-
# It will create it if it doesn't exist
|
30
|
+
# It will create it if it doesn't exist.
|
31
31
|
def initialize(path, defaults = {})
|
32
32
|
@data = nil
|
33
33
|
@path = File.absolute_path(path)
|
@@ -39,21 +39,21 @@ class MiGA::Metadata < MiGA::MiGA
|
|
39
39
|
end
|
40
40
|
|
41
41
|
##
|
42
|
-
# Parsed data as a Hash
|
42
|
+
# Parsed data as a Hash
|
43
43
|
def data
|
44
44
|
self.load if @data.nil?
|
45
45
|
@data
|
46
46
|
end
|
47
47
|
|
48
48
|
##
|
49
|
-
# Reset :created field and save the current data
|
49
|
+
# Reset :created field and save the current data
|
50
50
|
def create
|
51
51
|
self[:created] = Time.now.to_s
|
52
52
|
save
|
53
53
|
end
|
54
54
|
|
55
55
|
##
|
56
|
-
# Save the metadata into #path
|
56
|
+
# Save the metadata into #path
|
57
57
|
def save
|
58
58
|
MiGA.DEBUG "Metadata.save #{path}"
|
59
59
|
self[:updated] = Time.now.to_s
|
@@ -78,7 +78,7 @@ class MiGA::Metadata < MiGA::MiGA
|
|
78
78
|
end
|
79
79
|
|
80
80
|
##
|
81
|
-
# (Re-)load metadata stored in #path
|
81
|
+
# (Re-)load metadata stored in #path
|
82
82
|
def load
|
83
83
|
sleeper = 0.0
|
84
84
|
while File.exist? lock_file
|
@@ -87,11 +87,11 @@ class MiGA::Metadata < MiGA::MiGA
|
|
87
87
|
end
|
88
88
|
tmp = MiGA::Json.parse(path, additions: true)
|
89
89
|
@data = {}
|
90
|
-
tmp.
|
90
|
+
tmp.each { |k, v| self[k] = v }
|
91
91
|
end
|
92
92
|
|
93
93
|
##
|
94
|
-
# Delete file at #path
|
94
|
+
# Delete file at #path
|
95
95
|
def remove!
|
96
96
|
MiGA.DEBUG "Metadata.remove! #{path}"
|
97
97
|
File.unlink(path)
|
@@ -99,29 +99,41 @@ class MiGA::Metadata < MiGA::MiGA
|
|
99
99
|
end
|
100
100
|
|
101
101
|
##
|
102
|
-
# Lock file for the metadata
|
103
|
-
def lock_file
|
102
|
+
# Lock file for the metadata
|
103
|
+
def lock_file
|
104
|
+
"#{path}.lock"
|
105
|
+
end
|
104
106
|
|
105
107
|
##
|
106
|
-
# Return the value of +k+ in #data
|
107
|
-
def [](k)
|
108
|
+
# Return the value of +k+ in #data
|
109
|
+
def [](k)
|
110
|
+
data[k.to_sym]
|
111
|
+
end
|
108
112
|
|
109
113
|
##
|
110
|
-
# Set the value of +k+ to +v
|
111
|
-
def []=(k,v)
|
114
|
+
# Set the value of +k+ to +v+
|
115
|
+
def []=(k, v)
|
112
116
|
self.load if @data.nil?
|
113
117
|
k = k.to_sym
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
118
|
+
return @data.delete(k) if v.nil?
|
119
|
+
|
120
|
+
case k
|
121
|
+
when :name
|
122
|
+
# Protect the special field :name
|
123
|
+
v = v.miga_name
|
124
|
+
when :type
|
125
|
+
# Symbolize the special field :type
|
126
|
+
v = v.to_sym if k == :type
|
127
|
+
end
|
128
|
+
|
129
|
+
@data[k] = v
|
120
130
|
end
|
121
131
|
|
122
132
|
##
|
123
|
-
# Iterate +blk+ for each data with 2 arguments key and value
|
124
|
-
def each(&blk)
|
133
|
+
# Iterate +blk+ for each data with 2 arguments: key and value
|
134
|
+
def each(&blk)
|
135
|
+
data.each { |k, v| blk.call(k, v) }
|
136
|
+
end
|
125
137
|
|
126
138
|
##
|
127
139
|
# Show contents in JSON format as a String
|
data/lib/miga/result/dates.rb
CHANGED
@@ -30,16 +30,16 @@ module MiGA::Result::Dates
|
|
30
30
|
|
31
31
|
private
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
end
|
42
|
-
date.nil? ? nil : Time.parse(date)
|
33
|
+
##
|
34
|
+
# Internal function to detect start and end dates
|
35
|
+
def date_at(event)
|
36
|
+
date = self[event]
|
37
|
+
date ||= self[:started] if event == :start
|
38
|
+
if date.nil?
|
39
|
+
f = path event
|
40
|
+
date = File.read(f) if File.size? f
|
43
41
|
end
|
42
|
+
Time.parse(date) unless date.nil?
|
43
|
+
end
|
44
44
|
end
|
45
45
|
|
data/lib/miga/tax_dist.rb
CHANGED
@@ -1,66 +1,82 @@
|
|
1
1
|
# @package MiGA
|
2
2
|
# @license Artistic-2.0
|
3
3
|
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
4
|
+
require 'miga/common'
|
5
|
+
require 'miga/taxonomy'
|
6
|
+
require 'zlib'
|
7
7
|
|
8
8
|
##
|
9
9
|
# Methods for taxonomy identification based on AAI/ANI values.
|
10
10
|
module MiGA::TaxDist
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
11
|
+
# Class-level
|
12
|
+
class << self
|
13
|
+
|
14
|
+
##
|
15
|
+
# Absolute path to the :intax or :novel data file (determined by +test+) for
|
16
|
+
# AAI, determined for options +opts+. Supported options:
|
17
|
+
# - +:engine+: The search engine for AAI: +:blast+ (default) or +:diamond+
|
18
|
+
def aai_path(test, opts = {})
|
19
|
+
opts[:engine] ||= :blast
|
20
|
+
engine = opts[:engine].to_s.downcase.to_sym
|
21
|
+
test = test.to_s.downcase.to_sym
|
22
|
+
return nil unless %i[intax novel].include? test
|
23
|
+
engine = :blast if %i[blast+ blat].include? engine
|
24
|
+
return nil unless %i[blast diamond].include? engine
|
25
|
+
File.expand_path("../_data/aai-#{test}-#{engine}.tsv.gz", __FILE__)
|
26
|
+
end
|
20
27
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
28
|
+
##
|
29
|
+
# Returns a Hash, where the keys correspond to the taxonomic level
|
30
|
+
# (see MiGA::Taxonomy.LONG_RANKS for the meanings), and the values
|
31
|
+
# correspond to the p-values of +test+ (one of +:intax+ or +:novel+)
|
32
|
+
# with options +opts+. See +aai_path+ for supported options.
|
33
|
+
def aai_pvalues(aai, test, opts = {})
|
34
|
+
Zlib::GzipReader.open(aai_path(test, opts)) do |fh|
|
35
|
+
keys = nil
|
36
|
+
fh.each_line do |ln|
|
37
|
+
row = ln.chomp.split(/\t/)
|
38
|
+
if fh.lineno == 1
|
39
|
+
keys = row[1, row.size - 1].map(&:to_i)
|
40
|
+
elsif row.shift.to_f >= aai
|
41
|
+
vals = {}
|
42
|
+
keys.each do |i|
|
43
|
+
v = row.shift
|
44
|
+
next if v == 'NA' # <- missing data
|
45
|
+
next if i == 1 # <- namespace, not a taxonomic rank
|
46
|
+
rank = i.zero? ? :root : MiGA::Taxonomy.KNOWN_RANKS[i]
|
47
|
+
vals[rank] = v.to_f
|
48
|
+
end
|
49
|
+
return vals
|
38
50
|
end
|
39
|
-
return vals
|
40
51
|
end
|
41
|
-
end
|
42
|
-
|
43
|
-
|
44
|
-
end
|
52
|
+
end
|
53
|
+
{}
|
54
|
+
end
|
45
55
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
56
|
+
##
|
57
|
+
# Determines the degree to which a Float +aai+ value indicates similar
|
58
|
+
# taxonomy (with +test+ :intax) or a novel taxon (with +test+ :novel) with
|
59
|
+
# options +opts+. See +aai_path+ for supported options.
|
60
|
+
# Returns a Hash with "likelihood" phrases as keys and values as an array
|
61
|
+
# with canonical rank (as in MiGA::Taxonomy) and estimated p-value.
|
62
|
+
def aai_taxtest(aai, test, opts = {})
|
63
|
+
meaning = {
|
64
|
+
most_likely: [0.00, 0.01],
|
65
|
+
probably: [0.01, 0.10],
|
66
|
+
possibly_even: [0.10, 0.50]
|
67
|
+
}
|
68
|
+
pvalues = aai_pvalues(aai, test, opts)
|
69
|
+
out = {}
|
70
|
+
meaning.each do |phrase, thresholds|
|
71
|
+
lwr, upr = thresholds
|
72
|
+
min = pvalues.values.select { |v| v < upr }.max
|
73
|
+
return out if min.nil?
|
74
|
+
if min >= lwr
|
75
|
+
v = pvalues.select { |_, vj| vj == min }
|
76
|
+
out[phrase] = (test == :intax ? v.reverse_each : v).first
|
77
|
+
end
|
61
78
|
end
|
79
|
+
out
|
62
80
|
end
|
63
|
-
out
|
64
81
|
end
|
65
|
-
|
66
82
|
end
|
data/lib/miga/taxonomy.rb
CHANGED
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.6,
|
13
|
+
VERSION = [0.6, 3, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2020, 3,
|
21
|
+
VERSION_DATE = Date.new(2020, 3, 27)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'miga/project'
|
3
|
+
|
4
|
+
class ResultStatsTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def setup
|
7
|
+
$tmp = Dir.mktmpdir
|
8
|
+
ENV['MIGA_HOME'] = $tmp
|
9
|
+
FileUtils.touch(File.expand_path('.miga_rc', ENV['MIGA_HOME']))
|
10
|
+
FileUtils.touch(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
|
11
|
+
$p = MiGA::Project.new(File.expand_path('project1', $tmp))
|
12
|
+
$d = $p.add_dataset('dataset1')
|
13
|
+
end
|
14
|
+
|
15
|
+
def teardown
|
16
|
+
FileUtils.rm_rf $tmp
|
17
|
+
ENV['MIGA_HOME'] = nil
|
18
|
+
end
|
19
|
+
|
20
|
+
def file_path(dir, ext)
|
21
|
+
File.join($p.path, dir, "#{$d.name}#{ext}")
|
22
|
+
end
|
23
|
+
|
24
|
+
def touch_done(dir)
|
25
|
+
FileUtils.touch(file_path(dir, '.done'))
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_single_raw_reads
|
29
|
+
dir = 'data/01.raw_reads'
|
30
|
+
fq = file_path(dir, '.1.fastq')
|
31
|
+
File.open(fq, 'w') { |fh| fh.puts '@1','ACTAC','+','####' }
|
32
|
+
touch_done(dir)
|
33
|
+
r = $d.add_result(:raw_reads)
|
34
|
+
assert_equal({}, r[:stats])
|
35
|
+
r.compute_stats
|
36
|
+
assert(!r[:stats].empty?)
|
37
|
+
assert_equal(Hash, r[:stats].class)
|
38
|
+
assert_equal(1, r[:stats][:reads])
|
39
|
+
assert_equal([40.0, '%'], r[:stats][:g_c_content])
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_coupled_raw_reads
|
43
|
+
dir = 'data/01.raw_reads'
|
44
|
+
fq = file_path(dir, '.1.fastq')
|
45
|
+
File.open(fq, 'w') { |fh| fh.puts '@1','ACTAC','+','####' }
|
46
|
+
fq = file_path(dir, '.2.fastq')
|
47
|
+
File.open(fq, 'w') { |fh| fh.puts '@1','ACTAC','+','####' }
|
48
|
+
touch_done(dir)
|
49
|
+
r = $d.add_result(:raw_reads)
|
50
|
+
r.compute_stats
|
51
|
+
assert(!r[:stats].empty?)
|
52
|
+
assert_nil(r[:stats][:reads])
|
53
|
+
assert_equal(1, r[:stats][:read_pairs])
|
54
|
+
assert_equal([40.0, '%'], r[:stats][:reverse_g_c_content])
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_trimmed_reads
|
58
|
+
dir = 'data/02.trimmed_reads'
|
59
|
+
FileUtils.touch(file_path(dir, '.1.clipped.fastq'))
|
60
|
+
touch_done(dir)
|
61
|
+
r = $d.add_result(:trimmed_reads)
|
62
|
+
assert_equal({}, r[:stats])
|
63
|
+
r.compute_stats
|
64
|
+
assert_equal({}, r[:stats])
|
65
|
+
end
|
66
|
+
|
67
|
+
def test_read_quality
|
68
|
+
dir = 'data/03.read_quality'
|
69
|
+
Dir.mkdir(file_path(dir, '.solexaqa'))
|
70
|
+
Dir.mkdir(file_path(dir, '.fastqc'))
|
71
|
+
touch_done(dir)
|
72
|
+
r = $d.add_result(:read_quality)
|
73
|
+
assert_equal({}, r[:stats])
|
74
|
+
r.compute_stats
|
75
|
+
assert_equal({}, r[:stats])
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_trimmed_fasta
|
79
|
+
dir = 'data/04.trimmed_fasta'
|
80
|
+
fa = file_path(dir, '.CoupledReads.fa')
|
81
|
+
File.open(fa, 'w') { |fh| fh.puts '>1','ACTAC' }
|
82
|
+
touch_done(dir)
|
83
|
+
r = $d.add_result(:trimmed_fasta)
|
84
|
+
assert_equal({}, r[:stats])
|
85
|
+
r.compute_stats
|
86
|
+
assert_equal(1, r[:stats][:reads])
|
87
|
+
assert_equal([40.0, '%'], r[:stats][:g_c_content])
|
88
|
+
end
|
89
|
+
|
90
|
+
def test_assembly
|
91
|
+
dir = 'data/05.assembly'
|
92
|
+
fa = file_path(dir, '.LargeContigs.fna')
|
93
|
+
File.open(fa, 'w') { |fh| fh.puts '>1','ACTAC' }
|
94
|
+
touch_done(dir)
|
95
|
+
r = $d.add_result(:assembly)
|
96
|
+
assert_equal({}, r[:stats])
|
97
|
+
r.compute_stats
|
98
|
+
assert_equal(1, r[:stats][:contigs])
|
99
|
+
assert_equal([5, 'bp'], r[:stats][:total_length])
|
100
|
+
assert_equal([40.0, '%'], r[:stats][:g_c_content])
|
101
|
+
end
|
102
|
+
|
103
|
+
def test_cds
|
104
|
+
dir = 'data/06.cds'
|
105
|
+
fa = file_path(dir, '.faa')
|
106
|
+
File.open(fa, 'w') { |fh| fh.puts '>1','M' }
|
107
|
+
touch_done(dir)
|
108
|
+
r = $d.add_result(:cds)
|
109
|
+
assert_equal({}, r[:stats])
|
110
|
+
r.compute_stats
|
111
|
+
assert_equal(1, r[:stats][:predicted_proteins])
|
112
|
+
assert_equal([1.0, 'aa'], r[:stats][:average_length])
|
113
|
+
assert_nil(r[:stats][:coding_density])
|
114
|
+
test_assembly
|
115
|
+
r.compute_stats
|
116
|
+
assert_equal([60.0, '%'], r[:stats][:coding_density])
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
data/test/result_test.rb
CHANGED
@@ -10,14 +10,14 @@ class ResultTest < Test::Unit::TestCase
|
|
10
10
|
FileUtils.touch(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
|
11
11
|
$p1 = MiGA::Project.new(File.expand_path('project1', $tmp))
|
12
12
|
$d1 = $p1.add_dataset('dataset1')
|
13
|
-
FileUtils.touch(
|
14
|
-
"data/02.trimmed_reads/#{$d1.name}.1.clipped.fastq"
|
15
|
-
FileUtils.touch(
|
16
|
-
"data/02.trimmed_reads/#{$d1.name}.done"
|
17
|
-
FileUtils.touch(
|
18
|
-
'data/10.clades/01.find/miga-project.empty'
|
19
|
-
FileUtils.touch(
|
20
|
-
'data/10.clades/01.find/miga-project.done'
|
13
|
+
FileUtils.touch(
|
14
|
+
File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.1.clipped.fastq"))
|
15
|
+
FileUtils.touch(
|
16
|
+
File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.done"))
|
17
|
+
FileUtils.touch(
|
18
|
+
File.join($p1.path, 'data/10.clades/01.find/miga-project.empty'))
|
19
|
+
FileUtils.touch(
|
20
|
+
File.join($p1.path, 'data/10.clades/01.find/miga-project.done'))
|
21
21
|
end
|
22
22
|
|
23
23
|
def teardown
|
@@ -46,4 +46,18 @@ class ResultTest < Test::Unit::TestCase
|
|
46
46
|
assert_equal($p1.path, r.source.path)
|
47
47
|
end
|
48
48
|
|
49
|
+
def test_dates
|
50
|
+
r = $d1.add_result(:trimmed_reads)
|
51
|
+
assert_nil(r.done_at)
|
52
|
+
assert_nil(r.started_at)
|
53
|
+
tf = File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.done")
|
54
|
+
File.open(tf, 'w') { |fh| fh.puts Time.new(1,2,3,4,5) }
|
55
|
+
assert_equal(Time, r.done_at.class)
|
56
|
+
assert_nil(r.running_time)
|
57
|
+
tf = File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.start")
|
58
|
+
File.open(tf, 'w') { |fh| fh.puts Time.new(1,2,3,4,0) }
|
59
|
+
r = $d1.add_result(:trimmed_reads)
|
60
|
+
assert_equal(5.0, r.running_time)
|
61
|
+
end
|
62
|
+
|
49
63
|
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'miga/tax_dist'
|
3
|
+
|
4
|
+
class TaxDistTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def test_aai_path
|
7
|
+
assert(File.size? MiGA::TaxDist.aai_path(:intax))
|
8
|
+
assert(File.size? MiGA::TaxDist.aai_path(:novel))
|
9
|
+
assert(File.size? MiGA::TaxDist.aai_path(:intax, engine: :diamond))
|
10
|
+
assert(File.size? MiGA::TaxDist.aai_path(:novel, engine: :blast))
|
11
|
+
assert(File.size? MiGA::TaxDist.aai_path(:novel, engine: :'blast+'))
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_aai_pvalues
|
15
|
+
distant_intax = MiGA::TaxDist.aai_pvalues(35.0, :intax)
|
16
|
+
assert_lt(distant_intax[:root], 0.05)
|
17
|
+
assert_gt(distant_intax[:g], 0.05)
|
18
|
+
assert_nil(distant_intax[:ns])
|
19
|
+
|
20
|
+
close_intax = MiGA::TaxDist.aai_pvalues(99.0, :intax, engine: :blast)
|
21
|
+
assert_lt(close_intax[:root], 0.05)
|
22
|
+
assert_lt(close_intax[:s], 0.05)
|
23
|
+
|
24
|
+
close_intax = MiGA::TaxDist.aai_pvalues(99.0, :intax, engine: :diamond)
|
25
|
+
assert_lt(close_intax[:root], 0.05)
|
26
|
+
assert_lt(close_intax[:s], 0.05)
|
27
|
+
|
28
|
+
distant_novel = MiGA::TaxDist.aai_pvalues(35.0, :novel, engine: :diamond)
|
29
|
+
$stderr.puts distant_novel
|
30
|
+
assert_gt(distant_novel[:root], 0.05)
|
31
|
+
assert_lt(distant_novel[:g], 0.05)
|
32
|
+
assert_nil(distant_novel[:ns])
|
33
|
+
|
34
|
+
close_novel = MiGA::TaxDist.aai_pvalues(99.0, :novel)
|
35
|
+
assert_gt(close_novel[:root], 0.05)
|
36
|
+
assert_gt(close_novel[:f], 0.05)
|
37
|
+
|
38
|
+
assert_equal({}, MiGA::TaxDist.aai_pvalues(101.0, :intax))
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_aai_taxtest
|
42
|
+
distant_intax = MiGA::TaxDist.aai_taxtest(35.0, :intax, engine: :diamond)
|
43
|
+
assert_equal(:root, distant_intax[:most_likely][0])
|
44
|
+
assert_nil(distant_intax[:probably])
|
45
|
+
assert_nil(distant_intax[:possibly_even])
|
46
|
+
|
47
|
+
distant_intax = MiGA::TaxDist.aai_taxtest(35.0, :intax, engine: :blast)
|
48
|
+
assert_equal(:root, distant_intax[:most_likely][0])
|
49
|
+
assert_nil(distant_intax[:probably])
|
50
|
+
assert_nil(distant_intax[:possibly_even])
|
51
|
+
|
52
|
+
close_intax = MiGA::TaxDist.aai_taxtest(99.0, :intax, engine: :diamond)
|
53
|
+
assert_equal(:s, close_intax[:probably][0])
|
54
|
+
|
55
|
+
close_intax = MiGA::TaxDist.aai_taxtest(99.0, :intax, engine: :blast)
|
56
|
+
assert_equal(:s, close_intax[:probably][0])
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
data/test/test_helper.rb
CHANGED
data/utils/distance/commands.rb
CHANGED
@@ -63,12 +63,12 @@ module MiGA::DistanceRunner::Commands
|
|
63
63
|
|
64
64
|
##
|
65
65
|
# Execute an AAI command
|
66
|
-
def aai_cmd(f1, f2, n1, n2, db, o={})
|
66
|
+
def aai_cmd(f1, f2, n1, n2, db, o = {})
|
67
67
|
o = opts.merge(o)
|
68
68
|
v = `aai.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
|
69
69
|
--name1 "#{n1}" --name2 "#{n2}" \
|
70
70
|
-t "#{o[:thr]}" -a --lookup-first "--#{o[:aai_save_rbm]}" \
|
71
|
-
-p "#{o[:aai_p] ||
|
71
|
+
-p "#{o[:aai_p] || 'blast+'}"`.chomp
|
72
72
|
(v.nil? || v.empty?) ? 0 : v.to_f
|
73
73
|
end
|
74
74
|
|
@@ -91,7 +91,7 @@ module MiGA::DistanceRunner::Commands
|
|
91
91
|
v = `ani.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
|
92
92
|
--name1 "#{n1}" --name2 "#{n2}" \
|
93
93
|
-t "#{opts[:thr]}" -a --no-save-regions --no-save-rbm \
|
94
|
-
--lookup-first -p "#{o[:ani_p] ||
|
94
|
+
--lookup-first -p "#{o[:ani_p] || 'blast+'}"`.chomp
|
95
95
|
end
|
96
96
|
v.nil? || v.empty? ? 0 : v.to_f
|
97
97
|
end
|
data/utils/distance/pipeline.rb
CHANGED
@@ -73,14 +73,17 @@ module MiGA::DistanceRunner::Pipeline
|
|
73
73
|
cr = dataset.closest_relatives(1, from_ref_project)
|
74
74
|
return if cr.nil? or cr.empty?
|
75
75
|
tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
|
76
|
+
|
76
77
|
# Run the test for each rank
|
77
|
-
|
78
|
+
tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
|
79
|
+
r = tax_test.map do |k,v|
|
78
80
|
sig = ''
|
79
|
-
[0.5,0.1,0.05,0.01].each{ |i| sig << '*' if v<i }
|
81
|
+
[0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
|
80
82
|
[MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
|
81
83
|
end
|
84
|
+
|
82
85
|
# Save test
|
83
|
-
File.open(File.expand_path("#{dataset.name}.intax.txt", home),
|
86
|
+
File.open(File.expand_path("#{dataset.name}.intax.txt", home), 'w') do |fh|
|
84
87
|
fh.puts "Closest relative: #{cr[0][0]} with AAI: #{cr[0][1]}."
|
85
88
|
fh.puts ''
|
86
89
|
fh.puts MiGA::MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
|
data/utils/distance/runner.rb
CHANGED
@@ -22,6 +22,7 @@ class MiGA::DistanceRunner
|
|
22
22
|
raise "No project at #{project_path}"
|
23
23
|
@dataset = project.dataset(dataset_name)
|
24
24
|
@home = File.expand_path('data/09.distances', project.path)
|
25
|
+
|
25
26
|
# Default opts
|
26
27
|
if project.metadata[:aai_save_rbm] == false
|
27
28
|
@opts[:aai_save_rbm] ||= 'no-save-rbm'
|
@@ -48,6 +49,8 @@ class MiGA::DistanceRunner
|
|
48
49
|
[:haai_p, :aai_p, :ani_p, :distances_checkpoint].each do |m|
|
49
50
|
@opts[m] ||= ref_project.metadata[m]
|
50
51
|
end
|
52
|
+
@opts[:aai_p] ||= 'blast+'
|
53
|
+
@opts[:ani_p] ||= 'blast+'
|
51
54
|
@opts[:distances_checkpoint] ||= 10
|
52
55
|
@opts[:distances_checkpoint] = @opts[:distances_checkpoint].to_i
|
53
56
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.2.0
|
4
|
+
version: 0.6.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-03-
|
11
|
+
date: 2020-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '3'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: assertions
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1'
|
83
97
|
description: Microbial Genomes Atlas
|
84
98
|
email: lmrodriguezr@gmail.com
|
85
99
|
executables:
|
@@ -94,8 +108,10 @@ files:
|
|
94
108
|
- Rakefile
|
95
109
|
- bin/miga
|
96
110
|
- lib/miga.rb
|
97
|
-
- lib/miga/_data/aai-intax.tsv.gz
|
98
|
-
- lib/miga/_data/aai-novel.tsv.gz
|
111
|
+
- lib/miga/_data/aai-intax-blast.tsv.gz
|
112
|
+
- lib/miga/_data/aai-intax-diamond.tsv.gz
|
113
|
+
- lib/miga/_data/aai-novel-blast.tsv.gz
|
114
|
+
- lib/miga/_data/aai-novel-diamond.tsv.gz
|
99
115
|
- lib/miga/cli.rb
|
100
116
|
- lib/miga/cli/action.rb
|
101
117
|
- lib/miga/cli/action/about.rb
|
@@ -199,7 +215,9 @@ files:
|
|
199
215
|
- test/metadata_test.rb
|
200
216
|
- test/project_test.rb
|
201
217
|
- test/remote_dataset_test.rb
|
218
|
+
- test/result_stats_test.rb
|
202
219
|
- test/result_test.rb
|
220
|
+
- test/tax_dist_test.rb
|
203
221
|
- test/tax_index_test.rb
|
204
222
|
- test/taxonomy_test.rb
|
205
223
|
- test/test_helper.rb
|
Binary file
|
Binary file
|