miga-base 0.6.2.0 → 0.6.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax-blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax-diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel-blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel-diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/new.rb +13 -4
- data/lib/miga/dataset.rb +19 -16
- data/lib/miga/dataset/result.rb +55 -51
- data/lib/miga/metadata.rb +33 -21
- data/lib/miga/result/dates.rb +10 -10
- data/lib/miga/tax_dist.rb +67 -51
- data/lib/miga/taxonomy.rb +0 -1
- data/lib/miga/version.rb +2 -2
- data/test/result_stats_test.rb +119 -0
- data/test/result_test.rb +22 -8
- data/test/tax_dist_test.rb +59 -0
- data/test/test_helper.rb +1 -0
- data/utils/distance/commands.rb +3 -3
- data/utils/distance/pipeline.rb +6 -3
- data/utils/distance/runner.rb +3 -0
- metadata +22 -4
- data/lib/miga/_data/aai-intax.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.tsv.gz +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b225951f374bcd267560e5bd8234fb88bcd6b0c11b0561fb4b3b479af39c4b3
|
4
|
+
data.tar.gz: 9b32d40ea94ceb526fe0ba732c77fce978b0cba5decffd4e1c0d701594670dbd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b7fe9b2cbb09b6612b762c7c9202b4b27ece7a0b6f4dd23eecee9bddc835c130f15a63772011106a1f0f1425e5445fa1541a8ebc81661ef341dcacec3ae22193
|
7
|
+
data.tar.gz: 501fa797aa6726ac5cdc6c043a3073d03a7ba3ed81d63ebac9ef989a76aa1947806c9052ab50c089567001ad1b1b92ad04cc05ea4ca33691de36c4dcb2e34b52
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/miga/cli/action/new.rb
CHANGED
@@ -11,20 +11,25 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
|
|
11
11
|
opt.on(
|
12
12
|
'-n', '--name STRING',
|
13
13
|
'Name of the project'
|
14
|
-
|
14
|
+
) { |v| cli[:name] = v }
|
15
15
|
opt.on(
|
16
16
|
'-d', '--description STRING',
|
17
17
|
'Description of the project'
|
18
|
-
|
18
|
+
) { |v| cli[:description] = v }
|
19
19
|
opt.on(
|
20
20
|
'-c', '--comments STRING',
|
21
21
|
'Comments on the project'
|
22
|
-
|
22
|
+
) { |v| cli[:comments] = v }
|
23
|
+
opt.on(
|
24
|
+
'--fast',
|
25
|
+
'Use faster identity engines (Diamond-AAI and FastANI)',
|
26
|
+
'Equivalent to: -m aai_p=diamond,ani_p=fastani'
|
27
|
+
) { |v| cli[:fast] = v }
|
23
28
|
opt.on(
|
24
29
|
'-m', '--metadata STRING',
|
25
30
|
'Metadata as key-value pairs separated by = and delimited by comma',
|
26
31
|
'Values are saved as strings except for booleans (true / false) or nil'
|
27
|
-
|
32
|
+
) { |v| cli[:metadata] = v }
|
28
33
|
end
|
29
34
|
end
|
30
35
|
|
@@ -40,6 +45,10 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
|
|
40
45
|
raise 'Project already exists, aborting.' if Project.exist? cli[:project]
|
41
46
|
p = Project.new(cli[:project], false)
|
42
47
|
p = cli.add_metadata(p)
|
48
|
+
if cli[:fast]
|
49
|
+
p.metadata[:aai_p] = 'diamond'
|
50
|
+
p.metadata[:ani_p] = 'fastani'
|
51
|
+
end
|
43
52
|
p.save
|
44
53
|
end
|
45
54
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -9,10 +9,9 @@ require 'sqlite3'
|
|
9
9
|
##
|
10
10
|
# Dataset representation in MiGA.
|
11
11
|
class MiGA::Dataset < MiGA::MiGA
|
12
|
-
|
13
12
|
include MiGA::Dataset::Result
|
14
13
|
include MiGA::Dataset::Hooks
|
15
|
-
|
14
|
+
|
16
15
|
# Class-level
|
17
16
|
class << self
|
18
17
|
|
@@ -27,7 +26,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
27
26
|
def INFO_FIELDS
|
28
27
|
%w(name created updated type ref user description comments)
|
29
28
|
end
|
30
|
-
|
29
|
+
|
31
30
|
end
|
32
31
|
|
33
32
|
# Instance-level
|
@@ -35,11 +34,11 @@ class MiGA::Dataset < MiGA::MiGA
|
|
35
34
|
##
|
36
35
|
# MiGA::Project that contains the dataset.
|
37
36
|
attr_reader :project
|
38
|
-
|
37
|
+
|
39
38
|
##
|
40
39
|
# Datasets are uniquely identified by +name+ in a project.
|
41
40
|
attr_reader :name
|
42
|
-
|
41
|
+
|
43
42
|
##
|
44
43
|
# Create a MiGA::Dataset object in a +project+ MiGA::Project with a
|
45
44
|
# uniquely identifying +name+. +is_ref+ indicates if the dataset is to
|
@@ -52,21 +51,25 @@ class MiGA::Dataset < MiGA::MiGA
|
|
52
51
|
end
|
53
52
|
@project = project
|
54
53
|
@name = name
|
54
|
+
@metadata = nil
|
55
55
|
metadata[:ref] = is_ref
|
56
56
|
@metadata_future = [
|
57
57
|
File.expand_path("metadata/#{name}.json", project.path),
|
58
58
|
metadata
|
59
59
|
]
|
60
60
|
save unless File.exist? @metadata_future[0]
|
61
|
-
pull_hook :on_load
|
62
61
|
end
|
63
62
|
|
64
63
|
##
|
65
|
-
# MiGA::Metadata with information about the dataset
|
64
|
+
# MiGA::Metadata with information about the dataset
|
66
65
|
def metadata
|
67
|
-
@metadata
|
66
|
+
if @metadata.nil?
|
67
|
+
@metadata = MiGA::Metadata.new(*@metadata_future)
|
68
|
+
pull_hook :on_load
|
69
|
+
end
|
70
|
+
@metadata
|
68
71
|
end
|
69
|
-
|
72
|
+
|
70
73
|
##
|
71
74
|
# Save any changes you've made in the dataset.
|
72
75
|
def save
|
@@ -74,11 +77,11 @@ class MiGA::Dataset < MiGA::MiGA
|
|
74
77
|
metadata.save
|
75
78
|
pull_hook :on_save
|
76
79
|
end
|
77
|
-
|
80
|
+
|
78
81
|
##
|
79
82
|
# Get the type of dataset as Symbol.
|
80
83
|
def type ; metadata[:type] ; end
|
81
|
-
|
84
|
+
|
82
85
|
##
|
83
86
|
# Delete the dataset with all its contents (including results) and returns
|
84
87
|
# nil.
|
@@ -103,7 +106,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
103
106
|
self.metadata.save
|
104
107
|
pull_hook :on_activate
|
105
108
|
end
|
106
|
-
|
109
|
+
|
107
110
|
##
|
108
111
|
# Get standard metadata values for the dataset as Array.
|
109
112
|
def info
|
@@ -111,7 +114,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
111
114
|
(k == 'name') ? self.name : metadata[k.to_sym]
|
112
115
|
end
|
113
116
|
end
|
114
|
-
|
117
|
+
|
115
118
|
##
|
116
119
|
# Is this dataset a reference?
|
117
120
|
def is_ref? ; !!metadata[:ref] ; end
|
@@ -119,14 +122,14 @@ class MiGA::Dataset < MiGA::MiGA
|
|
119
122
|
##
|
120
123
|
# Is this dataset a query (non-reference)?
|
121
124
|
def is_query? ; !metadata[:ref] ; end
|
122
|
-
|
125
|
+
|
123
126
|
##
|
124
127
|
# Is this dataset known to be multi-organism?
|
125
128
|
def is_multi?
|
126
129
|
return false if metadata[:type].nil? or @@KNOWN_TYPES[type].nil?
|
127
130
|
@@KNOWN_TYPES[type][:multi]
|
128
131
|
end
|
129
|
-
|
132
|
+
|
130
133
|
##
|
131
134
|
# Is this dataset known to be single-organism?
|
132
135
|
def is_nonmulti?
|
@@ -139,7 +142,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
139
142
|
def is_active?
|
140
143
|
metadata[:inactive].nil? or !metadata[:inactive]
|
141
144
|
end
|
142
|
-
|
145
|
+
|
143
146
|
##
|
144
147
|
# Should I ignore +task+ for this dataset?
|
145
148
|
def ignore_task?(task)
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -24,10 +24,10 @@ module MiGA::Dataset::Result
|
|
24
24
|
end
|
25
25
|
|
26
26
|
##
|
27
|
-
# For each result executes the 2-ary
|
28
|
-
def each_result
|
29
|
-
@@RESULT_DIRS.
|
30
|
-
|
27
|
+
# For each result executes the 2-ary block: key symbol and MiGA::Result
|
28
|
+
def each_result
|
29
|
+
@@RESULT_DIRS.each_key do |k|
|
30
|
+
yield(k, result(k)) unless result(k).nil?
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
@@ -50,20 +50,20 @@ module MiGA::Dataset::Result
|
|
50
50
|
r_pre = MiGA::Result.load("#{base}.json")
|
51
51
|
return r_pre if (r_pre.nil? && !save) || !r_pre.nil?
|
52
52
|
end
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
pull_hook(:on_result_ready, result_type)
|
59
|
-
end
|
53
|
+
fun = "add_result_#{result_type}"
|
54
|
+
r = send(fun, base, opts) if File.exist?("#{base}.done")
|
55
|
+
return if r.nil?
|
56
|
+
r.save
|
57
|
+
pull_hook(:on_result_ready, result_type)
|
60
58
|
r
|
61
59
|
end
|
62
60
|
|
63
61
|
##
|
64
62
|
# Gets a result as MiGA::Result for the datasets with +result_type+. This is
|
65
63
|
# equivalent to +add_result(result_type, false)+.
|
66
|
-
def get_result(result_type)
|
64
|
+
def get_result(result_type)
|
65
|
+
add_result(result_type, false)
|
66
|
+
end
|
67
67
|
|
68
68
|
##
|
69
69
|
# Returns the key symbol of the first registered result (sorted by the
|
@@ -179,7 +179,7 @@ module MiGA::Dataset::Result
|
|
179
179
|
r = get_result(:distances)
|
180
180
|
ref = project.datasets.select(&:is_ref?).select(&:is_active?).map(&:name)
|
181
181
|
return if r.nil?
|
182
|
-
[
|
182
|
+
%i[haai_db aai_db ani_db].each do |db_type|
|
183
183
|
db = r.file_path(db_type)
|
184
184
|
next if db.nil? || !File.size?(db)
|
185
185
|
sqlite_db = SQLite3::Database.new db
|
@@ -198,9 +198,8 @@ module MiGA::Dataset::Result
|
|
198
198
|
# Add result type +:raw_reads+ at +base+ (no +_opts+ supported)
|
199
199
|
def add_result_raw_reads(base, _opts)
|
200
200
|
return nil unless result_files_exist?(base, '.1.fastq')
|
201
|
-
r = MiGA::Result.new("#{base}.json")
|
202
201
|
add_files_to_ds_result(
|
203
|
-
|
202
|
+
MiGA::Result.new("#{base}.json"), name,
|
204
203
|
if result_files_exist?(base, '.2.fastq')
|
205
204
|
{ pair1: '.1.fastq', pair2: '.2.fastq' }
|
206
205
|
else
|
@@ -213,25 +212,30 @@ module MiGA::Dataset::Result
|
|
213
212
|
# Add result type +:trimmed_reads+ at +base+ (no +_opts+ supported)
|
214
213
|
def add_result_trimmed_reads(base, _opts)
|
215
214
|
return nil unless result_files_exist?(base, '.1.clipped.fastq')
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
215
|
+
add_files_to_ds_result(
|
216
|
+
MiGA::Result.new("#{base}.json"), name,
|
217
|
+
if result_files_exist?(base, '.2.clipped.fastq')
|
218
|
+
{
|
219
|
+
pair1: '.1.clipped.fastq',
|
220
|
+
pair2: '.2.clipped.fastq',
|
221
|
+
single: '.1.clipped.single.fastq'
|
222
|
+
}
|
223
|
+
else
|
224
|
+
{ single: '.1.clipped.fastq' }
|
225
|
+
end
|
226
|
+
).tap do |r|
|
227
|
+
r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
|
224
228
|
end
|
225
|
-
r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
|
226
|
-
r
|
227
229
|
end
|
228
230
|
|
229
231
|
##
|
230
232
|
# Add result type +:read_quality+ at +base+ (no +_opts+ supported)
|
231
233
|
def add_result_read_quality(base, _opts)
|
232
234
|
return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
|
233
|
-
|
234
|
-
|
235
|
+
add_files_to_ds_result(
|
236
|
+
MiGA::Result.new("#{base}.json"), name,
|
237
|
+
solexaqa: '.solexaqa', fastqc: '.fastqc'
|
238
|
+
)
|
235
239
|
end
|
236
240
|
|
237
241
|
##
|
@@ -241,13 +245,13 @@ module MiGA::Dataset::Result
|
|
241
245
|
result_files_exist?(base, '.CoupledReads.fa') ||
|
242
246
|
result_files_exist?(base, '.SingleReads.fa') ||
|
243
247
|
result_files_exist?(base, %w[.1.fasta .2.fasta])
|
244
|
-
r = MiGA::Result.new("#{base}.json")
|
245
248
|
add_files_to_ds_result(
|
246
|
-
|
249
|
+
MiGA::Result.new("#{base}.json"), name,
|
247
250
|
coupled: '.CoupledReads.fa',
|
248
251
|
single: '.SingleReads.fa',
|
249
252
|
pair1: '.1.fasta',
|
250
|
-
pair2: '.2.fasta'
|
253
|
+
pair2: '.2.fasta'
|
254
|
+
)
|
251
255
|
end
|
252
256
|
|
253
257
|
##
|
@@ -255,12 +259,12 @@ module MiGA::Dataset::Result
|
|
255
259
|
# +is_clean: Boolean+.
|
256
260
|
def add_result_assembly(base, opts)
|
257
261
|
return nil unless result_files_exist?(base, '.LargeContigs.fna')
|
258
|
-
r = MiGA::Result.new("#{base}.json")
|
259
262
|
r = add_files_to_ds_result(
|
260
|
-
|
263
|
+
MiGA::Result.new("#{base}.json"), name,
|
261
264
|
largecontigs: '.LargeContigs.fna',
|
262
265
|
allcontigs: '.AllContigs.fna',
|
263
|
-
assembly_data: ''
|
266
|
+
assembly_data: ''
|
267
|
+
)
|
264
268
|
opts[:is_clean] ||= false
|
265
269
|
r.clean! if opts[:is_clean]
|
266
270
|
unless r.clean?
|
@@ -274,14 +278,14 @@ module MiGA::Dataset::Result
|
|
274
278
|
# Add result type +:cds+ at +base+. Hash +opts+ supports +is_clean: Boolean+
|
275
279
|
def add_result_cds(base, opts)
|
276
280
|
return nil unless result_files_exist?(base, %w[.faa])
|
277
|
-
r = MiGA::Result.new("#{base}.json")
|
278
281
|
r = add_files_to_ds_result(
|
279
|
-
|
282
|
+
MiGA::Result.new("#{base}.json"), name,
|
280
283
|
proteins: '.faa',
|
281
284
|
genes: '.fna',
|
282
285
|
gff2: '.gff2',
|
283
286
|
gff3: '.gff3',
|
284
|
-
tab: '.tab'
|
287
|
+
tab: '.tab'
|
288
|
+
)
|
285
289
|
opts[:is_clean] ||= false
|
286
290
|
r.clean! if opts[:is_clean]
|
287
291
|
unless r.clean?
|
@@ -296,13 +300,13 @@ module MiGA::Dataset::Result
|
|
296
300
|
# Add result type +:essential_genes+ at +base+ (no +_opts+ supported).
|
297
301
|
def add_result_essential_genes(base, _opts)
|
298
302
|
return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
|
299
|
-
r = MiGA::Result.new("#{base}.json")
|
300
303
|
add_files_to_ds_result(
|
301
|
-
|
304
|
+
MiGA::Result.new("#{base}.json"), name,
|
302
305
|
ess_genes: '.ess.faa',
|
303
306
|
collection: '.ess',
|
304
307
|
report: '.ess/log',
|
305
|
-
alignments: '.ess/proteins.aln'
|
308
|
+
alignments: '.ess/proteins.aln'
|
309
|
+
)
|
306
310
|
end
|
307
311
|
|
308
312
|
##
|
@@ -310,12 +314,12 @@ module MiGA::Dataset::Result
|
|
310
314
|
def add_result_ssu(base, opts)
|
311
315
|
return MiGA::Result.new("#{base}.json") if result(:assembly).nil?
|
312
316
|
return nil unless result_files_exist?(base, '.ssu.fa')
|
313
|
-
r = MiGA::Result.new("#{base}.json")
|
314
317
|
r = add_files_to_ds_result(
|
315
|
-
|
318
|
+
MiGA::Result.new("#{base}.json"), name,
|
316
319
|
longest_ssu_gene: '.ssu.fa',
|
317
320
|
gff: '.ssu.gff',
|
318
|
-
all_ssu_genes: '.ssu.all.fa'
|
321
|
+
all_ssu_genes: '.ssu.all.fa'
|
322
|
+
)
|
319
323
|
opts[:is_clean] ||= false
|
320
324
|
r.clean! if opts[:is_clean]
|
321
325
|
unless r.clean?
|
@@ -332,9 +336,8 @@ module MiGA::Dataset::Result
|
|
332
336
|
return nil unless
|
333
337
|
result_files_exist?(base, '.mytaxa') ||
|
334
338
|
result_files_exist?(base, '.nomytaxa.txt')
|
335
|
-
r = MiGA::Result.new("#{base}.json")
|
336
339
|
add_files_to_ds_result(
|
337
|
-
|
340
|
+
MiGA::Result.new("#{base}.json"), name,
|
338
341
|
mytaxa: '.mytaxa',
|
339
342
|
blast: '.blast',
|
340
343
|
mytaxain: '.mytaxain',
|
@@ -344,7 +347,8 @@ module MiGA::Dataset::Result
|
|
344
347
|
phylum: '.mytaxa.Phylum.txt',
|
345
348
|
innominate: '.mytaxa.innominate',
|
346
349
|
kronain: '.mytaxa.krona',
|
347
|
-
krona: '.html'
|
350
|
+
krona: '.html'
|
351
|
+
)
|
348
352
|
else
|
349
353
|
MiGA::Result.new("#{base}.json")
|
350
354
|
end
|
@@ -357,9 +361,8 @@ module MiGA::Dataset::Result
|
|
357
361
|
return nil unless
|
358
362
|
result_files_exist?(base, %w[.pdf .mytaxa]) ||
|
359
363
|
result_files_exist?(base, '.nomytaxa.txt')
|
360
|
-
r = MiGA::Result.new("#{base}.json")
|
361
364
|
add_files_to_ds_result(
|
362
|
-
|
365
|
+
MiGA::Result.new("#{base}.json"), name,
|
363
366
|
nomytaxa: '.nomytaxa.txt',
|
364
367
|
mytaxa: '.mytaxa',
|
365
368
|
report: '.pdf',
|
@@ -370,7 +373,8 @@ module MiGA::Dataset::Result
|
|
370
373
|
wintax: '.wintax',
|
371
374
|
gene_ids: '.wintax.genes',
|
372
375
|
region_ids: '.wintax.regions',
|
373
|
-
regions: '.reg'
|
376
|
+
regions: '.reg'
|
377
|
+
)
|
374
378
|
else
|
375
379
|
MiGA::Result.new("#{base}.json")
|
376
380
|
end
|
@@ -428,9 +432,8 @@ module MiGA::Dataset::Result
|
|
428
432
|
return nil unless
|
429
433
|
result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) ||
|
430
434
|
result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
|
431
|
-
r = MiGA::Result.new("#{base}.json")
|
432
435
|
add_files_to_ds_result(
|
433
|
-
|
436
|
+
MiGA::Result.new("#{base}.json"), name,
|
434
437
|
aai_medoids: '.aai-medoids.tsv',
|
435
438
|
haai_db: '.haai.db',
|
436
439
|
aai_db: '.aai.db',
|
@@ -438,7 +441,8 @@ module MiGA::Dataset::Result
|
|
438
441
|
ani_db: '.ani.db',
|
439
442
|
ref_tree: '.nwk',
|
440
443
|
ref_tree_pdf: '.nwk.pdf',
|
441
|
-
intax_test: '.intax.txt'
|
444
|
+
intax_test: '.intax.txt'
|
445
|
+
)
|
442
446
|
end
|
443
447
|
|
444
448
|
##
|
data/lib/miga/metadata.rb
CHANGED
@@ -27,7 +27,7 @@ class MiGA::Metadata < MiGA::MiGA
|
|
27
27
|
|
28
28
|
##
|
29
29
|
# Initiate a MiGA::Metadata object with description in +path+.
|
30
|
-
# It will create it if it doesn't exist
|
30
|
+
# It will create it if it doesn't exist.
|
31
31
|
def initialize(path, defaults = {})
|
32
32
|
@data = nil
|
33
33
|
@path = File.absolute_path(path)
|
@@ -39,21 +39,21 @@ class MiGA::Metadata < MiGA::MiGA
|
|
39
39
|
end
|
40
40
|
|
41
41
|
##
|
42
|
-
# Parsed data as a Hash
|
42
|
+
# Parsed data as a Hash
|
43
43
|
def data
|
44
44
|
self.load if @data.nil?
|
45
45
|
@data
|
46
46
|
end
|
47
47
|
|
48
48
|
##
|
49
|
-
# Reset :created field and save the current data
|
49
|
+
# Reset :created field and save the current data
|
50
50
|
def create
|
51
51
|
self[:created] = Time.now.to_s
|
52
52
|
save
|
53
53
|
end
|
54
54
|
|
55
55
|
##
|
56
|
-
# Save the metadata into #path
|
56
|
+
# Save the metadata into #path
|
57
57
|
def save
|
58
58
|
MiGA.DEBUG "Metadata.save #{path}"
|
59
59
|
self[:updated] = Time.now.to_s
|
@@ -78,7 +78,7 @@ class MiGA::Metadata < MiGA::MiGA
|
|
78
78
|
end
|
79
79
|
|
80
80
|
##
|
81
|
-
# (Re-)load metadata stored in #path
|
81
|
+
# (Re-)load metadata stored in #path
|
82
82
|
def load
|
83
83
|
sleeper = 0.0
|
84
84
|
while File.exist? lock_file
|
@@ -87,11 +87,11 @@ class MiGA::Metadata < MiGA::MiGA
|
|
87
87
|
end
|
88
88
|
tmp = MiGA::Json.parse(path, additions: true)
|
89
89
|
@data = {}
|
90
|
-
tmp.
|
90
|
+
tmp.each { |k, v| self[k] = v }
|
91
91
|
end
|
92
92
|
|
93
93
|
##
|
94
|
-
# Delete file at #path
|
94
|
+
# Delete file at #path
|
95
95
|
def remove!
|
96
96
|
MiGA.DEBUG "Metadata.remove! #{path}"
|
97
97
|
File.unlink(path)
|
@@ -99,29 +99,41 @@ class MiGA::Metadata < MiGA::MiGA
|
|
99
99
|
end
|
100
100
|
|
101
101
|
##
|
102
|
-
# Lock file for the metadata
|
103
|
-
def lock_file
|
102
|
+
# Lock file for the metadata
|
103
|
+
def lock_file
|
104
|
+
"#{path}.lock"
|
105
|
+
end
|
104
106
|
|
105
107
|
##
|
106
|
-
# Return the value of +k+ in #data
|
107
|
-
def [](k)
|
108
|
+
# Return the value of +k+ in #data
|
109
|
+
def [](k)
|
110
|
+
data[k.to_sym]
|
111
|
+
end
|
108
112
|
|
109
113
|
##
|
110
|
-
# Set the value of +k+ to +v
|
111
|
-
def []=(k,v)
|
114
|
+
# Set the value of +k+ to +v+
|
115
|
+
def []=(k, v)
|
112
116
|
self.load if @data.nil?
|
113
117
|
k = k.to_sym
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
118
|
+
return @data.delete(k) if v.nil?
|
119
|
+
|
120
|
+
case k
|
121
|
+
when :name
|
122
|
+
# Protect the special field :name
|
123
|
+
v = v.miga_name
|
124
|
+
when :type
|
125
|
+
# Symbolize the special field :type
|
126
|
+
v = v.to_sym if k == :type
|
127
|
+
end
|
128
|
+
|
129
|
+
@data[k] = v
|
120
130
|
end
|
121
131
|
|
122
132
|
##
|
123
|
-
# Iterate +blk+ for each data with 2 arguments key and value
|
124
|
-
def each(&blk)
|
133
|
+
# Iterate +blk+ for each data with 2 arguments: key and value
|
134
|
+
def each(&blk)
|
135
|
+
data.each { |k, v| blk.call(k, v) }
|
136
|
+
end
|
125
137
|
|
126
138
|
##
|
127
139
|
# Show contents in JSON format as a String
|
data/lib/miga/result/dates.rb
CHANGED
@@ -30,16 +30,16 @@ module MiGA::Result::Dates
|
|
30
30
|
|
31
31
|
private
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
end
|
42
|
-
date.nil? ? nil : Time.parse(date)
|
33
|
+
##
|
34
|
+
# Internal function to detect start and end dates
|
35
|
+
def date_at(event)
|
36
|
+
date = self[event]
|
37
|
+
date ||= self[:started] if event == :start
|
38
|
+
if date.nil?
|
39
|
+
f = path event
|
40
|
+
date = File.read(f) if File.size? f
|
43
41
|
end
|
42
|
+
Time.parse(date) unless date.nil?
|
43
|
+
end
|
44
44
|
end
|
45
45
|
|
data/lib/miga/tax_dist.rb
CHANGED
@@ -1,66 +1,82 @@
|
|
1
1
|
# @package MiGA
|
2
2
|
# @license Artistic-2.0
|
3
3
|
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
4
|
+
require 'miga/common'
|
5
|
+
require 'miga/taxonomy'
|
6
|
+
require 'zlib'
|
7
7
|
|
8
8
|
##
|
9
9
|
# Methods for taxonomy identification based on AAI/ANI values.
|
10
10
|
module MiGA::TaxDist
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
11
|
+
# Class-level
|
12
|
+
class << self
|
13
|
+
|
14
|
+
##
|
15
|
+
# Absolute path to the :intax or :novel data file (determined by +test+) for
|
16
|
+
# AAI, determined for options +opts+. Supported options:
|
17
|
+
# - +:engine+: The search engine for AAI: +:blast+ (default) or +:diamond+
|
18
|
+
def aai_path(test, opts = {})
|
19
|
+
opts[:engine] ||= :blast
|
20
|
+
engine = opts[:engine].to_s.downcase.to_sym
|
21
|
+
test = test.to_s.downcase.to_sym
|
22
|
+
return nil unless %i[intax novel].include? test
|
23
|
+
engine = :blast if %i[blast+ blat].include? engine
|
24
|
+
return nil unless %i[blast diamond].include? engine
|
25
|
+
File.expand_path("../_data/aai-#{test}-#{engine}.tsv.gz", __FILE__)
|
26
|
+
end
|
20
27
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
28
|
+
##
|
29
|
+
# Returns a Hash, where the keys correspond to the taxonomic level
|
30
|
+
# (see MiGA::Taxonomy.LONG_RANKS for the meanings), and the values
|
31
|
+
# correspond to the p-values of +test+ (one of +:intax+ or +:novel+)
|
32
|
+
# with options +opts+. See +aai_path+ for supported options.
|
33
|
+
def aai_pvalues(aai, test, opts = {})
|
34
|
+
Zlib::GzipReader.open(aai_path(test, opts)) do |fh|
|
35
|
+
keys = nil
|
36
|
+
fh.each_line do |ln|
|
37
|
+
row = ln.chomp.split(/\t/)
|
38
|
+
if fh.lineno == 1
|
39
|
+
keys = row[1, row.size - 1].map(&:to_i)
|
40
|
+
elsif row.shift.to_f >= aai
|
41
|
+
vals = {}
|
42
|
+
keys.each do |i|
|
43
|
+
v = row.shift
|
44
|
+
next if v == 'NA' # <- missing data
|
45
|
+
next if i == 1 # <- namespace, not a taxonomic rank
|
46
|
+
rank = i.zero? ? :root : MiGA::Taxonomy.KNOWN_RANKS[i]
|
47
|
+
vals[rank] = v.to_f
|
48
|
+
end
|
49
|
+
return vals
|
38
50
|
end
|
39
|
-
return vals
|
40
51
|
end
|
41
|
-
end
|
42
|
-
|
43
|
-
|
44
|
-
end
|
52
|
+
end
|
53
|
+
{}
|
54
|
+
end
|
45
55
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
56
|
+
##
|
57
|
+
# Determines the degree to which a Float +aai+ value indicates similar
|
58
|
+
# taxonomy (with +test+ :intax) or a novel taxon (with +test+ :novel) with
|
59
|
+
# options +opts+. See +aai_path+ for supported options.
|
60
|
+
# Returns a Hash with "likelihood" phrases as keys and values as an array
|
61
|
+
# with canonical rank (as in MiGA::Taxonomy) and estimated p-value.
|
62
|
+
def aai_taxtest(aai, test, opts = {})
|
63
|
+
meaning = {
|
64
|
+
most_likely: [0.00, 0.01],
|
65
|
+
probably: [0.01, 0.10],
|
66
|
+
possibly_even: [0.10, 0.50]
|
67
|
+
}
|
68
|
+
pvalues = aai_pvalues(aai, test, opts)
|
69
|
+
out = {}
|
70
|
+
meaning.each do |phrase, thresholds|
|
71
|
+
lwr, upr = thresholds
|
72
|
+
min = pvalues.values.select { |v| v < upr }.max
|
73
|
+
return out if min.nil?
|
74
|
+
if min >= lwr
|
75
|
+
v = pvalues.select { |_, vj| vj == min }
|
76
|
+
out[phrase] = (test == :intax ? v.reverse_each : v).first
|
77
|
+
end
|
61
78
|
end
|
79
|
+
out
|
62
80
|
end
|
63
|
-
out
|
64
81
|
end
|
65
|
-
|
66
82
|
end
|
data/lib/miga/taxonomy.rb
CHANGED
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.6,
|
13
|
+
VERSION = [0.6, 3, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2020, 3,
|
21
|
+
VERSION_DATE = Date.new(2020, 3, 27)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'miga/project'
|
3
|
+
|
4
|
+
class ResultStatsTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def setup
|
7
|
+
$tmp = Dir.mktmpdir
|
8
|
+
ENV['MIGA_HOME'] = $tmp
|
9
|
+
FileUtils.touch(File.expand_path('.miga_rc', ENV['MIGA_HOME']))
|
10
|
+
FileUtils.touch(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
|
11
|
+
$p = MiGA::Project.new(File.expand_path('project1', $tmp))
|
12
|
+
$d = $p.add_dataset('dataset1')
|
13
|
+
end
|
14
|
+
|
15
|
+
def teardown
|
16
|
+
FileUtils.rm_rf $tmp
|
17
|
+
ENV['MIGA_HOME'] = nil
|
18
|
+
end
|
19
|
+
|
20
|
+
def file_path(dir, ext)
|
21
|
+
File.join($p.path, dir, "#{$d.name}#{ext}")
|
22
|
+
end
|
23
|
+
|
24
|
+
def touch_done(dir)
|
25
|
+
FileUtils.touch(file_path(dir, '.done'))
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_single_raw_reads
|
29
|
+
dir = 'data/01.raw_reads'
|
30
|
+
fq = file_path(dir, '.1.fastq')
|
31
|
+
File.open(fq, 'w') { |fh| fh.puts '@1','ACTAC','+','####' }
|
32
|
+
touch_done(dir)
|
33
|
+
r = $d.add_result(:raw_reads)
|
34
|
+
assert_equal({}, r[:stats])
|
35
|
+
r.compute_stats
|
36
|
+
assert(!r[:stats].empty?)
|
37
|
+
assert_equal(Hash, r[:stats].class)
|
38
|
+
assert_equal(1, r[:stats][:reads])
|
39
|
+
assert_equal([40.0, '%'], r[:stats][:g_c_content])
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_coupled_raw_reads
|
43
|
+
dir = 'data/01.raw_reads'
|
44
|
+
fq = file_path(dir, '.1.fastq')
|
45
|
+
File.open(fq, 'w') { |fh| fh.puts '@1','ACTAC','+','####' }
|
46
|
+
fq = file_path(dir, '.2.fastq')
|
47
|
+
File.open(fq, 'w') { |fh| fh.puts '@1','ACTAC','+','####' }
|
48
|
+
touch_done(dir)
|
49
|
+
r = $d.add_result(:raw_reads)
|
50
|
+
r.compute_stats
|
51
|
+
assert(!r[:stats].empty?)
|
52
|
+
assert_nil(r[:stats][:reads])
|
53
|
+
assert_equal(1, r[:stats][:read_pairs])
|
54
|
+
assert_equal([40.0, '%'], r[:stats][:reverse_g_c_content])
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_trimmed_reads
|
58
|
+
dir = 'data/02.trimmed_reads'
|
59
|
+
FileUtils.touch(file_path(dir, '.1.clipped.fastq'))
|
60
|
+
touch_done(dir)
|
61
|
+
r = $d.add_result(:trimmed_reads)
|
62
|
+
assert_equal({}, r[:stats])
|
63
|
+
r.compute_stats
|
64
|
+
assert_equal({}, r[:stats])
|
65
|
+
end
|
66
|
+
|
67
|
+
def test_read_quality
|
68
|
+
dir = 'data/03.read_quality'
|
69
|
+
Dir.mkdir(file_path(dir, '.solexaqa'))
|
70
|
+
Dir.mkdir(file_path(dir, '.fastqc'))
|
71
|
+
touch_done(dir)
|
72
|
+
r = $d.add_result(:read_quality)
|
73
|
+
assert_equal({}, r[:stats])
|
74
|
+
r.compute_stats
|
75
|
+
assert_equal({}, r[:stats])
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_trimmed_fasta
|
79
|
+
dir = 'data/04.trimmed_fasta'
|
80
|
+
fa = file_path(dir, '.CoupledReads.fa')
|
81
|
+
File.open(fa, 'w') { |fh| fh.puts '>1','ACTAC' }
|
82
|
+
touch_done(dir)
|
83
|
+
r = $d.add_result(:trimmed_fasta)
|
84
|
+
assert_equal({}, r[:stats])
|
85
|
+
r.compute_stats
|
86
|
+
assert_equal(1, r[:stats][:reads])
|
87
|
+
assert_equal([40.0, '%'], r[:stats][:g_c_content])
|
88
|
+
end
|
89
|
+
|
90
|
+
def test_assembly
|
91
|
+
dir = 'data/05.assembly'
|
92
|
+
fa = file_path(dir, '.LargeContigs.fna')
|
93
|
+
File.open(fa, 'w') { |fh| fh.puts '>1','ACTAC' }
|
94
|
+
touch_done(dir)
|
95
|
+
r = $d.add_result(:assembly)
|
96
|
+
assert_equal({}, r[:stats])
|
97
|
+
r.compute_stats
|
98
|
+
assert_equal(1, r[:stats][:contigs])
|
99
|
+
assert_equal([5, 'bp'], r[:stats][:total_length])
|
100
|
+
assert_equal([40.0, '%'], r[:stats][:g_c_content])
|
101
|
+
end
|
102
|
+
|
103
|
+
def test_cds
|
104
|
+
dir = 'data/06.cds'
|
105
|
+
fa = file_path(dir, '.faa')
|
106
|
+
File.open(fa, 'w') { |fh| fh.puts '>1','M' }
|
107
|
+
touch_done(dir)
|
108
|
+
r = $d.add_result(:cds)
|
109
|
+
assert_equal({}, r[:stats])
|
110
|
+
r.compute_stats
|
111
|
+
assert_equal(1, r[:stats][:predicted_proteins])
|
112
|
+
assert_equal([1.0, 'aa'], r[:stats][:average_length])
|
113
|
+
assert_nil(r[:stats][:coding_density])
|
114
|
+
test_assembly
|
115
|
+
r.compute_stats
|
116
|
+
assert_equal([60.0, '%'], r[:stats][:coding_density])
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
data/test/result_test.rb
CHANGED
@@ -10,14 +10,14 @@ class ResultTest < Test::Unit::TestCase
|
|
10
10
|
FileUtils.touch(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
|
11
11
|
$p1 = MiGA::Project.new(File.expand_path('project1', $tmp))
|
12
12
|
$d1 = $p1.add_dataset('dataset1')
|
13
|
-
FileUtils.touch(
|
14
|
-
"data/02.trimmed_reads/#{$d1.name}.1.clipped.fastq"
|
15
|
-
FileUtils.touch(
|
16
|
-
"data/02.trimmed_reads/#{$d1.name}.done"
|
17
|
-
FileUtils.touch(
|
18
|
-
'data/10.clades/01.find/miga-project.empty'
|
19
|
-
FileUtils.touch(
|
20
|
-
'data/10.clades/01.find/miga-project.done'
|
13
|
+
FileUtils.touch(
|
14
|
+
File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.1.clipped.fastq"))
|
15
|
+
FileUtils.touch(
|
16
|
+
File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.done"))
|
17
|
+
FileUtils.touch(
|
18
|
+
File.join($p1.path, 'data/10.clades/01.find/miga-project.empty'))
|
19
|
+
FileUtils.touch(
|
20
|
+
File.join($p1.path, 'data/10.clades/01.find/miga-project.done'))
|
21
21
|
end
|
22
22
|
|
23
23
|
def teardown
|
@@ -46,4 +46,18 @@ class ResultTest < Test::Unit::TestCase
|
|
46
46
|
assert_equal($p1.path, r.source.path)
|
47
47
|
end
|
48
48
|
|
49
|
+
def test_dates
|
50
|
+
r = $d1.add_result(:trimmed_reads)
|
51
|
+
assert_nil(r.done_at)
|
52
|
+
assert_nil(r.started_at)
|
53
|
+
tf = File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.done")
|
54
|
+
File.open(tf, 'w') { |fh| fh.puts Time.new(1,2,3,4,5) }
|
55
|
+
assert_equal(Time, r.done_at.class)
|
56
|
+
assert_nil(r.running_time)
|
57
|
+
tf = File.join($p1.path, "data/02.trimmed_reads/#{$d1.name}.start")
|
58
|
+
File.open(tf, 'w') { |fh| fh.puts Time.new(1,2,3,4,0) }
|
59
|
+
r = $d1.add_result(:trimmed_reads)
|
60
|
+
assert_equal(5.0, r.running_time)
|
61
|
+
end
|
62
|
+
|
49
63
|
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'miga/tax_dist'
|
3
|
+
|
4
|
+
class TaxDistTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def test_aai_path
|
7
|
+
assert(File.size? MiGA::TaxDist.aai_path(:intax))
|
8
|
+
assert(File.size? MiGA::TaxDist.aai_path(:novel))
|
9
|
+
assert(File.size? MiGA::TaxDist.aai_path(:intax, engine: :diamond))
|
10
|
+
assert(File.size? MiGA::TaxDist.aai_path(:novel, engine: :blast))
|
11
|
+
assert(File.size? MiGA::TaxDist.aai_path(:novel, engine: :'blast+'))
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_aai_pvalues
|
15
|
+
distant_intax = MiGA::TaxDist.aai_pvalues(35.0, :intax)
|
16
|
+
assert_lt(distant_intax[:root], 0.05)
|
17
|
+
assert_gt(distant_intax[:g], 0.05)
|
18
|
+
assert_nil(distant_intax[:ns])
|
19
|
+
|
20
|
+
close_intax = MiGA::TaxDist.aai_pvalues(99.0, :intax, engine: :blast)
|
21
|
+
assert_lt(close_intax[:root], 0.05)
|
22
|
+
assert_lt(close_intax[:s], 0.05)
|
23
|
+
|
24
|
+
close_intax = MiGA::TaxDist.aai_pvalues(99.0, :intax, engine: :diamond)
|
25
|
+
assert_lt(close_intax[:root], 0.05)
|
26
|
+
assert_lt(close_intax[:s], 0.05)
|
27
|
+
|
28
|
+
distant_novel = MiGA::TaxDist.aai_pvalues(35.0, :novel, engine: :diamond)
|
29
|
+
$stderr.puts distant_novel
|
30
|
+
assert_gt(distant_novel[:root], 0.05)
|
31
|
+
assert_lt(distant_novel[:g], 0.05)
|
32
|
+
assert_nil(distant_novel[:ns])
|
33
|
+
|
34
|
+
close_novel = MiGA::TaxDist.aai_pvalues(99.0, :novel)
|
35
|
+
assert_gt(close_novel[:root], 0.05)
|
36
|
+
assert_gt(close_novel[:f], 0.05)
|
37
|
+
|
38
|
+
assert_equal({}, MiGA::TaxDist.aai_pvalues(101.0, :intax))
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_aai_taxtest
|
42
|
+
distant_intax = MiGA::TaxDist.aai_taxtest(35.0, :intax, engine: :diamond)
|
43
|
+
assert_equal(:root, distant_intax[:most_likely][0])
|
44
|
+
assert_nil(distant_intax[:probably])
|
45
|
+
assert_nil(distant_intax[:possibly_even])
|
46
|
+
|
47
|
+
distant_intax = MiGA::TaxDist.aai_taxtest(35.0, :intax, engine: :blast)
|
48
|
+
assert_equal(:root, distant_intax[:most_likely][0])
|
49
|
+
assert_nil(distant_intax[:probably])
|
50
|
+
assert_nil(distant_intax[:possibly_even])
|
51
|
+
|
52
|
+
close_intax = MiGA::TaxDist.aai_taxtest(99.0, :intax, engine: :diamond)
|
53
|
+
assert_equal(:s, close_intax[:probably][0])
|
54
|
+
|
55
|
+
close_intax = MiGA::TaxDist.aai_taxtest(99.0, :intax, engine: :blast)
|
56
|
+
assert_equal(:s, close_intax[:probably][0])
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
data/test/test_helper.rb
CHANGED
data/utils/distance/commands.rb
CHANGED
@@ -63,12 +63,12 @@ module MiGA::DistanceRunner::Commands
|
|
63
63
|
|
64
64
|
##
|
65
65
|
# Execute an AAI command
|
66
|
-
def aai_cmd(f1, f2, n1, n2, db, o={})
|
66
|
+
def aai_cmd(f1, f2, n1, n2, db, o = {})
|
67
67
|
o = opts.merge(o)
|
68
68
|
v = `aai.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
|
69
69
|
--name1 "#{n1}" --name2 "#{n2}" \
|
70
70
|
-t "#{o[:thr]}" -a --lookup-first "--#{o[:aai_save_rbm]}" \
|
71
|
-
-p "#{o[:aai_p] ||
|
71
|
+
-p "#{o[:aai_p] || 'blast+'}"`.chomp
|
72
72
|
(v.nil? || v.empty?) ? 0 : v.to_f
|
73
73
|
end
|
74
74
|
|
@@ -91,7 +91,7 @@ module MiGA::DistanceRunner::Commands
|
|
91
91
|
v = `ani.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
|
92
92
|
--name1 "#{n1}" --name2 "#{n2}" \
|
93
93
|
-t "#{opts[:thr]}" -a --no-save-regions --no-save-rbm \
|
94
|
-
--lookup-first -p "#{o[:ani_p] ||
|
94
|
+
--lookup-first -p "#{o[:ani_p] || 'blast+'}"`.chomp
|
95
95
|
end
|
96
96
|
v.nil? || v.empty? ? 0 : v.to_f
|
97
97
|
end
|
data/utils/distance/pipeline.rb
CHANGED
@@ -73,14 +73,17 @@ module MiGA::DistanceRunner::Pipeline
|
|
73
73
|
cr = dataset.closest_relatives(1, from_ref_project)
|
74
74
|
return if cr.nil? or cr.empty?
|
75
75
|
tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
|
76
|
+
|
76
77
|
# Run the test for each rank
|
77
|
-
|
78
|
+
tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
|
79
|
+
r = tax_test.map do |k,v|
|
78
80
|
sig = ''
|
79
|
-
[0.5,0.1,0.05,0.01].each{ |i| sig << '*' if v<i }
|
81
|
+
[0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
|
80
82
|
[MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
|
81
83
|
end
|
84
|
+
|
82
85
|
# Save test
|
83
|
-
File.open(File.expand_path("#{dataset.name}.intax.txt", home),
|
86
|
+
File.open(File.expand_path("#{dataset.name}.intax.txt", home), 'w') do |fh|
|
84
87
|
fh.puts "Closest relative: #{cr[0][0]} with AAI: #{cr[0][1]}."
|
85
88
|
fh.puts ''
|
86
89
|
fh.puts MiGA::MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
|
data/utils/distance/runner.rb
CHANGED
@@ -22,6 +22,7 @@ class MiGA::DistanceRunner
|
|
22
22
|
raise "No project at #{project_path}"
|
23
23
|
@dataset = project.dataset(dataset_name)
|
24
24
|
@home = File.expand_path('data/09.distances', project.path)
|
25
|
+
|
25
26
|
# Default opts
|
26
27
|
if project.metadata[:aai_save_rbm] == false
|
27
28
|
@opts[:aai_save_rbm] ||= 'no-save-rbm'
|
@@ -48,6 +49,8 @@ class MiGA::DistanceRunner
|
|
48
49
|
[:haai_p, :aai_p, :ani_p, :distances_checkpoint].each do |m|
|
49
50
|
@opts[m] ||= ref_project.metadata[m]
|
50
51
|
end
|
52
|
+
@opts[:aai_p] ||= 'blast+'
|
53
|
+
@opts[:ani_p] ||= 'blast+'
|
51
54
|
@opts[:distances_checkpoint] ||= 10
|
52
55
|
@opts[:distances_checkpoint] = @opts[:distances_checkpoint].to_i
|
53
56
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.2.0
|
4
|
+
version: 0.6.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-03-
|
11
|
+
date: 2020-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '3'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: assertions
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1'
|
83
97
|
description: Microbial Genomes Atlas
|
84
98
|
email: lmrodriguezr@gmail.com
|
85
99
|
executables:
|
@@ -94,8 +108,10 @@ files:
|
|
94
108
|
- Rakefile
|
95
109
|
- bin/miga
|
96
110
|
- lib/miga.rb
|
97
|
-
- lib/miga/_data/aai-intax.tsv.gz
|
98
|
-
- lib/miga/_data/aai-novel.tsv.gz
|
111
|
+
- lib/miga/_data/aai-intax-blast.tsv.gz
|
112
|
+
- lib/miga/_data/aai-intax-diamond.tsv.gz
|
113
|
+
- lib/miga/_data/aai-novel-blast.tsv.gz
|
114
|
+
- lib/miga/_data/aai-novel-diamond.tsv.gz
|
99
115
|
- lib/miga/cli.rb
|
100
116
|
- lib/miga/cli/action.rb
|
101
117
|
- lib/miga/cli/action/about.rb
|
@@ -199,7 +215,9 @@ files:
|
|
199
215
|
- test/metadata_test.rb
|
200
216
|
- test/project_test.rb
|
201
217
|
- test/remote_dataset_test.rb
|
218
|
+
- test/result_stats_test.rb
|
202
219
|
- test/result_test.rb
|
220
|
+
- test/tax_dist_test.rb
|
203
221
|
- test/tax_index_test.rb
|
204
222
|
- test/taxonomy_test.rb
|
205
223
|
- test/test_helper.rb
|
Binary file
|
Binary file
|