miga-base 0.5.10.0 → 0.6.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +19 -9
- data/lib/miga/cli/action/run.rb +13 -4
- data/lib/miga/cli/action/stats.rb +1 -145
- data/lib/miga/cli/base.rb +0 -1
- data/lib/miga/common/hooks.rb +49 -0
- data/lib/miga/common/path.rb +0 -9
- data/lib/miga/daemon.rb +1 -2
- data/lib/miga/dataset.rb +7 -0
- data/lib/miga/dataset/hooks.rb +72 -0
- data/lib/miga/dataset/result.rb +35 -25
- data/lib/miga/project.rb +5 -3
- data/lib/miga/project/dataset.rb +3 -1
- data/lib/miga/project/hooks.rb +60 -0
- data/lib/miga/project/result.rb +6 -3
- data/lib/miga/result.rb +17 -11
- data/lib/miga/result/base.rb +6 -0
- data/lib/miga/result/source.rb +46 -0
- data/lib/miga/result/stats.rb +157 -0
- data/lib/miga/version.rb +3 -3
- data/scripts/cds.bash +2 -2
- data/scripts/miga.bash +0 -5
- data/scripts/stats.bash +1 -1
- data/test/hook_test.rb +110 -0
- data/utils/subclades.R +1 -1
- metadata +8 -4
- data/lib/miga/cli/action/plugins.rb +0 -28
- data/lib/miga/project/plugins.rb +0 -41
data/lib/miga/dataset/result.rb
CHANGED
@@ -6,21 +6,22 @@ require 'miga/dataset/base'
|
|
6
6
|
##
|
7
7
|
# Helper module including specific functions to add dataset results.
|
8
8
|
module MiGA::Dataset::Result
|
9
|
-
|
9
|
+
|
10
10
|
include MiGA::Dataset::Base
|
11
|
-
|
11
|
+
|
12
12
|
##
|
13
13
|
# Get the result MiGA::Result in this dataset identified by the symbol +k+.
|
14
14
|
def result(k)
|
15
15
|
return nil if @@RESULT_DIRS[k.to_sym].nil?
|
16
16
|
MiGA::Result.load(
|
17
|
-
"#{project.path}/data/#{@@RESULT_DIRS[k.to_sym]}/#{name}.json"
|
17
|
+
"#{project.path}/data/#{@@RESULT_DIRS[k.to_sym]}/#{name}.json"
|
18
|
+
)
|
18
19
|
end
|
19
|
-
|
20
|
+
|
20
21
|
##
|
21
22
|
# Get all the results (Array of MiGA::Result) in this dataset.
|
22
23
|
def results ; @@RESULT_DIRS.keys.map{ |k| result k }.compact ; end
|
23
|
-
|
24
|
+
|
24
25
|
##
|
25
26
|
# For each result executes the 2-ary +blk+ block: key symbol and MiGA::Result.
|
26
27
|
def each_result(&blk)
|
@@ -28,15 +29,15 @@ module MiGA::Dataset::Result
|
|
28
29
|
blk.call(k, result(k)) unless result(k).nil?
|
29
30
|
end
|
30
31
|
end
|
31
|
-
|
32
|
+
|
32
33
|
##
|
33
34
|
# Look for the result with symbol key +result_type+ and register it in the
|
34
35
|
# dataset. If +save+ is false, it doesn't register the result, but it still
|
35
36
|
# returns a result if the expected files are complete. The +opts+ hash
|
36
37
|
# controls result creation (if necessary). Supported values include:
|
37
|
-
# - +is_clean+: A Boolean indicating if the input files are clean
|
38
|
-
# - +force+: A Boolean indicating if the result must be re-indexed.
|
39
|
-
# it implies save=true
|
38
|
+
# - +is_clean+: A Boolean indicating if the input files are clean
|
39
|
+
# - +force+: A Boolean indicating if the result must be re-indexed.
|
40
|
+
# If true, it implies +save = true+
|
40
41
|
# Returns MiGA::Result or nil.
|
41
42
|
def add_result(result_type, save = true, opts = {})
|
42
43
|
dir = @@RESULT_DIRS[result_type]
|
@@ -46,11 +47,14 @@ module MiGA::Dataset::Result
|
|
46
47
|
FileUtils.rm("#{base}.json") if File.exist?("#{base}.json")
|
47
48
|
else
|
48
49
|
r_pre = MiGA::Result.load("#{base}.json")
|
49
|
-
return r_pre if (r_pre.nil?
|
50
|
+
return r_pre if (r_pre.nil? && !save) || !r_pre.nil?
|
50
51
|
end
|
51
52
|
r = File.exist?("#{base}.done") ?
|
52
53
|
self.send("add_result_#{result_type}", base, opts) : nil
|
53
|
-
|
54
|
+
unless r.nil?
|
55
|
+
r.save
|
56
|
+
pull_hook(:on_result_ready, result_type)
|
57
|
+
end
|
54
58
|
r
|
55
59
|
end
|
56
60
|
|
@@ -68,7 +72,7 @@ module MiGA::Dataset::Result
|
|
68
72
|
not ignore_task?(t) and not add_result(t, save).nil?
|
69
73
|
end
|
70
74
|
end
|
71
|
-
|
75
|
+
|
72
76
|
##
|
73
77
|
# Returns the key symbol of the next task that needs to be executed. Passes
|
74
78
|
# +save+ to #add_result.
|
@@ -95,7 +99,7 @@ module MiGA::Dataset::Result
|
|
95
99
|
def done_preprocessing?(save = false)
|
96
100
|
!first_preprocessing(save).nil? and next_preprocessing(save).nil?
|
97
101
|
end
|
98
|
-
|
102
|
+
|
99
103
|
##
|
100
104
|
# Returns an array indicating the stage of each task (sorted by execution
|
101
105
|
# order). The values are integers:
|
@@ -300,14 +304,17 @@ module MiGA::Dataset::Result
|
|
300
304
|
# Add result type +:mytaxa+ at +base+ (no +_opts+ supported).
|
301
305
|
def add_result_mytaxa(base, _opts)
|
302
306
|
if is_multi?
|
303
|
-
return nil unless result_files_exist?(base,
|
304
|
-
result_files_exist?(base,
|
307
|
+
return nil unless result_files_exist?(base, '.mytaxa') or
|
308
|
+
result_files_exist?(base, '.nomytaxa.txt')
|
305
309
|
r = MiGA::Result.new("#{base}.json")
|
306
|
-
add_files_to_ds_result(
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
310
|
+
add_files_to_ds_result(
|
311
|
+
r, name,
|
312
|
+
mytaxa: '.mytaxa', blast: '.blast',
|
313
|
+
mytaxain: '.mytaxain', nomytaxa: '.nomytaxa.txt',
|
314
|
+
species: '.mytaxa.Species.txt', genus: '.mytaxa.Genus.txt',
|
315
|
+
phylum: '.mytaxa.Phylum.txt', innominate: '.mytaxa.innominate',
|
316
|
+
kronain: '.mytaxa.krona', krona: '.html'
|
317
|
+
)
|
311
318
|
else
|
312
319
|
MiGA::Result.new("#{base}.json")
|
313
320
|
end
|
@@ -357,7 +364,7 @@ module MiGA::Dataset::Result
|
|
357
364
|
def add_result_stats(base, _opts)
|
358
365
|
MiGA::Result.new("#{base}.json")
|
359
366
|
end
|
360
|
-
|
367
|
+
|
361
368
|
##
|
362
369
|
# Add result type +:distances+ for _multi_ datasets at +base+.
|
363
370
|
def add_result_distances_multi(base)
|
@@ -383,10 +390,13 @@ module MiGA::Dataset::Result
|
|
383
390
|
result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
|
384
391
|
result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
|
385
392
|
r = MiGA::Result.new("#{base}.json")
|
386
|
-
add_files_to_ds_result(
|
387
|
-
|
388
|
-
|
389
|
-
|
393
|
+
add_files_to_ds_result(
|
394
|
+
r, name,
|
395
|
+
aai_medoids: '.aai-medoids.tsv',
|
396
|
+
haai_db: '.haai.db', aai_db: '.aai.db', ani_medoids: '.ani-medoids.tsv',
|
397
|
+
ani_db: '.ani.db', ref_tree: '.nwk', ref_tree_pdf: '.nwk.pdf',
|
398
|
+
intax_test: '.intax.txt'
|
399
|
+
)
|
390
400
|
end
|
391
401
|
|
392
402
|
##
|
data/lib/miga/project.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
require 'miga/dataset'
|
5
5
|
require 'miga/project/result'
|
6
6
|
require 'miga/project/dataset'
|
7
|
-
require 'miga/project/
|
7
|
+
require 'miga/project/hooks'
|
8
8
|
|
9
9
|
##
|
10
10
|
# MiGA representation of a project.
|
@@ -12,7 +12,7 @@ class MiGA::Project < MiGA::MiGA
|
|
12
12
|
|
13
13
|
include MiGA::Project::Result
|
14
14
|
include MiGA::Project::Dataset
|
15
|
-
include MiGA::Project::
|
15
|
+
include MiGA::Project::Hooks
|
16
16
|
|
17
17
|
##
|
18
18
|
# Absolute path to the project folder.
|
@@ -35,7 +35,6 @@ class MiGA::Project < MiGA::MiGA
|
|
35
35
|
@path = File.absolute_path(path)
|
36
36
|
self.create if not update and not Project.exist? self.path
|
37
37
|
self.load if self.metadata.nil?
|
38
|
-
self.load_plugins
|
39
38
|
self.metadata[:type] = :mixed if type.nil?
|
40
39
|
raise "Unrecognized project type: #{type}." if @@KNOWN_TYPES[type].nil?
|
41
40
|
end
|
@@ -54,6 +53,7 @@ class MiGA::Project < MiGA::MiGA
|
|
54
53
|
{datasets: [], name: File.basename(path)})
|
55
54
|
d_path = File.expand_path('daemon/daemon.json', path)
|
56
55
|
File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist? d_path
|
56
|
+
pull_hook :on_create
|
57
57
|
self.load
|
58
58
|
end
|
59
59
|
|
@@ -67,6 +67,7 @@ class MiGA::Project < MiGA::MiGA
|
|
67
67
|
# Save any changes persistently, regardless of +do_not_save+.
|
68
68
|
def save!
|
69
69
|
metadata.save
|
70
|
+
pull_hook :on_save
|
70
71
|
self.load
|
71
72
|
end
|
72
73
|
|
@@ -77,6 +78,7 @@ class MiGA::Project < MiGA::MiGA
|
|
77
78
|
@dataset_names_hash = nil
|
78
79
|
@metadata = MiGA::Metadata.load "#{path}/miga.project.json"
|
79
80
|
raise "Couldn't find project metadata at #{path}" if metadata.nil?
|
81
|
+
pull_hook :on_load
|
80
82
|
end
|
81
83
|
|
82
84
|
##
|
data/lib/miga/project/dataset.rb
CHANGED
@@ -56,6 +56,7 @@ module MiGA::Project::Dataset
|
|
56
56
|
@metadata[:datasets] << name
|
57
57
|
@dataset_names_hash = nil # Ensure loading even if +do_not_save+ is true
|
58
58
|
save
|
59
|
+
pull_hook(:on_add_dataset, name)
|
59
60
|
end
|
60
61
|
dataset(name)
|
61
62
|
end
|
@@ -67,6 +68,7 @@ module MiGA::Project::Dataset
|
|
67
68
|
return nil if d.nil?
|
68
69
|
self.metadata[:datasets].delete(name)
|
69
70
|
save
|
71
|
+
pull_hook(:on_unlink_dataset, name)
|
70
72
|
d
|
71
73
|
end
|
72
74
|
|
@@ -121,7 +123,7 @@ module MiGA::Project::Dataset
|
|
121
123
|
##
|
122
124
|
# Are all the datasets in the project preprocessed? Save intermediate results
|
123
125
|
# if +save+ (until the first incomplete dataset is reached).
|
124
|
-
def done_preprocessing?(save=true)
|
126
|
+
def done_preprocessing?(save = true)
|
125
127
|
dataset_names.each do |dn|
|
126
128
|
ds = dataset(dn)
|
127
129
|
return false if ds.is_ref? and not ds.done_preprocessing?(save)
|
data/lib/miga/project/hooks.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
|
2
|
+
require 'miga/common/hooks'
|
3
|
+
|
4
|
+
##
|
5
|
+
# Helper module including specific functions to handle project hooks.
|
6
|
+
# Supported events:
|
7
|
+
# - on_create(): When created
|
8
|
+
# - on_load(): When loaded
|
9
|
+
# - on_save(): When saved
|
10
|
+
# - on_add_dataset(dataset): When a dataset is added, with name +dataset+
|
11
|
+
# - on_unlink_dataset(dataset): When dataset with name +dataset+ is unlinked
|
12
|
+
# - on_result_ready(result): When any result is ready, with key +result+
|
13
|
+
# - on_result_ready_{result}(): When +result+ is ready
|
14
|
+
# - on_processing_ready(): When preprocessing is complete
|
15
|
+
# Supported hooks:
|
16
|
+
# - run_lambda(lambda, args...)
|
17
|
+
# - run_cmd(cmd)
|
18
|
+
# Internal hooks:
|
19
|
+
# - _pull_processing_ready_hooks()
|
20
|
+
# - _pull_result_hooks()
|
21
|
+
module MiGA::Project::Hooks
|
22
|
+
|
23
|
+
include MiGA::Common::Hooks
|
24
|
+
|
25
|
+
# Default hooks of a project, as a Hash of event name => Array of hook
# definitions (each hook definition is an Array starting with the hook name).
# By default, +on_result_ready+ pulls +_pull_result_hooks+ followed by
# +_pull_processing_ready_hooks+.
def default_hooks
  on_ready = [:_pull_result_hooks, :_pull_processing_ready_hooks].map do |hook|
    [hook]
  end
  { on_result_ready: on_ready }
end
|
33
|
+
|
34
|
+
##
# Run +cmd+ in the command-line with {{variables}}: project (path of this
# project), miga (MiGA root path), and object (first event argument, as
# defined by the event, if any). Waits for the spawned process to finish.
# - +hook_args+: +[cmd]+
# - +event_args+: +[object (optional)]+
def hook_run_cmd(hook_args, event_args)
  cmd = hook_args.first.miga_variables(
    project: path, miga: MiGA::MiGA.root_path, object: event_args.first
  )
  pid = spawn(cmd)
  Process.wait(pid)
end
|
46
|
+
|
47
|
+
##
# Pull the :on_processing_ready hook if no task remains to be executed in the
# project, as reported by +next_task+ (called with +save = false+).
def hook__pull_processing_ready_hooks(_hook_args, _event_args)
  return unless next_task(nil, false).nil?

  pull_hook(:on_processing_ready)
end
|
52
|
+
|
53
|
+
##
# Project Action :pull_result_hooks([], [res])
# Pull the hook specific to the type of result, +on_result_ready_{res}+,
# passing along all the event arguments.
def hook__pull_result_hooks(_hook_args, event_args)
  specific_event = :"on_result_ready_#{event_args.first}"
  pull_hook(specific_event, *event_args)
end
|
59
|
+
|
60
|
+
end
|
data/lib/miga/project/result.rb
CHANGED
@@ -46,11 +46,14 @@ module MiGA::Project::Result
|
|
46
46
|
FileUtils.rm("#{base}.json") if File.exist?("#{base}.json")
|
47
47
|
else
|
48
48
|
r_pre = MiGA::Result.load("#{base}.json")
|
49
|
-
return r_pre if (r_pre.nil?
|
49
|
+
return r_pre if (r_pre.nil? && !save) || !r_pre.nil?
|
50
50
|
end
|
51
51
|
r = result_files_exist?(base, ".done") ?
|
52
52
|
send("add_result_#{name}", base) : nil
|
53
|
-
|
53
|
+
unless r.nil?
|
54
|
+
r.save
|
55
|
+
pull_hook(:on_result_ready, name)
|
56
|
+
end
|
54
57
|
r
|
55
58
|
end
|
56
59
|
|
@@ -69,7 +72,7 @@ module MiGA::Project::Result
|
|
69
72
|
# If +tasks+ is +nil+ (default), it uses the entire list of tasks.
|
70
73
|
# Returns a Symbol.
|
71
74
|
def next_task(tasks = nil, save = true)
|
72
|
-
tasks ||= @@DISTANCE_TASKS
|
75
|
+
tasks ||= @@DISTANCE_TASKS + @@INCLADE_TASKS
|
73
76
|
tasks.find do |t|
|
74
77
|
if metadata["run_#{t}"] == false or
|
75
78
|
(!is_clade? and @@INCLADE_TASKS.include?(t) and
|
data/lib/miga/result.rb
CHANGED
@@ -1,19 +1,25 @@
|
|
1
1
|
# @package MiGA
|
2
2
|
# @license Artistic-2.0
|
3
3
|
|
4
|
-
require
|
4
|
+
require 'miga/result/dates'
|
5
|
+
require 'miga/result/source'
|
6
|
+
require 'miga/result/stats'
|
5
7
|
|
6
8
|
##
|
7
9
|
# The result from a task run. It can be project-wide or dataset-specific.
|
8
10
|
class MiGA::Result < MiGA::MiGA
|
9
|
-
|
11
|
+
|
10
12
|
include MiGA::Result::Dates
|
11
|
-
|
13
|
+
include MiGA::Result::Source
|
14
|
+
include MiGA::Result::Stats
|
15
|
+
|
12
16
|
# Class-level
|
13
|
-
|
17
|
+
|
14
18
|
##
|
15
19
|
# Check if the result described by the JSON in +path+ already exists.
|
16
|
-
def self.exist?(path)
|
20
|
+
def self.exist?(path)
|
21
|
+
File.exist? path
|
22
|
+
end
|
17
23
|
|
18
24
|
##
|
19
25
|
# Load the result described by the JSON in +path+. Returns MiGA::Result if it
|
@@ -32,14 +38,14 @@ class MiGA::Result < MiGA::MiGA
|
|
32
38
|
##
|
33
39
|
# Array of MiGA::Result objects nested within the result (if any).
|
34
40
|
attr_reader :results
|
35
|
-
|
41
|
+
|
36
42
|
##
|
37
43
|
# Load or create the MiGA::Result described by the JSON file +path+.
|
38
44
|
def initialize(path)
|
39
|
-
@path = path
|
40
|
-
MiGA::Result.exist?(path) ? self.load : create
|
45
|
+
@path = File.absolute_path(path)
|
46
|
+
MiGA::Result.exist?(@path) ? self.load : create
|
41
47
|
end
|
42
|
-
|
48
|
+
|
43
49
|
##
|
44
50
|
# Is the result clean? Returns Boolean.
|
45
51
|
def clean? ; !! self[:clean] ; end
|
@@ -109,7 +115,7 @@ class MiGA::Result < MiGA::MiGA
|
|
109
115
|
##
|
110
116
|
# Initialize and #save empty result.
|
111
117
|
def create
|
112
|
-
@data = {:
|
118
|
+
@data = { created: Time.now.to_s, results: [], stats: {}, files: {} }
|
113
119
|
save
|
114
120
|
end
|
115
121
|
|
@@ -181,5 +187,5 @@ class MiGA::Result < MiGA::MiGA
|
|
181
187
|
@data[:results] << result.path
|
182
188
|
save
|
183
189
|
end
|
184
|
-
|
190
|
+
|
185
191
|
end
|
data/lib/miga/result/base.rb
CHANGED
data/lib/miga/result/source.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
|
2
|
+
require 'miga/result/base'
|
3
|
+
|
4
|
+
##
|
5
|
+
# Helper module including functions to access the source of results
|
6
|
+
module MiGA::Result::Source
|
7
|
+
|
8
|
+
##
# Load and return the source (parent object) of a result: the project when
# the result key is listed in +MiGA::Project.RESULT_DIRS+, or otherwise the
# dataset of the project named as the result file without +.json+. The value
# is cached once found.
def source
  return @source if @source

  @source =
    if MiGA::Project.RESULT_DIRS[key]
      project
    else
      project.dataset(File.basename(path, '.json'))
    end
end
|
17
|
+
|
18
|
+
##
# Detect the result key assigned to this result: the key of
# +MiGA::Result.RESULT_DIRS+ whose directory equals #relative_dir.
# Returns the key, or +nil+ if the directory doesn't correspond to any
# registered result (instead of failing with NoMethodError on +nil+).
def key
  @key ||= begin
    pair = MiGA::Result.RESULT_DIRS.find do |_k, dir_name|
      dir_name == relative_dir
    end
    pair.nil? ? nil : pair.first
  end
end
|
23
|
+
|
24
|
+
##
# Directory of the result relative to the +data+ folder in the parent
# project, obtained by removing "#{project_path}data/" from +dir+ (cached).
def relative_dir
  return @relative_dir if @relative_dir

  data_prefix = "#{project_path}data/"
  @relative_dir = dir.sub(data_prefix, '')
end
|
30
|
+
|
31
|
+
##
# Project containing the result, loaded from #project_path on first use and
# cached afterwards.
def project
  return @project if @project

  @project = MiGA::Project.load(project_path)
end
|
36
|
+
|
37
|
+
##
# Path to the project containing the result. In most cases this should be
# identical to +project.path+, but this function is provided for safety,
# so the path referencing is identical to that of +self.path+ whenever they
# need to be compared. The returned value is the beginning of +path+ up to
# (and including) the slash opening the last +/data/+ component.
def project_path
  data_at = path.rindex('/data/')
  path[0..data_at]
end
|
45
|
+
end
|
46
|
+
|
data/lib/miga/result/stats.rb
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
|
2
|
+
require 'miga/result/base'
|
3
|
+
|
4
|
+
##
|
5
|
+
# Helper module including stats-specific functions for results
|
6
|
+
module MiGA::Result::Stats
|
7
|
+
|
8
|
+
##
# (Re-)calculate and save the statistics for the result. The calculation is
# delegated to the (possibly private) method +compute_stats_{key}+; if that
# method doesn't exist or returns +nil+, nothing is modified or saved.
# Returns the current value of +self[:stats]+ in any case.
def compute_stats
  handler = :"compute_stats_#{key}"
  if respond_to?(handler, true)
    fresh = send(handler)
    unless fresh.nil?
      self[:stats] = fresh
      save
    end
  end
  self[:stats]
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
# Statistics for raw reads, from +MiGA::MiGA.seqs_length+ on the FastQ files
# (with +gc: true+). If the result has no +:pair1+ file, the +:single+ file
# is summarized (reads, length and G+C); otherwise +:pair1+ and +:pair2+ are
# summarized separately as forward and reverse, and the number of pairs is
# taken from the forward file. Returns a Hash.
def compute_stats_raw_reads
  if self[:files][:pair1].nil?
    s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, gc: true)
    {
      reads: s[:n],
      length_average: [s[:avg], 'bp'],
      length_standard_deviation: [s[:sd], 'bp'],
      g_c_content: [s[:gc], '%']
    }
  else
    s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, gc: true)
    s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, gc: true)
    {
      read_pairs: s1[:n],
      forward_length_average: [s1[:avg], 'bp'],
      forward_length_standard_deviation: [s1[:sd], 'bp'],
      forward_g_c_content: [s1[:gc], '%'],
      reverse_length_average: [s2[:avg], 'bp'],
      reverse_length_standard_deviation: [s2[:sd], 'bp'],
      reverse_g_c_content: [s2[:gc], '%']
    }
  end
end
|
45
|
+
|
46
|
+
# Statistics for trimmed reads in FastA, from +MiGA::MiGA.seqs_length+ (with
# +gc: true+) on the +:coupled+ file if registered, or the +:single+ file
# otherwise. Returns a Hash.
def compute_stats_trimmed_fasta
  reads_key = self[:files][:coupled].nil? ? :single : :coupled
  s = MiGA::MiGA.seqs_length(file_path(reads_key), :fasta, gc: true)
  summary = { reads: s[:n] }
  summary[:length_average] = [s[:avg], 'bp']
  summary[:length_standard_deviation] = [s[:sd], 'bp']
  summary[:g_c_content] = [s[:gc], '%']
  summary
end
|
56
|
+
|
57
|
+
# Statistics for assemblies, from +MiGA::MiGA.seqs_length+ (with +n50: true+
# and +gc: true+) on the +:largecontigs+ FastA file. Returns a Hash.
def compute_stats_assembly
  contigs_file = file_path(:largecontigs)
  s = MiGA::MiGA.seqs_length(contigs_file, :fasta, n50: true, gc: true)
  summary = { contigs: s[:n] }
  summary[:n50] = [s[:n50], 'bp']
  summary[:total_length] = [s[:tot], 'bp']
  summary[:g_c_content] = [s[:gc], '%']
  summary
end
|
67
|
+
|
68
|
+
# Statistics for gene predictions, from +MiGA::MiGA.seqs_length+ on the
# +:proteins+ FastA file. If the source has an +:assembly+ result with a
# non-zero +:total_length+ statistic, the coding density is also included.
# The density is skipped (instead of failing or yielding a non-finite value)
# when the assembly has no stats, no total length, or a total length of zero.
# Returns a Hash.
def compute_stats_cds
  s = MiGA::MiGA.seqs_length(file_path(:proteins), :fasta)
  stats = {
    predicted_proteins: s[:n],
    average_length: [s[:avg], 'aa']
  }
  asm = source.result(:assembly)
  asm_stats = asm.nil? ? nil : asm[:stats]
  asm_total = asm_stats.nil? ? nil : asm_stats[:total_length]
  asm_len = asm_total.nil? ? nil : asm_total[0]
  unless asm_len.nil? || asm_len.to_f.zero?
    # 300.0 = 3 x 100: presumably 3 nucleotides per amino acid (assuming
    # s[:tot] is the total protein length in aa) expressed as percentage
    stats[:coding_density] = [300.0 * s[:tot] / asm_len, '%']
  end
  stats
end
|
80
|
+
|
81
|
+
# Statistics for essential genes. For multi sources (+source.is_multi?+),
# returns the mean and median number of copies per model read from the
# +:report+ file. Otherwise, it first corrects the report by domain (if
# applicable), then returns completeness, contamination, and quality
# (completeness - 5 x contamination) read from the +:report+ file, and
# saves the quality class in the metadata of the source. Returns a Hash.
def compute_stats_essential_genes
  return ess_genes_copies_stats if source.is_multi?

  ess_genes_fix_by_domain
  stats = ess_genes_quality_stats
  source.metadata[:quality] = ess_genes_quality_class(stats[:quality])
  source.save
  stats
end

# Mean and median copies per model as reported in the +:report+ file (both
# default to 0 if not found in the report)
def ess_genes_copies_stats
  stats = { median_copies: 0, mean_copies: 0 }
  File.open(file_path(:report), 'r') do |fh|
    fh.each_line do |ln|
      m = /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
      stats[:"#{m[1].downcase}_copies"] = m[2].to_f unless m.nil?
    end
  end
  stats
end

# Fix estimate by domain: if the source is classified as Archaea or Bacteria
# and no +:raw_report+ is registered yet, run +utils/domain-ess-genes.rb+ on
# the report and register the original as +:raw_report+ and the corrected
# one as +:report+
def ess_genes_fix_by_domain
  tax = source.metadata[:tax]
  return if tax.nil?
  return unless %w[Archaea Bacteria].include?(tax[:d])
  return unless file_path(:raw_report).nil?

  scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
  rep = file_path(:report)
  rc_p = File.expand_path('.miga_rc', ENV['HOME'])
  # Source the MiGA rc file first, if present in the home directory
  rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
  $stderr.print `#{rc} ruby '#{scr}' \
    '#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
  add_file(:raw_report, "#{source.name}.ess/log")
  add_file(:report, "#{source.name}.ess/log.domain")
end

# Extract completeness and contamination from the +:report+ file (both
# default to 0.0 if not found) and compute the quality from them
def ess_genes_quality_stats
  stats = { completeness: [0.0, '%'], contamination: [0.0, '%'] }
  File.open(file_path(:report), 'r') do |fh|
    fh.each_line do |ln|
      m = /^! (Completeness|Contamination): (.*)%/.match(ln)
      stats[m[1].downcase.to_sym][0] = m[2].to_f unless m.nil?
    end
  end
  stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
  stats
end

# Quality class for a given +quality+ value. Boundaries belong to the higher
# class since the first matching range wins (e.g., 80 is :excellent)
def ess_genes_quality_class(quality)
  case quality
  when 80..100 then :excellent
  when 50..80 then :high
  when 20..50 then :intermediate
  else :low
  end
end
|
126
|
+
|
127
|
+
# Statistics for SSU (rRNA) predictions, read from the gzipped +:gff+ file.
# Each non-comment row counts as one SSU, and as a complete SSU unless its
# ninth column contains "(partial)". The length of each feature is computed
# from the fourth and fifth columns, and the maximum is reported as
# +:max_length+ (absent if the file has no features). Rows without at least
# five tab-delimited columns (e.g., blank lines) are ignored instead of
# being counted as 1 bp complete features. Returns a Hash.
def compute_stats_ssu
  stats = { ssu: 0, complete_ssu: 0 }
  Zlib::GzipReader.open(file_path(:gff)) do |fh|
    fh.each_line do |ln|
      next if ln =~ /^#/

      rl = ln.chomp.split("\t")
      next if rl.size < 5 # No coordinates: blank or truncated row

      len = (rl[4].to_i - rl[3].to_i).abs + 1
      stats[:max_length] = [stats[:max_length] || 0, len].max
      stats[:ssu] += 1
      stats[:complete_ssu] += 1 unless rl[8] =~ /\(partial\)/
    end
  end
  stats
end
|
141
|
+
|
142
|
+
# Statistics for taxonomy, read from the +:intax_test+ file. The first line
# is expected to report the closest relative and its AAI; if it doesn't
# (including the case of an empty file), the closest relative is +nil+ and
# the AAI is 0.0. The next three lines are skipped, and each following row
# (until the first empty one) is saved as +{first column}_pvalue+ with the
# value of the third column, except for the row of +root+. Returns a Hash.
def compute_stats_taxonomy
  stats = {}
  File.open(file_path(:intax_test), 'r') do |fh|
    # +gets+ returns nil on an empty file, hence +to_s+
    header = fh.gets.to_s.chomp
    m = /Closest relative: (\S+) with AAI: (\S+)\.?/.match(header)
    stats[:closest_relative] = m.nil? ? nil : m[1]
    stats[:aai] = [m.nil? ? 0.0 : m[2].to_f, '%']
    3.times { fh.gets }
    fh.each_line do |ln|
      row = ln.chomp.gsub(/^\s*/, '').split(/\s+/)
      break if row.empty?

      stats[:"#{row[0]}_pvalue"] = row[2].to_f unless row[0] == 'root'
    end
  end
  stats
end
|
157
|
+
end
|