miga-base 0.5.10.0 → 0.6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +19 -9
- data/lib/miga/cli/action/run.rb +13 -4
- data/lib/miga/cli/action/stats.rb +1 -145
- data/lib/miga/cli/base.rb +0 -1
- data/lib/miga/common/hooks.rb +49 -0
- data/lib/miga/common/path.rb +0 -9
- data/lib/miga/daemon.rb +1 -2
- data/lib/miga/dataset.rb +7 -0
- data/lib/miga/dataset/hooks.rb +72 -0
- data/lib/miga/dataset/result.rb +35 -25
- data/lib/miga/project.rb +5 -3
- data/lib/miga/project/dataset.rb +3 -1
- data/lib/miga/project/hooks.rb +60 -0
- data/lib/miga/project/result.rb +6 -3
- data/lib/miga/result.rb +17 -11
- data/lib/miga/result/base.rb +6 -0
- data/lib/miga/result/source.rb +46 -0
- data/lib/miga/result/stats.rb +157 -0
- data/lib/miga/version.rb +3 -3
- data/scripts/cds.bash +2 -2
- data/scripts/miga.bash +0 -5
- data/scripts/stats.bash +1 -1
- data/test/hook_test.rb +110 -0
- data/utils/subclades.R +1 -1
- metadata +8 -4
- data/lib/miga/cli/action/plugins.rb +0 -28
- data/lib/miga/project/plugins.rb +0 -41
data/lib/miga/dataset/result.rb
CHANGED
@@ -6,21 +6,22 @@ require 'miga/dataset/base'
|
|
6
6
|
##
|
7
7
|
# Helper module including specific functions to add dataset results.
|
8
8
|
module MiGA::Dataset::Result
|
9
|
-
|
9
|
+
|
10
10
|
include MiGA::Dataset::Base
|
11
|
-
|
11
|
+
|
12
12
|
##
|
13
13
|
# Get the result MiGA::Result in this dataset identified by the symbol +k+.
|
14
14
|
def result(k)
|
15
15
|
return nil if @@RESULT_DIRS[k.to_sym].nil?
|
16
16
|
MiGA::Result.load(
|
17
|
-
"#{project.path}/data/#{@@RESULT_DIRS[k.to_sym]}/#{name}.json"
|
17
|
+
"#{project.path}/data/#{@@RESULT_DIRS[k.to_sym]}/#{name}.json"
|
18
|
+
)
|
18
19
|
end
|
19
|
-
|
20
|
+
|
20
21
|
##
|
21
22
|
# Get all the results (Array of MiGA::Result) in this dataset.
|
22
23
|
def results ; @@RESULT_DIRS.keys.map{ |k| result k }.compact ; end
|
23
|
-
|
24
|
+
|
24
25
|
##
|
25
26
|
# For each result executes the 2-ary +blk+ block: key symbol and MiGA::Result.
|
26
27
|
def each_result(&blk)
|
@@ -28,15 +29,15 @@ module MiGA::Dataset::Result
|
|
28
29
|
blk.call(k, result(k)) unless result(k).nil?
|
29
30
|
end
|
30
31
|
end
|
31
|
-
|
32
|
+
|
32
33
|
##
|
33
34
|
# Look for the result with symbol key +result_type+ and register it in the
|
34
35
|
# dataset. If +save+ is false, it doesn't register the result, but it still
|
35
36
|
# returns a result if the expected files are complete. The +opts+ hash
|
36
37
|
# controls result creation (if necessary). Supported values include:
|
37
|
-
# - +is_clean+: A Boolean indicating if the input files are clean
|
38
|
-
# - +force+: A Boolean indicating if the result must be re-indexed.
|
39
|
-
# it implies save=true
|
38
|
+
# - +is_clean+: A Boolean indicating if the input files are clean
|
39
|
+
# - +force+: A Boolean indicating if the result must be re-indexed.
|
40
|
+
# If true, it implies +save = true+
|
40
41
|
# Returns MiGA::Result or nil.
|
41
42
|
def add_result(result_type, save = true, opts = {})
|
42
43
|
dir = @@RESULT_DIRS[result_type]
|
@@ -46,11 +47,14 @@ module MiGA::Dataset::Result
|
|
46
47
|
FileUtils.rm("#{base}.json") if File.exist?("#{base}.json")
|
47
48
|
else
|
48
49
|
r_pre = MiGA::Result.load("#{base}.json")
|
49
|
-
return r_pre if (r_pre.nil?
|
50
|
+
return r_pre if (r_pre.nil? && !save) || !r_pre.nil?
|
50
51
|
end
|
51
52
|
r = File.exist?("#{base}.done") ?
|
52
53
|
self.send("add_result_#{result_type}", base, opts) : nil
|
53
|
-
|
54
|
+
unless r.nil?
|
55
|
+
r.save
|
56
|
+
pull_hook(:on_result_ready, result_type)
|
57
|
+
end
|
54
58
|
r
|
55
59
|
end
|
56
60
|
|
@@ -68,7 +72,7 @@ module MiGA::Dataset::Result
|
|
68
72
|
not ignore_task?(t) and not add_result(t, save).nil?
|
69
73
|
end
|
70
74
|
end
|
71
|
-
|
75
|
+
|
72
76
|
##
|
73
77
|
# Returns the key symbol of the next task that needs to be executed. Passes
|
74
78
|
# +save+ to #add_result.
|
@@ -95,7 +99,7 @@ module MiGA::Dataset::Result
|
|
95
99
|
def done_preprocessing?(save = false)
|
96
100
|
!first_preprocessing(save).nil? and next_preprocessing(save).nil?
|
97
101
|
end
|
98
|
-
|
102
|
+
|
99
103
|
##
|
100
104
|
# Returns an array indicating the stage of each task (sorted by execution
|
101
105
|
# order). The values are integers:
|
@@ -300,14 +304,17 @@ module MiGA::Dataset::Result
|
|
300
304
|
# Add result type +:mytaxa+ at +base+ (no +_opts+ supported).
|
301
305
|
def add_result_mytaxa(base, _opts)
|
302
306
|
if is_multi?
|
303
|
-
return nil unless result_files_exist?(base,
|
304
|
-
result_files_exist?(base,
|
307
|
+
return nil unless result_files_exist?(base, '.mytaxa') or
|
308
|
+
result_files_exist?(base, '.nomytaxa.txt')
|
305
309
|
r = MiGA::Result.new("#{base}.json")
|
306
|
-
add_files_to_ds_result(
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
310
|
+
add_files_to_ds_result(
|
311
|
+
r, name,
|
312
|
+
mytaxa: '.mytaxa', blast: '.blast',
|
313
|
+
mytaxain: '.mytaxain', nomytaxa: '.nomytaxa.txt',
|
314
|
+
species: '.mytaxa.Species.txt', genus: '.mytaxa.Genus.txt',
|
315
|
+
phylum: '.mytaxa.Phylum.txt', innominate: '.mytaxa.innominate',
|
316
|
+
kronain: '.mytaxa.krona', krona: '.html'
|
317
|
+
)
|
311
318
|
else
|
312
319
|
MiGA::Result.new("#{base}.json")
|
313
320
|
end
|
@@ -357,7 +364,7 @@ module MiGA::Dataset::Result
|
|
357
364
|
def add_result_stats(base, _opts)
|
358
365
|
MiGA::Result.new("#{base}.json")
|
359
366
|
end
|
360
|
-
|
367
|
+
|
361
368
|
##
|
362
369
|
# Add result type +:distances+ for _multi_ datasets at +base+.
|
363
370
|
def add_result_distances_multi(base)
|
@@ -383,10 +390,13 @@ module MiGA::Dataset::Result
|
|
383
390
|
result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
|
384
391
|
result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
|
385
392
|
r = MiGA::Result.new("#{base}.json")
|
386
|
-
add_files_to_ds_result(
|
387
|
-
|
388
|
-
|
389
|
-
|
393
|
+
add_files_to_ds_result(
|
394
|
+
r, name,
|
395
|
+
aai_medoids: '.aai-medoids.tsv',
|
396
|
+
haai_db: '.haai.db', aai_db: '.aai.db', ani_medoids: '.ani-medoids.tsv',
|
397
|
+
ani_db: '.ani.db', ref_tree: '.nwk', ref_tree_pdf: '.nwk.pdf',
|
398
|
+
intax_test: '.intax.txt'
|
399
|
+
)
|
390
400
|
end
|
391
401
|
|
392
402
|
##
|
data/lib/miga/project.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
require 'miga/dataset'
|
5
5
|
require 'miga/project/result'
|
6
6
|
require 'miga/project/dataset'
|
7
|
-
require 'miga/project/
|
7
|
+
require 'miga/project/hooks'
|
8
8
|
|
9
9
|
##
|
10
10
|
# MiGA representation of a project.
|
@@ -12,7 +12,7 @@ class MiGA::Project < MiGA::MiGA
|
|
12
12
|
|
13
13
|
include MiGA::Project::Result
|
14
14
|
include MiGA::Project::Dataset
|
15
|
-
include MiGA::Project::
|
15
|
+
include MiGA::Project::Hooks
|
16
16
|
|
17
17
|
##
|
18
18
|
# Absolute path to the project folder.
|
@@ -35,7 +35,6 @@ class MiGA::Project < MiGA::MiGA
|
|
35
35
|
@path = File.absolute_path(path)
|
36
36
|
self.create if not update and not Project.exist? self.path
|
37
37
|
self.load if self.metadata.nil?
|
38
|
-
self.load_plugins
|
39
38
|
self.metadata[:type] = :mixed if type.nil?
|
40
39
|
raise "Unrecognized project type: #{type}." if @@KNOWN_TYPES[type].nil?
|
41
40
|
end
|
@@ -54,6 +53,7 @@ class MiGA::Project < MiGA::MiGA
|
|
54
53
|
{datasets: [], name: File.basename(path)})
|
55
54
|
d_path = File.expand_path('daemon/daemon.json', path)
|
56
55
|
File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist? d_path
|
56
|
+
pull_hook :on_create
|
57
57
|
self.load
|
58
58
|
end
|
59
59
|
|
@@ -67,6 +67,7 @@ class MiGA::Project < MiGA::MiGA
|
|
67
67
|
# Save any changes persistently, regardless of +do_not_save+.
|
68
68
|
def save!
|
69
69
|
metadata.save
|
70
|
+
pull_hook :on_save
|
70
71
|
self.load
|
71
72
|
end
|
72
73
|
|
@@ -77,6 +78,7 @@ class MiGA::Project < MiGA::MiGA
|
|
77
78
|
@dataset_names_hash = nil
|
78
79
|
@metadata = MiGA::Metadata.load "#{path}/miga.project.json"
|
79
80
|
raise "Couldn't find project metadata at #{path}" if metadata.nil?
|
81
|
+
pull_hook :on_load
|
80
82
|
end
|
81
83
|
|
82
84
|
##
|
data/lib/miga/project/dataset.rb
CHANGED
@@ -56,6 +56,7 @@ module MiGA::Project::Dataset
|
|
56
56
|
@metadata[:datasets] << name
|
57
57
|
@dataset_names_hash = nil # Ensure loading even if +do_not_save+ is true
|
58
58
|
save
|
59
|
+
pull_hook(:on_add_dataset, name)
|
59
60
|
end
|
60
61
|
dataset(name)
|
61
62
|
end
|
@@ -67,6 +68,7 @@ module MiGA::Project::Dataset
|
|
67
68
|
return nil if d.nil?
|
68
69
|
self.metadata[:datasets].delete(name)
|
69
70
|
save
|
71
|
+
pull_hook(:on_unlink_dataset, name)
|
70
72
|
d
|
71
73
|
end
|
72
74
|
|
@@ -121,7 +123,7 @@ module MiGA::Project::Dataset
|
|
121
123
|
##
|
122
124
|
# Are all the datasets in the project preprocessed? Save intermediate results
|
123
125
|
# if +save+ (until the first incomplete dataset is reached).
|
124
|
-
def done_preprocessing?(save=true)
|
126
|
+
def done_preprocessing?(save = true)
|
125
127
|
dataset_names.each do |dn|
|
126
128
|
ds = dataset(dn)
|
127
129
|
return false if ds.is_ref? and not ds.done_preprocessing?(save)
|
@@ -0,0 +1,60 @@
|
|
1
|
+
|
2
|
+
require 'miga/common/hooks'
|
3
|
+
|
4
|
+
##
|
5
|
+
# Helper module including specific functions to handle project hooks.
|
6
|
+
# Supported events:
|
7
|
+
# - on_create(): When created
|
8
|
+
# - on_load(): When loaded
|
9
|
+
# - on_save(): When saved
|
10
|
+
# - on_add_dataset(dataset): When a dataset is added, with name +dataset+
|
11
|
+
# - on_unlink_dataset(dataset): When dataset with name +dataset+ is unlinked
|
12
|
+
# - on_result_ready(result): When any result is ready, with key +result+
|
13
|
+
# - on_result_ready_{result}(): When +result+ is ready
|
14
|
+
# - on_processing_ready(): When preprocessing is complete
|
15
|
+
# Supported hooks:
|
16
|
+
# - run_lambda(lambda, args...)
|
17
|
+
# - run_cmd(cmd)
|
18
|
+
# Internal hooks:
|
19
|
+
# - _pull_processing_ready_hooks()
|
20
|
+
# - _pull_result_hooks()
|
21
|
+
module MiGA::Project::Hooks
|
22
|
+
|
23
|
+
include MiGA::Common::Hooks
|
24
|
+
|
25
|
+
def default_hooks
|
26
|
+
{
|
27
|
+
on_result_ready: [
|
28
|
+
[:_pull_result_hooks],
|
29
|
+
[:_pull_processing_ready_hooks]
|
30
|
+
]
|
31
|
+
}
|
32
|
+
end
|
33
|
+
|
34
|
+
##
|
35
|
+
# Run +cmd+ in the command-line with {{variables}}: project, miga,
|
36
|
+
# object (as defined by the event, if any)
|
37
|
+
# - +hook_args+: +[cmd]+
|
38
|
+
# - +event_args+: +[object (optional)]+
|
39
|
+
def hook_run_cmd(hook_args, event_args)
|
40
|
+
Process.wait(
|
41
|
+
spawn hook_args.first.miga_variables(
|
42
|
+
project: path, miga: MiGA::MiGA.root_path, object: event_args.first
|
43
|
+
)
|
44
|
+
)
|
45
|
+
end
|
46
|
+
|
47
|
+
##
|
48
|
+
# Pull :dataset_ready hook if preprocessing is complete
|
49
|
+
def hook__pull_processing_ready_hooks(_hook_args, _event_args)
|
50
|
+
pull_hook(:on_processing_ready) if next_task(nil, false).nil?
|
51
|
+
end
|
52
|
+
|
53
|
+
##
|
54
|
+
# Dataset Action :pull_result_hooks([], [res])
|
55
|
+
# Pull the hook specific to the type of result
|
56
|
+
def hook__pull_result_hooks(_hook_args, event_args)
|
57
|
+
pull_hook(:"on_result_ready_#{event_args.first}", *event_args)
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
data/lib/miga/project/result.rb
CHANGED
@@ -46,11 +46,14 @@ module MiGA::Project::Result
|
|
46
46
|
FileUtils.rm("#{base}.json") if File.exist?("#{base}.json")
|
47
47
|
else
|
48
48
|
r_pre = MiGA::Result.load("#{base}.json")
|
49
|
-
return r_pre if (r_pre.nil?
|
49
|
+
return r_pre if (r_pre.nil? && !save) || !r_pre.nil?
|
50
50
|
end
|
51
51
|
r = result_files_exist?(base, ".done") ?
|
52
52
|
send("add_result_#{name}", base) : nil
|
53
|
-
|
53
|
+
unless r.nil?
|
54
|
+
r.save
|
55
|
+
pull_hook(:on_result_ready, name)
|
56
|
+
end
|
54
57
|
r
|
55
58
|
end
|
56
59
|
|
@@ -69,7 +72,7 @@ module MiGA::Project::Result
|
|
69
72
|
# If +tasks+ is +nil+ (default), it uses the entire list of tasks.
|
70
73
|
# Returns a Symbol.
|
71
74
|
def next_task(tasks = nil, save = true)
|
72
|
-
tasks ||= @@DISTANCE_TASKS
|
75
|
+
tasks ||= @@DISTANCE_TASKS + @@INCLADE_TASKS
|
73
76
|
tasks.find do |t|
|
74
77
|
if metadata["run_#{t}"] == false or
|
75
78
|
(!is_clade? and @@INCLADE_TASKS.include?(t) and
|
data/lib/miga/result.rb
CHANGED
@@ -1,19 +1,25 @@
|
|
1
1
|
# @package MiGA
|
2
2
|
# @license Artistic-2.0
|
3
3
|
|
4
|
-
require
|
4
|
+
require 'miga/result/dates'
|
5
|
+
require 'miga/result/source'
|
6
|
+
require 'miga/result/stats'
|
5
7
|
|
6
8
|
##
|
7
9
|
# The result from a task run. It can be project-wide or dataset-specific.
|
8
10
|
class MiGA::Result < MiGA::MiGA
|
9
|
-
|
11
|
+
|
10
12
|
include MiGA::Result::Dates
|
11
|
-
|
13
|
+
include MiGA::Result::Source
|
14
|
+
include MiGA::Result::Stats
|
15
|
+
|
12
16
|
# Class-level
|
13
|
-
|
17
|
+
|
14
18
|
##
|
15
19
|
# Check if the result described by the JSON in +path+ already exists.
|
16
|
-
def self.exist?(path)
|
20
|
+
def self.exist?(path)
|
21
|
+
File.exist? path
|
22
|
+
end
|
17
23
|
|
18
24
|
##
|
19
25
|
# Load the result described by the JSON in +path+. Returns MiGA::Result if it
|
@@ -32,14 +38,14 @@ class MiGA::Result < MiGA::MiGA
|
|
32
38
|
##
|
33
39
|
# Array of MiGA::Result objects nested within the result (if any).
|
34
40
|
attr_reader :results
|
35
|
-
|
41
|
+
|
36
42
|
##
|
37
43
|
# Load or create the MiGA::Result described by the JSON file +path+.
|
38
44
|
def initialize(path)
|
39
|
-
@path = path
|
40
|
-
MiGA::Result.exist?(path) ? self.load : create
|
45
|
+
@path = File.absolute_path(path)
|
46
|
+
MiGA::Result.exist?(@path) ? self.load : create
|
41
47
|
end
|
42
|
-
|
48
|
+
|
43
49
|
##
|
44
50
|
# Is the result clean? Returns Boolean.
|
45
51
|
def clean? ; !! self[:clean] ; end
|
@@ -109,7 +115,7 @@ class MiGA::Result < MiGA::MiGA
|
|
109
115
|
##
|
110
116
|
# Initialize and #save empty result.
|
111
117
|
def create
|
112
|
-
@data = {:
|
118
|
+
@data = { created: Time.now.to_s, results: [], stats: {}, files: {} }
|
113
119
|
save
|
114
120
|
end
|
115
121
|
|
@@ -181,5 +187,5 @@ class MiGA::Result < MiGA::MiGA
|
|
181
187
|
@data[:results] << result.path
|
182
188
|
save
|
183
189
|
end
|
184
|
-
|
190
|
+
|
185
191
|
end
|
data/lib/miga/result/base.rb
CHANGED
@@ -0,0 +1,46 @@
|
|
1
|
+
|
2
|
+
require 'miga/result/base'
|
3
|
+
|
4
|
+
##
|
5
|
+
# Helper module including functions to access the source of results
|
6
|
+
module MiGA::Result::Source
|
7
|
+
|
8
|
+
##
|
9
|
+
# Load and return the source (parent object) of a result
|
10
|
+
def source
|
11
|
+
@source ||= if MiGA::Project.RESULT_DIRS[key]
|
12
|
+
project
|
13
|
+
else
|
14
|
+
project.dataset(File.basename(path, '.json'))
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
##
|
19
|
+
# Detect the result key assigned to this result
|
20
|
+
def key
|
21
|
+
@key ||= MiGA::Result.RESULT_DIRS.find { |k, v| v == relative_dir }.first
|
22
|
+
end
|
23
|
+
|
24
|
+
##
|
25
|
+
# Path of the result containing the directory relative to the +data+ folder in
|
26
|
+
# the parent project
|
27
|
+
def relative_dir
|
28
|
+
@relative_dir ||= dir.sub("#{project_path}data/", '')
|
29
|
+
end
|
30
|
+
|
31
|
+
##
|
32
|
+
# Project containing the result
|
33
|
+
def project
|
34
|
+
@project ||= MiGA::Project.load(project_path)
|
35
|
+
end
|
36
|
+
|
37
|
+
##
|
38
|
+
# Path to the project containing the result. In most cases this should be
|
39
|
+
# identical to +project.path+, but this function is provided for safety,
|
40
|
+
# so the path referencing is identical to that of +self.path+ whenever they
|
41
|
+
# need to be compared.
|
42
|
+
def project_path
|
43
|
+
path[ 0 .. path.rindex('/data/') ]
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
@@ -0,0 +1,157 @@
|
|
1
|
+
|
2
|
+
require 'miga/result/base'
|
3
|
+
|
4
|
+
##
|
5
|
+
# Helper module including stats-specific functions for results
|
6
|
+
module MiGA::Result::Stats
|
7
|
+
|
8
|
+
##
|
9
|
+
# (Re-)calculate and save the statistics for the result
|
10
|
+
def compute_stats
|
11
|
+
method = :"compute_stats_#{key}"
|
12
|
+
stats = self.respond_to?(method, true) ? send(method) : nil
|
13
|
+
unless stats.nil?
|
14
|
+
self[:stats] = stats
|
15
|
+
save
|
16
|
+
end
|
17
|
+
self[:stats]
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def compute_stats_raw_reads
|
23
|
+
stats = {}
|
24
|
+
if self[:files][:pair1].nil?
|
25
|
+
s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, gc: true)
|
26
|
+
stats = {
|
27
|
+
reads: s[:n],
|
28
|
+
length_average: [s[:avg], 'bp'],
|
29
|
+
length_standard_deviation: [s[:sd], 'bp'],
|
30
|
+
g_c_content: [s[:gc], '%']}
|
31
|
+
else
|
32
|
+
s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, gc: true)
|
33
|
+
s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, gc: true)
|
34
|
+
stats = {
|
35
|
+
read_pairs: s1[:n],
|
36
|
+
forward_length_average: [s1[:avg], 'bp'],
|
37
|
+
forward_length_standard_deviation: [s1[:sd], 'bp'],
|
38
|
+
forward_g_c_content: [s1[:gc], '%'],
|
39
|
+
reverse_length_average: [s2[:avg], 'bp'],
|
40
|
+
reverse_length_standard_deviation: [s2[:sd], 'bp'],
|
41
|
+
reverse_g_c_content: [s2[:gc], '%']}
|
42
|
+
end
|
43
|
+
stats
|
44
|
+
end
|
45
|
+
|
46
|
+
def compute_stats_trimmed_fasta
|
47
|
+
f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
|
48
|
+
s = MiGA::MiGA.seqs_length(f, :fasta, gc: true)
|
49
|
+
{
|
50
|
+
reads: s[:n],
|
51
|
+
length_average: [s[:avg], 'bp'],
|
52
|
+
length_standard_deviation: [s[:sd], 'bp'],
|
53
|
+
g_c_content: [s[:gc], '%']
|
54
|
+
}
|
55
|
+
end
|
56
|
+
|
57
|
+
def compute_stats_assembly
|
58
|
+
s = MiGA::MiGA.seqs_length(file_path(:largecontigs), :fasta,
|
59
|
+
n50: true, gc: true)
|
60
|
+
{
|
61
|
+
contigs: s[:n],
|
62
|
+
n50: [s[:n50], 'bp'],
|
63
|
+
total_length: [s[:tot], 'bp'],
|
64
|
+
g_c_content: [s[:gc], '%']
|
65
|
+
}
|
66
|
+
end
|
67
|
+
|
68
|
+
def compute_stats_cds
|
69
|
+
s = MiGA::MiGA.seqs_length(file_path(:proteins), :fasta)
|
70
|
+
stats = {
|
71
|
+
predicted_proteins: s[:n],
|
72
|
+
average_length: [s[:avg], 'aa']}
|
73
|
+
asm = source.result(:assembly)
|
74
|
+
unless asm.nil? or asm[:stats][:total_length].nil?
|
75
|
+
stats[:coding_density] =
|
76
|
+
[300.0 * s[:tot] / asm[:stats][:total_length][0], '%']
|
77
|
+
end
|
78
|
+
stats
|
79
|
+
end
|
80
|
+
|
81
|
+
def compute_stats_essential_genes
|
82
|
+
stats = {}
|
83
|
+
if source.is_multi?
|
84
|
+
stats = {median_copies: 0, mean_copies: 0}
|
85
|
+
File.open(file_path(:report), 'r') do |fh|
|
86
|
+
fh.each_line do |ln|
|
87
|
+
if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
|
88
|
+
stats["#{$1.downcase}_copies".to_sym] = $2.to_f
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
else
|
93
|
+
# Fix estimate by domain
|
94
|
+
if !(tax = source.metadata[:tax]).nil? &&
|
95
|
+
%w[Archaea Bacteria].include?(tax[:d]) &&
|
96
|
+
file_path(:raw_report).nil?
|
97
|
+
scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
|
98
|
+
rep = file_path(:report)
|
99
|
+
rc_p = File.expand_path('.miga_rc', ENV['HOME'])
|
100
|
+
rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
|
101
|
+
$stderr.print `#{rc} ruby '#{scr}' \
|
102
|
+
'#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
|
103
|
+
add_file(:raw_report, "#{source.name}.ess/log")
|
104
|
+
add_file(:report, "#{source.name}.ess/log.domain")
|
105
|
+
end
|
106
|
+
# Extract/compute quality values
|
107
|
+
stats = {completeness: [0.0, '%'], contamination: [0.0, '%']}
|
108
|
+
File.open(file_path(:report), 'r') do |fh|
|
109
|
+
fh.each_line do |ln|
|
110
|
+
if /^! (Completeness|Contamination): (.*)%/.match(ln)
|
111
|
+
stats[$1.downcase.to_sym][0] = $2.to_f
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
|
116
|
+
source.metadata[:quality] = case stats[:quality]
|
117
|
+
when 80..100 ; :excellent
|
118
|
+
when 50..80 ; :high
|
119
|
+
when 20..50 ; :intermediate
|
120
|
+
else ; :low
|
121
|
+
end
|
122
|
+
source.save
|
123
|
+
end
|
124
|
+
stats
|
125
|
+
end
|
126
|
+
|
127
|
+
def compute_stats_ssu
|
128
|
+
stats = {ssu: 0, complete_ssu: 0}
|
129
|
+
Zlib::GzipReader.open(file_path(:gff)) do |fh|
|
130
|
+
fh.each_line do |ln|
|
131
|
+
next if ln =~ /^#/
|
132
|
+
rl = ln.chomp.split("\t")
|
133
|
+
len = (rl[4].to_i - rl[3].to_i).abs + 1
|
134
|
+
stats[:max_length] = [stats[:max_length] || 0, len].max
|
135
|
+
stats[:ssu] += 1
|
136
|
+
stats[:complete_ssu] += 1 unless rl[8] =~ /\(partial\)/
|
137
|
+
end
|
138
|
+
end
|
139
|
+
stats
|
140
|
+
end
|
141
|
+
|
142
|
+
def compute_stats_taxonomy
|
143
|
+
stats = {}
|
144
|
+
File.open(file_path(:intax_test), 'r') do |fh|
|
145
|
+
fh.gets.chomp =~ /Closest relative: (\S+) with AAI: (\S+)\.?/
|
146
|
+
stats[:closest_relative] = $1
|
147
|
+
stats[:aai] = [$2.to_f, '%']
|
148
|
+
3.times { fh.gets }
|
149
|
+
fh.each_line do |ln|
|
150
|
+
row = ln.chomp.gsub(/^\s*/,'').split(/\s+/)
|
151
|
+
break if row.empty?
|
152
|
+
stats[:"#{row[0]}_pvalue"] = row[2].to_f unless row[0] == 'root'
|
153
|
+
end
|
154
|
+
end
|
155
|
+
stats
|
156
|
+
end
|
157
|
+
end
|