miga-base 0.5.10.0 → 0.6.0.0

@@ -6,21 +6,22 @@ require 'miga/dataset/base'
  ##
  # Helper module including specific functions to add dataset results.
  module MiGA::Dataset::Result
-
+
  include MiGA::Dataset::Base
-
+
  ##
  # Get the result MiGA::Result in this dataset identified by the symbol +k+.
  def result(k)
  return nil if @@RESULT_DIRS[k.to_sym].nil?
  MiGA::Result.load(
- "#{project.path}/data/#{@@RESULT_DIRS[k.to_sym]}/#{name}.json" )
+ "#{project.path}/data/#{@@RESULT_DIRS[k.to_sym]}/#{name}.json"
+ )
  end
-
+
  ##
  # Get all the results (Array of MiGA::Result) in this dataset.
  def results ; @@RESULT_DIRS.keys.map{ |k| result k }.compact ; end
-
+
  ##
  # For each result executes the 2-ary +blk+ block: key symbol and MiGA::Result.
  def each_result(&blk)
@@ -28,15 +29,15 @@ module MiGA::Dataset::Result
  blk.call(k, result(k)) unless result(k).nil?
  end
  end
-
+
  ##
  # Look for the result with symbol key +result_type+ and register it in the
  # dataset. If +save+ is false, it doesn't register the result, but it still
  # returns a result if the expected files are complete. The +opts+ hash
  # controls result creation (if necessary). Supported values include:
- # - +is_clean+: A Boolean indicating if the input files are clean.
- # - +force+: A Boolean indicating if the result must be re-indexed. If true,
- # it implies save=true.
+ # - +is_clean+: A Boolean indicating if the input files are clean
+ # - +force+: A Boolean indicating if the result must be re-indexed.
+ # If true, it implies +save = true+
  # Returns MiGA::Result or nil.
  def add_result(result_type, save = true, opts = {})
  dir = @@RESULT_DIRS[result_type]
@@ -46,11 +47,14 @@ module MiGA::Dataset::Result
  FileUtils.rm("#{base}.json") if File.exist?("#{base}.json")
  else
  r_pre = MiGA::Result.load("#{base}.json")
- return r_pre if (r_pre.nil? and not save) or not r_pre.nil?
+ return r_pre if (r_pre.nil? && !save) || !r_pre.nil?
  end
  r = File.exist?("#{base}.done") ?
  self.send("add_result_#{result_type}", base, opts) : nil
- r.save unless r.nil?
+ unless r.nil?
+ r.save
+ pull_hook(:on_result_ready, result_type)
+ end
  r
  end

@@ -68,7 +72,7 @@ module MiGA::Dataset::Result
  not ignore_task?(t) and not add_result(t, save).nil?
  end
  end
-
+
  ##
  # Returns the key symbol of the next task that needs to be executed. Passes
  # +save+ to #add_result.
@@ -95,7 +99,7 @@ module MiGA::Dataset::Result
  def done_preprocessing?(save = false)
  !first_preprocessing(save).nil? and next_preprocessing(save).nil?
  end
-
+
  ##
  # Returns an array indicating the stage of each task (sorted by execution
  # order). The values are integers:
@@ -300,14 +304,17 @@ module MiGA::Dataset::Result
  # Add result type +:mytaxa+ at +base+ (no +_opts+ supported).
  def add_result_mytaxa(base, _opts)
  if is_multi?
- return nil unless result_files_exist?(base, ".mytaxa") or
- result_files_exist?(base, ".nomytaxa.txt")
+ return nil unless result_files_exist?(base, '.mytaxa') or
+ result_files_exist?(base, '.nomytaxa.txt')
  r = MiGA::Result.new("#{base}.json")
- add_files_to_ds_result(r, name, mytaxa: ".mytaxa", blast: ".blast",
- mytaxain: ".mytaxain", nomytaxa: ".nomytaxa.txt",
- species: ".mytaxa.Species.txt", genus: ".mytaxa.Genus.txt",
- phylum: ".mytaxa.Phylum.txt", innominate: ".mytaxa.innominate",
- kronain: ".mytaxa.krona", krona: ".html")
+ add_files_to_ds_result(
+ r, name,
+ mytaxa: '.mytaxa', blast: '.blast',
+ mytaxain: '.mytaxain', nomytaxa: '.nomytaxa.txt',
+ species: '.mytaxa.Species.txt', genus: '.mytaxa.Genus.txt',
+ phylum: '.mytaxa.Phylum.txt', innominate: '.mytaxa.innominate',
+ kronain: '.mytaxa.krona', krona: '.html'
+ )
  else
  MiGA::Result.new("#{base}.json")
  end
@@ -357,7 +364,7 @@ module MiGA::Dataset::Result
  def add_result_stats(base, _opts)
  MiGA::Result.new("#{base}.json")
  end
-
+
  ##
  # Add result type +:distances+ for _multi_ datasets at +base+.
  def add_result_distances_multi(base)
@@ -383,10 +390,13 @@ module MiGA::Dataset::Result
  result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
  result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
  r = MiGA::Result.new("#{base}.json")
- add_files_to_ds_result(r, name, aai_medoids: ".aai-medoids.tsv",
- haai_db: ".haai.db", aai_db: ".aai.db", ani_medoids: ".ani-medoids.tsv",
- ani_db: ".ani.db", ref_tree: ".nwk", ref_tree_pdf: ".nwk.pdf",
- intax_test: ".intax.txt")
+ add_files_to_ds_result(
+ r, name,
+ aai_medoids: '.aai-medoids.tsv',
+ haai_db: '.haai.db', aai_db: '.aai.db', ani_medoids: '.ani-medoids.tsv',
+ ani_db: '.ani.db', ref_tree: '.nwk', ref_tree_pdf: '.nwk.pdf',
+ intax_test: '.intax.txt'
+ )
  end

  ##
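
The add_result change above does two things: it modernizes the boolean logic (&&/|| instead of and/not) and, once a result is saved, pulls the new :on_result_ready hook with the result key. A minimal usage sketch, assuming the gem's top-level entry point (require 'miga') and an existing project and dataset; the path and dataset name are illustrative:

  require 'miga'

  project = MiGA::Project.load('./my_project')  # illustrative path
  dataset = project.dataset('ds1')              # illustrative dataset name

  # Registers the :assembly result if the expected files are complete.
  # save = true persists the JSON; force: true in opts would re-index it
  # even if a result JSON already exists (and implies save = true).
  res = dataset.add_result(:assembly, true, force: false)
  puts(res.nil? ? 'assembly not ready yet' : 'assembly result registered')

Saving the result is also what fires the hook, so any registered on_result_ready handlers run at that point.
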
data/lib/miga/project.rb CHANGED
@@ -4,7 +4,7 @@
  require 'miga/dataset'
  require 'miga/project/result'
  require 'miga/project/dataset'
- require 'miga/project/plugins'
+ require 'miga/project/hooks'

  ##
  # MiGA representation of a project.
@@ -12,7 +12,7 @@ class MiGA::Project < MiGA::MiGA

  include MiGA::Project::Result
  include MiGA::Project::Dataset
- include MiGA::Project::Plugins
+ include MiGA::Project::Hooks

  ##
  # Absolute path to the project folder.
@@ -35,7 +35,6 @@ class MiGA::Project < MiGA::MiGA
  @path = File.absolute_path(path)
  self.create if not update and not Project.exist? self.path
  self.load if self.metadata.nil?
- self.load_plugins
  self.metadata[:type] = :mixed if type.nil?
  raise "Unrecognized project type: #{type}." if @@KNOWN_TYPES[type].nil?
  end
@@ -54,6 +53,7 @@ class MiGA::Project < MiGA::MiGA
  {datasets: [], name: File.basename(path)})
  d_path = File.expand_path('daemon/daemon.json', path)
  File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist? d_path
+ pull_hook :on_create
  self.load
  end

@@ -67,6 +67,7 @@ class MiGA::Project < MiGA::MiGA
  # Save any changes persistently, regardless of +do_not_save+.
  def save!
  metadata.save
+ pull_hook :on_save
  self.load
  end

@@ -77,6 +78,7 @@ class MiGA::Project < MiGA::MiGA
  @dataset_names_hash = nil
  @metadata = MiGA::Metadata.load "#{path}/miga.project.json"
  raise "Couldn't find project metadata at #{path}" if metadata.nil?
+ pull_hook :on_load
  end

  ##
@@ -56,6 +56,7 @@ module MiGA::Project::Dataset
  @metadata[:datasets] << name
  @dataset_names_hash = nil # Ensure loading even if +do_not_save+ is true
  save
+ pull_hook(:on_add_dataset, name)
  end
  dataset(name)
  end
@@ -67,6 +68,7 @@ module MiGA::Project::Dataset
  return nil if d.nil?
  self.metadata[:datasets].delete(name)
  save
+ pull_hook(:on_unlink_dataset, name)
  d
  end

@@ -121,7 +123,7 @@ module MiGA::Project::Dataset
  ##
  # Are all the datasets in the project preprocessed? Save intermediate results
  # if +save+ (until the first incomplete dataset is reached).
- def done_preprocessing?(save=true)
+ def done_preprocessing?(save = true)
  dataset_names.each do |dn|
  ds = dataset(dn)
  return false if ds.is_ref? and not ds.done_preprocessing?(save)
@@ -0,0 +1,60 @@
+
+ require 'miga/common/hooks'
+
+ ##
+ # Helper module including specific functions to handle project hooks.
+ # Supported events:
+ # - on_create(): When created
+ # - on_load(): When loaded
+ # - on_save(): When saved
+ # - on_add_dataset(dataset): When a dataset is added, with name +dataset+
+ # - on_unlink_dataset(dataset): When dataset with name +dataset+ is unlinked
+ # - on_result_ready(result): When any result is ready, with key +result+
+ # - on_result_ready_{result}(): When +result+ is ready
+ # - on_processing_ready(): When preprocessing is complete
+ # Supported hooks:
+ # - run_lambda(lambda, args...)
+ # - run_cmd(cmd)
+ # Internal hooks:
+ # - _pull_processing_ready_hooks()
+ # - _pull_result_hooks()
+ module MiGA::Project::Hooks
+
+ include MiGA::Common::Hooks
+
+ def default_hooks
+ {
+ on_result_ready: [
+ [:_pull_result_hooks],
+ [:_pull_processing_ready_hooks]
+ ]
+ }
+ end
+
+ ##
+ # Run +cmd+ in the command-line with {{variables}}: project, miga,
+ # object (as defined by the event, if any)
+ # - +hook_args+: +[cmd]+
+ # - +event_args+: +[object (optional)]+
+ def hook_run_cmd(hook_args, event_args)
+ Process.wait(
+ spawn hook_args.first.miga_variables(
+ project: path, miga: MiGA::MiGA.root_path, object: event_args.first
+ )
+ )
+ end
+
+ ##
+ # Pull :dataset_ready hook if preprocessing is complete
+ def hook__pull_processing_ready_hooks(_hook_args, _event_args)
+ pull_hook(:on_processing_ready) if next_task(nil, false).nil?
+ end
+
+ ##
+ # Dataset Action :pull_result_hooks([], [res])
+ # Pull the hook specific to the type of result
+ def hook__pull_result_hooks(_hook_args, event_args)
+ pull_hook(:"on_result_ready_#{event_args.first}", *event_args)
+ end
+
+ end
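
The new MiGA::Project::Hooks module above routes project events through MiGA::Common::Hooks: default_hooks wires :on_result_ready to the two internal hooks, and hook_run_cmd expands the {{project}}, {{miga}} and {{object}} placeholders before spawning a shell command. A hedged sketch of registering a run_cmd hook follows; the registration call itself lives in miga/common/hooks, which is not part of this diff, so the hooks-hash access below is an assumption about that API rather than a documented interface:

  require 'miga'

  project = MiGA::Project.load('./my_project')  # illustrative path

  # Assumption: MiGA::Common::Hooks exposes a `hooks` accessor whose event
  # entries are arrays of [hook_name, args...], the same shape returned by
  # default_hooks above.
  project.hooks[:on_result_ready] ||= []
  project.hooks[:on_result_ready] <<
    [:run_cmd, 'echo "{{object}} ready in {{project}}" >> {{project}}/hooks.log']

  # From here on, saving a project-level result pulls :on_result_ready,
  # which runs the command above plus the internal :_pull_result_hooks and
  # :_pull_processing_ready_hooks hooks.
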
@@ -46,11 +46,14 @@ module MiGA::Project::Result
  FileUtils.rm("#{base}.json") if File.exist?("#{base}.json")
  else
  r_pre = MiGA::Result.load("#{base}.json")
- return r_pre if (r_pre.nil? and not save) or not r_pre.nil?
+ return r_pre if (r_pre.nil? && !save) || !r_pre.nil?
  end
  r = result_files_exist?(base, ".done") ?
  send("add_result_#{name}", base) : nil
- r.save unless r.nil?
+ unless r.nil?
+ r.save
+ pull_hook(:on_result_ready, name)
+ end
  r
  end

@@ -69,7 +72,7 @@ module MiGA::Project::Result
  # If +tasks+ is +nil+ (default), it uses the entire list of tasks.
  # Returns a Symbol.
  def next_task(tasks = nil, save = true)
- tasks ||= @@DISTANCE_TASKS+@@INCLADE_TASKS
+ tasks ||= @@DISTANCE_TASKS + @@INCLADE_TASKS
  tasks.find do |t|
  if metadata["run_#{t}"] == false or
  (!is_clade? and @@INCLADE_TASKS.include?(t) and
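
Project-level add_result mirrors the dataset-level change: once the result JSON is saved, :on_result_ready is pulled with the result name, and next_task keeps walking the distance and clade task lists. A small sketch of polling project progress, using only calls shown in this diff (the path is illustrative):

  require 'miga'

  project = MiGA::Project.load('./my_project')  # illustrative path

  # next_task returns the Symbol of the next pending project-level task,
  # or nil when nothing remains; passing save = false avoids registering
  # intermediate results while polling.
  pending = project.next_task(nil, false)
  puts(pending.nil? ? 'project-level processing complete' : "next task: #{pending}")
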
data/lib/miga/result.rb CHANGED
@@ -1,19 +1,25 @@
  # @package MiGA
  # @license Artistic-2.0

- require "miga/result/dates"
+ require 'miga/result/dates'
+ require 'miga/result/source'
+ require 'miga/result/stats'

  ##
  # The result from a task run. It can be project-wide or dataset-specific.
  class MiGA::Result < MiGA::MiGA
-
+
  include MiGA::Result::Dates
-
+ include MiGA::Result::Source
+ include MiGA::Result::Stats
+
  # Class-level
-
+
  ##
  # Check if the result described by the JSON in +path+ already exists.
- def self.exist?(path) File.exist? path end
+ def self.exist?(path)
+ File.exist? path
+ end

  ##
  # Load the result described by the JSON in +path+. Returns MiGA::Result if it
@@ -32,14 +38,14 @@ class MiGA::Result < MiGA::MiGA
  ##
  # Array of MiGA::Result objects nested within the result (if any).
  attr_reader :results
-
+
  ##
  # Load or create the MiGA::Result described by the JSON file +path+.
  def initialize(path)
- @path = path
- MiGA::Result.exist?(path) ? self.load : create
+ @path = File.absolute_path(path)
+ MiGA::Result.exist?(@path) ? self.load : create
  end
-
+
  ##
  # Is the result clean? Returns Boolean.
  def clean? ; !! self[:clean] ; end
@@ -109,7 +115,7 @@ class MiGA::Result < MiGA::MiGA
  ##
  # Initialize and #save empty result.
  def create
- @data = {:created=>Time.now.to_s, :results=>[], :stats=>{}, :files=>{}}
+ @data = { created: Time.now.to_s, results: [], stats: {}, files: {} }
  save
  end

@@ -181,5 +187,5 @@ class MiGA::Result < MiGA::MiGA
  @data[:results] << result.path
  save
  end
-
+
  end
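
Two behavioral notes on the MiGA::Result changes above: the path is now normalized with File.absolute_path on initialization, and freshly created results use symbol keys (created, results, stats, files). A minimal sketch of loading or creating a result JSON directly; the path is illustrative, including the data subdirectory name:

  require 'miga'

  json = './my_project/data/05.assembly/ds1.json'  # illustrative path

  res = MiGA::Result.load(json)   # nil if the JSON does not exist yet
  res ||= MiGA::Result.new(json)  # creates and saves an empty result

  res.path      # absolute path, regardless of how it was passed in
  res[:stats]   # => {} for a freshly created result
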
@@ -1,5 +1,11 @@

  class MiGA::Result < MiGA::MiGA
+ class << self
+ def RESULT_DIRS
+ @@RESULT_DIRS ||=
+ MiGA::Dataset.RESULT_DIRS.merge(MiGA::Project.RESULT_DIRS)
+ end
+ end
  end

  module MiGA::Result::Base
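
The new class-level MiGA::Result.RESULT_DIRS above lazily merges the dataset and project result-directory tables; the Result::Source module (next) uses it to map a result JSON back to its key. Usage is just a hash lookup; :assembly is used here as an illustrative key:

  MiGA::Result.RESULT_DIRS.keys        # all known result keys (dataset + project)
  MiGA::Result.RESULT_DIRS[:assembly]  # relative directory for that result type
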
@@ -0,0 +1,46 @@
+
+ require 'miga/result/base'
+
+ ##
+ # Helper module including functions to access the source of results
+ module MiGA::Result::Source
+
+ ##
+ # Load and return the source (parent object) of a result
+ def source
+ @source ||= if MiGA::Project.RESULT_DIRS[key]
+ project
+ else
+ project.dataset(File.basename(path, '.json'))
+ end
+ end
+
+ ##
+ # Detect the result key assigned to this result
+ def key
+ @key ||= MiGA::Result.RESULT_DIRS.find { |k, v| v == relative_dir }.first
+ end
+
+ ##
+ # Path of the result containing the directory relative to the +data+ folder in
+ # the parent project
+ def relative_dir
+ @relative_dir ||= dir.sub("#{project_path}data/", '')
+ end
+
+ ##
+ # Project containing the result
+ def project
+ @project ||= MiGA::Project.load(project_path)
+ end
+
+ ##
+ # Path to the project containing the result. In most cases this should be
+ # identical to +project.path+, but this function is provided for safety,
+ # so the path referencing is identical to that of +self.path+ whenever they
+ # need to be compared.
+ def project_path
+ path[ 0 .. path.rindex('/data/') ]
+ end
+ end
+
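
Result::Source derives everything from the result's own JSON path: project_path is the prefix up to /data/, relative_dir is the directory under data/, key is the RESULT_DIRS entry matching that directory, and source is either the project itself or the dataset whose name matches the JSON basename. A hedged walk-through with an illustrative path and directory name:

  # Illustrative dataset result; '05.assembly' stands in for whatever
  # directory RESULT_DIRS defines for this result's key.
  r = MiGA::Result.load('/home/user/my_project/data/05.assembly/ds1.json')

  r.project_path  # => "/home/user/my_project/" (prefix up to /data/)
  r.relative_dir  # => "05.assembly"
  r.key           # key whose RESULT_DIRS entry equals relative_dir
  r.source        # the dataset named "ds1"; a project-wide result
                  # would return the project instead
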
@@ -0,0 +1,157 @@
+
+ require 'miga/result/base'
+
+ ##
+ # Helper module including stats-specific functions for results
+ module MiGA::Result::Stats
+
+ ##
+ # (Re-)calculate and save the statistics for the result
+ def compute_stats
+ method = :"compute_stats_#{key}"
+ stats = self.respond_to?(method, true) ? send(method) : nil
+ unless stats.nil?
+ self[:stats] = stats
+ save
+ end
+ self[:stats]
+ end
+
+ private
+
+ def compute_stats_raw_reads
+ stats = {}
+ if self[:files][:pair1].nil?
+ s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, gc: true)
+ stats = {
+ reads: s[:n],
+ length_average: [s[:avg], 'bp'],
+ length_standard_deviation: [s[:sd], 'bp'],
+ g_c_content: [s[:gc], '%']}
+ else
+ s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, gc: true)
+ s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, gc: true)
+ stats = {
+ read_pairs: s1[:n],
+ forward_length_average: [s1[:avg], 'bp'],
+ forward_length_standard_deviation: [s1[:sd], 'bp'],
+ forward_g_c_content: [s1[:gc], '%'],
+ reverse_length_average: [s2[:avg], 'bp'],
+ reverse_length_standard_deviation: [s2[:sd], 'bp'],
+ reverse_g_c_content: [s2[:gc], '%']}
+ end
+ stats
+ end
+
+ def compute_stats_trimmed_fasta
+ f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
+ s = MiGA::MiGA.seqs_length(f, :fasta, gc: true)
+ {
+ reads: s[:n],
+ length_average: [s[:avg], 'bp'],
+ length_standard_deviation: [s[:sd], 'bp'],
+ g_c_content: [s[:gc], '%']
+ }
+ end
+
+ def compute_stats_assembly
+ s = MiGA::MiGA.seqs_length(file_path(:largecontigs), :fasta,
+ n50: true, gc: true)
+ {
+ contigs: s[:n],
+ n50: [s[:n50], 'bp'],
+ total_length: [s[:tot], 'bp'],
+ g_c_content: [s[:gc], '%']
+ }
+ end
+
+ def compute_stats_cds
+ s = MiGA::MiGA.seqs_length(file_path(:proteins), :fasta)
+ stats = {
+ predicted_proteins: s[:n],
+ average_length: [s[:avg], 'aa']}
+ asm = source.result(:assembly)
+ unless asm.nil? or asm[:stats][:total_length].nil?
+ stats[:coding_density] =
+ [300.0 * s[:tot] / asm[:stats][:total_length][0], '%']
+ end
+ stats
+ end
+
+ def compute_stats_essential_genes
+ stats = {}
+ if source.is_multi?
+ stats = {median_copies: 0, mean_copies: 0}
+ File.open(file_path(:report), 'r') do |fh|
+ fh.each_line do |ln|
+ if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
+ stats["#{$1.downcase}_copies".to_sym] = $2.to_f
+ end
+ end
+ end
+ else
+ # Fix estimate by domain
+ if !(tax = source.metadata[:tax]).nil? &&
+ %w[Archaea Bacteria].include?(tax[:d]) &&
+ file_path(:raw_report).nil?
+ scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
+ rep = file_path(:report)
+ rc_p = File.expand_path('.miga_rc', ENV['HOME'])
+ rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
+ $stderr.print `#{rc} ruby '#{scr}' \
+ '#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
+ add_file(:raw_report, "#{source.name}.ess/log")
+ add_file(:report, "#{source.name}.ess/log.domain")
+ end
+ # Extract/compute quality values
+ stats = {completeness: [0.0, '%'], contamination: [0.0, '%']}
+ File.open(file_path(:report), 'r') do |fh|
+ fh.each_line do |ln|
+ if /^! (Completeness|Contamination): (.*)%/.match(ln)
+ stats[$1.downcase.to_sym][0] = $2.to_f
+ end
+ end
+ end
+ stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
+ source.metadata[:quality] = case stats[:quality]
+ when 80..100 ; :excellent
+ when 50..80 ; :high
+ when 20..50 ; :intermediate
+ else ; :low
+ end
+ source.save
+ end
+ stats
+ end
+
+ def compute_stats_ssu
+ stats = {ssu: 0, complete_ssu: 0}
+ Zlib::GzipReader.open(file_path(:gff)) do |fh|
+ fh.each_line do |ln|
+ next if ln =~ /^#/
+ rl = ln.chomp.split("\t")
+ len = (rl[4].to_i - rl[3].to_i).abs + 1
+ stats[:max_length] = [stats[:max_length] || 0, len].max
+ stats[:ssu] += 1
+ stats[:complete_ssu] += 1 unless rl[8] =~ /\(partial\)/
+ end
+ end
+ stats
+ end
+
+ def compute_stats_taxonomy
+ stats = {}
+ File.open(file_path(:intax_test), 'r') do |fh|
+ fh.gets.chomp =~ /Closest relative: (\S+) with AAI: (\S+)\.?/
+ stats[:closest_relative] = $1
+ stats[:aai] = [$2.to_f, '%']
+ 3.times { fh.gets }
+ fh.each_line do |ln|
+ row = ln.chomp.gsub(/^\s*/,'').split(/\s+/)
+ break if row.empty?
+ stats[:"#{row[0]}_pvalue"] = row[2].to_f unless row[0] == 'root'
+ end
+ end
+ stats
+ end
+ end
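
compute_stats above dispatches on the result key (compute_stats_raw_reads, compute_stats_assembly, and so on), stores the returned hash in self[:stats], and saves the result. A short hedged usage sketch tying it to Result::Source; the project path, dataset name, and the example values in the comment are illustrative:

  require 'miga'

  project = MiGA::Project.load('./my_project')    # illustrative path
  res = project.dataset('ds1').result(:assembly)  # nil if not computed yet

  unless res.nil?
    stats = res.compute_stats
    # e.g. { contigs: 120, n50: [45_000, 'bp'],
    #        total_length: [4_200_000, 'bp'], g_c_content: [52.3, '%'] }
    puts stats.inspect
  end
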