miga-base 0.5.10.0 → 0.6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,21 +6,22 @@ require 'miga/dataset/base'
6
6
  ##
7
7
  # Helper module including specific functions to add dataset results.
8
8
  module MiGA::Dataset::Result
9
-
9
+
10
10
  include MiGA::Dataset::Base
11
-
11
+
12
12
  ##
13
13
  # Get the result MiGA::Result in this dataset identified by the symbol +k+.
14
14
  def result(k)
15
15
  return nil if @@RESULT_DIRS[k.to_sym].nil?
16
16
  MiGA::Result.load(
17
- "#{project.path}/data/#{@@RESULT_DIRS[k.to_sym]}/#{name}.json" )
17
+ "#{project.path}/data/#{@@RESULT_DIRS[k.to_sym]}/#{name}.json"
18
+ )
18
19
  end
19
-
20
+
20
21
  ##
21
22
  # Get all the results (Array of MiGA::Result) in this dataset.
22
23
  def results ; @@RESULT_DIRS.keys.map{ |k| result k }.compact ; end
23
-
24
+
24
25
  ##
25
26
  # For each result executes the 2-ary +blk+ block: key symbol and MiGA::Result.
26
27
  def each_result(&blk)
@@ -28,15 +29,15 @@ module MiGA::Dataset::Result
28
29
  blk.call(k, result(k)) unless result(k).nil?
29
30
  end
30
31
  end
31
-
32
+
32
33
  ##
33
34
  # Look for the result with symbol key +result_type+ and register it in the
34
35
  # dataset. If +save+ is false, it doesn't register the result, but it still
35
36
  # returns a result if the expected files are complete. The +opts+ hash
36
37
  # controls result creation (if necessary). Supported values include:
37
- # - +is_clean+: A Boolean indicating if the input files are clean.
38
- # - +force+: A Boolean indicating if the result must be re-indexed. If true,
39
- # it implies save=true.
38
+ # - +is_clean+: A Boolean indicating if the input files are clean
39
+ # - +force+: A Boolean indicating if the result must be re-indexed.
40
+ # If true, it implies +save = true+
40
41
  # Returns MiGA::Result or nil.
41
42
  def add_result(result_type, save = true, opts = {})
42
43
  dir = @@RESULT_DIRS[result_type]
@@ -46,11 +47,14 @@ module MiGA::Dataset::Result
46
47
  FileUtils.rm("#{base}.json") if File.exist?("#{base}.json")
47
48
  else
48
49
  r_pre = MiGA::Result.load("#{base}.json")
49
- return r_pre if (r_pre.nil? and not save) or not r_pre.nil?
50
+ return r_pre if (r_pre.nil? && !save) || !r_pre.nil?
50
51
  end
51
52
  r = File.exist?("#{base}.done") ?
52
53
  self.send("add_result_#{result_type}", base, opts) : nil
53
- r.save unless r.nil?
54
+ unless r.nil?
55
+ r.save
56
+ pull_hook(:on_result_ready, result_type)
57
+ end
54
58
  r
55
59
  end
56
60
 
@@ -68,7 +72,7 @@ module MiGA::Dataset::Result
68
72
  not ignore_task?(t) and not add_result(t, save).nil?
69
73
  end
70
74
  end
71
-
75
+
72
76
  ##
73
77
  # Returns the key symbol of the next task that needs to be executed. Passes
74
78
  # +save+ to #add_result.
@@ -95,7 +99,7 @@ module MiGA::Dataset::Result
95
99
  def done_preprocessing?(save = false)
96
100
  !first_preprocessing(save).nil? and next_preprocessing(save).nil?
97
101
  end
98
-
102
+
99
103
  ##
100
104
  # Returns an array indicating the stage of each task (sorted by execution
101
105
  # order). The values are integers:
@@ -300,14 +304,17 @@ module MiGA::Dataset::Result
300
304
  # Add result type +:mytaxa+ at +base+ (no +_opts+ supported).
301
305
  def add_result_mytaxa(base, _opts)
302
306
  if is_multi?
303
- return nil unless result_files_exist?(base, ".mytaxa") or
304
- result_files_exist?(base, ".nomytaxa.txt")
307
+ return nil unless result_files_exist?(base, '.mytaxa') or
308
+ result_files_exist?(base, '.nomytaxa.txt')
305
309
  r = MiGA::Result.new("#{base}.json")
306
- add_files_to_ds_result(r, name, mytaxa: ".mytaxa", blast: ".blast",
307
- mytaxain: ".mytaxain", nomytaxa: ".nomytaxa.txt",
308
- species: ".mytaxa.Species.txt", genus: ".mytaxa.Genus.txt",
309
- phylum: ".mytaxa.Phylum.txt", innominate: ".mytaxa.innominate",
310
- kronain: ".mytaxa.krona", krona: ".html")
310
+ add_files_to_ds_result(
311
+ r, name,
312
+ mytaxa: '.mytaxa', blast: '.blast',
313
+ mytaxain: '.mytaxain', nomytaxa: '.nomytaxa.txt',
314
+ species: '.mytaxa.Species.txt', genus: '.mytaxa.Genus.txt',
315
+ phylum: '.mytaxa.Phylum.txt', innominate: '.mytaxa.innominate',
316
+ kronain: '.mytaxa.krona', krona: '.html'
317
+ )
311
318
  else
312
319
  MiGA::Result.new("#{base}.json")
313
320
  end
@@ -357,7 +364,7 @@ module MiGA::Dataset::Result
357
364
  def add_result_stats(base, _opts)
358
365
  MiGA::Result.new("#{base}.json")
359
366
  end
360
-
367
+
361
368
  ##
362
369
  # Add result type +:distances+ for _multi_ datasets at +base+.
363
370
  def add_result_distances_multi(base)
@@ -383,10 +390,13 @@ module MiGA::Dataset::Result
383
390
  result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
384
391
  result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
385
392
  r = MiGA::Result.new("#{base}.json")
386
- add_files_to_ds_result(r, name, aai_medoids: ".aai-medoids.tsv",
387
- haai_db: ".haai.db", aai_db: ".aai.db", ani_medoids: ".ani-medoids.tsv",
388
- ani_db: ".ani.db", ref_tree: ".nwk", ref_tree_pdf: ".nwk.pdf",
389
- intax_test: ".intax.txt")
393
+ add_files_to_ds_result(
394
+ r, name,
395
+ aai_medoids: '.aai-medoids.tsv',
396
+ haai_db: '.haai.db', aai_db: '.aai.db', ani_medoids: '.ani-medoids.tsv',
397
+ ani_db: '.ani.db', ref_tree: '.nwk', ref_tree_pdf: '.nwk.pdf',
398
+ intax_test: '.intax.txt'
399
+ )
390
400
  end
391
401
 
392
402
  ##
data/lib/miga/project.rb CHANGED
@@ -4,7 +4,7 @@
4
4
  require 'miga/dataset'
5
5
  require 'miga/project/result'
6
6
  require 'miga/project/dataset'
7
- require 'miga/project/plugins'
7
+ require 'miga/project/hooks'
8
8
 
9
9
  ##
10
10
  # MiGA representation of a project.
@@ -12,7 +12,7 @@ class MiGA::Project < MiGA::MiGA
12
12
 
13
13
  include MiGA::Project::Result
14
14
  include MiGA::Project::Dataset
15
- include MiGA::Project::Plugins
15
+ include MiGA::Project::Hooks
16
16
 
17
17
  ##
18
18
  # Absolute path to the project folder.
@@ -35,7 +35,6 @@ class MiGA::Project < MiGA::MiGA
35
35
  @path = File.absolute_path(path)
36
36
  self.create if not update and not Project.exist? self.path
37
37
  self.load if self.metadata.nil?
38
- self.load_plugins
39
38
  self.metadata[:type] = :mixed if type.nil?
40
39
  raise "Unrecognized project type: #{type}." if @@KNOWN_TYPES[type].nil?
41
40
  end
@@ -54,6 +53,7 @@ class MiGA::Project < MiGA::MiGA
54
53
  {datasets: [], name: File.basename(path)})
55
54
  d_path = File.expand_path('daemon/daemon.json', path)
56
55
  File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist? d_path
56
+ pull_hook :on_create
57
57
  self.load
58
58
  end
59
59
 
@@ -67,6 +67,7 @@ class MiGA::Project < MiGA::MiGA
67
67
  # Save any changes persistently, regardless of +do_not_save+.
68
68
  def save!
69
69
  metadata.save
70
+ pull_hook :on_save
70
71
  self.load
71
72
  end
72
73
 
@@ -77,6 +78,7 @@ class MiGA::Project < MiGA::MiGA
77
78
  @dataset_names_hash = nil
78
79
  @metadata = MiGA::Metadata.load "#{path}/miga.project.json"
79
80
  raise "Couldn't find project metadata at #{path}" if metadata.nil?
81
+ pull_hook :on_load
80
82
  end
81
83
 
82
84
  ##
@@ -56,6 +56,7 @@ module MiGA::Project::Dataset
56
56
  @metadata[:datasets] << name
57
57
  @dataset_names_hash = nil # Ensure loading even if +do_not_save+ is true
58
58
  save
59
+ pull_hook(:on_add_dataset, name)
59
60
  end
60
61
  dataset(name)
61
62
  end
@@ -67,6 +68,7 @@ module MiGA::Project::Dataset
67
68
  return nil if d.nil?
68
69
  self.metadata[:datasets].delete(name)
69
70
  save
71
+ pull_hook(:on_unlink_dataset, name)
70
72
  d
71
73
  end
72
74
 
@@ -121,7 +123,7 @@ module MiGA::Project::Dataset
121
123
  ##
122
124
  # Are all the datasets in the project preprocessed? Save intermediate results
123
125
  # if +save+ (until the first incomplete dataset is reached).
124
- def done_preprocessing?(save=true)
126
+ def done_preprocessing?(save = true)
125
127
  dataset_names.each do |dn|
126
128
  ds = dataset(dn)
127
129
  return false if ds.is_ref? and not ds.done_preprocessing?(save)
@@ -0,0 +1,60 @@
1
+
2
+ require 'miga/common/hooks'
3
+
4
+ ##
5
+ # Helper module including specific functions to handle project hooks.
6
+ # Supported events:
7
+ # - on_create(): When created
8
+ # - on_load(): When loaded
9
+ # - on_save(): When saved
10
+ # - on_add_dataset(dataset): When a dataset is added, with name +dataset+
11
+ # - on_unlink_dataset(dataset): When dataset with name +dataset+ is unlinked
12
+ # - on_result_ready(result): When any result is ready, with key +result+
13
+ # - on_result_ready_{result}(): When +result+ is ready
14
+ # - on_processing_ready(): When processing is complete (no pending tasks)
15
+ # Supported hooks:
16
+ # - run_lambda(lambda, args...)
17
+ # - run_cmd(cmd)
18
+ # Internal hooks:
19
+ # - _pull_processing_ready_hooks()
20
+ # - _pull_result_hooks()
21
+ module MiGA::Project::Hooks
22
+
23
+ include MiGA::Common::Hooks
24
+
25
+ def default_hooks
26
+ {
27
+ on_result_ready: [
28
+ [:_pull_result_hooks],
29
+ [:_pull_processing_ready_hooks]
30
+ ]
31
+ }
32
+ end
33
+
34
+ ##
35
+ # Run +cmd+ in the command-line with {{variables}}: project, miga,
36
+ # object (as defined by the event, if any)
37
+ # - +hook_args+: +[cmd]+
38
+ # - +event_args+: +[object (optional)]+
39
+ def hook_run_cmd(hook_args, event_args)
40
+ Process.wait(
41
+ spawn hook_args.first.miga_variables(
42
+ project: path, miga: MiGA::MiGA.root_path, object: event_args.first
43
+ )
44
+ )
45
+ end
46
+
47
+ ##
48
+ # Pull the :on_processing_ready hook if processing is complete
49
+ def hook__pull_processing_ready_hooks(_hook_args, _event_args)
50
+ pull_hook(:on_processing_ready) if next_task(nil, false).nil?
51
+ end
52
+
53
+ ##
54
+ # Project Action :_pull_result_hooks([], [res])
55
+ # Pull the hook specific to the type of result
56
+ def hook__pull_result_hooks(_hook_args, event_args)
57
+ pull_hook(:"on_result_ready_#{event_args.first}", *event_args)
58
+ end
59
+
60
+ end
@@ -46,11 +46,14 @@ module MiGA::Project::Result
46
46
  FileUtils.rm("#{base}.json") if File.exist?("#{base}.json")
47
47
  else
48
48
  r_pre = MiGA::Result.load("#{base}.json")
49
- return r_pre if (r_pre.nil? and not save) or not r_pre.nil?
49
+ return r_pre if (r_pre.nil? && !save) || !r_pre.nil?
50
50
  end
51
51
  r = result_files_exist?(base, ".done") ?
52
52
  send("add_result_#{name}", base) : nil
53
- r.save unless r.nil?
53
+ unless r.nil?
54
+ r.save
55
+ pull_hook(:on_result_ready, name)
56
+ end
54
57
  r
55
58
  end
56
59
 
@@ -69,7 +72,7 @@ module MiGA::Project::Result
69
72
  # If +tasks+ is +nil+ (default), it uses the entire list of tasks.
70
73
  # Returns a Symbol.
71
74
  def next_task(tasks = nil, save = true)
72
- tasks ||= @@DISTANCE_TASKS+@@INCLADE_TASKS
75
+ tasks ||= @@DISTANCE_TASKS + @@INCLADE_TASKS
73
76
  tasks.find do |t|
74
77
  if metadata["run_#{t}"] == false or
75
78
  (!is_clade? and @@INCLADE_TASKS.include?(t) and
data/lib/miga/result.rb CHANGED
@@ -1,19 +1,25 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require "miga/result/dates"
4
+ require 'miga/result/dates'
5
+ require 'miga/result/source'
6
+ require 'miga/result/stats'
5
7
 
6
8
  ##
7
9
  # The result from a task run. It can be project-wide or dataset-specific.
8
10
  class MiGA::Result < MiGA::MiGA
9
-
11
+
10
12
  include MiGA::Result::Dates
11
-
13
+ include MiGA::Result::Source
14
+ include MiGA::Result::Stats
15
+
12
16
  # Class-level
13
-
17
+
14
18
  ##
15
19
  # Check if the result described by the JSON in +path+ already exists.
16
- def self.exist?(path) File.exist? path end
20
+ def self.exist?(path)
21
+ File.exist? path
22
+ end
17
23
 
18
24
  ##
19
25
  # Load the result described by the JSON in +path+. Returns MiGA::Result if it
@@ -32,14 +38,14 @@ class MiGA::Result < MiGA::MiGA
32
38
  ##
33
39
  # Array of MiGA::Result objects nested within the result (if any).
34
40
  attr_reader :results
35
-
41
+
36
42
  ##
37
43
  # Load or create the MiGA::Result described by the JSON file +path+.
38
44
  def initialize(path)
39
- @path = path
40
- MiGA::Result.exist?(path) ? self.load : create
45
+ @path = File.absolute_path(path)
46
+ MiGA::Result.exist?(@path) ? self.load : create
41
47
  end
42
-
48
+
43
49
  ##
44
50
  # Is the result clean? Returns Boolean.
45
51
  def clean? ; !! self[:clean] ; end
@@ -109,7 +115,7 @@ class MiGA::Result < MiGA::MiGA
109
115
  ##
110
116
  # Initialize and #save empty result.
111
117
  def create
112
- @data = {:created=>Time.now.to_s, :results=>[], :stats=>{}, :files=>{}}
118
+ @data = { created: Time.now.to_s, results: [], stats: {}, files: {} }
113
119
  save
114
120
  end
115
121
 
@@ -181,5 +187,5 @@ class MiGA::Result < MiGA::MiGA
181
187
  @data[:results] << result.path
182
188
  save
183
189
  end
184
-
190
+
185
191
  end
@@ -1,5 +1,11 @@
1
1
 
2
2
  class MiGA::Result < MiGA::MiGA
3
+ class << self
4
+ def RESULT_DIRS
5
+ @@RESULT_DIRS ||=
6
+ MiGA::Dataset.RESULT_DIRS.merge(MiGA::Project.RESULT_DIRS)
7
+ end
8
+ end
3
9
  end
4
10
 
5
11
  module MiGA::Result::Base
@@ -0,0 +1,46 @@
1
+
2
+ require 'miga/result/base'
3
+
4
+ ##
5
+ # Helper module including functions to access the source of results
6
+ module MiGA::Result::Source
7
+
8
+ ##
9
+ # Load and return the source (parent object) of a result
10
+ def source
11
+ @source ||= if MiGA::Project.RESULT_DIRS[key]
12
+ project
13
+ else
14
+ project.dataset(File.basename(path, '.json'))
15
+ end
16
+ end
17
+
18
+ ##
19
+ # Detect the result key assigned to this result
20
+ def key
21
+ @key ||= MiGA::Result.RESULT_DIRS.find { |k, v| v == relative_dir }.first
22
+ end
23
+
24
+ ##
25
+ # Directory containing the result, relative to the +data+ folder of
26
+ # the parent project
27
+ def relative_dir
28
+ @relative_dir ||= dir.sub("#{project_path}data/", '')
29
+ end
30
+
31
+ ##
32
+ # Project containing the result
33
+ def project
34
+ @project ||= MiGA::Project.load(project_path)
35
+ end
36
+
37
+ ##
38
+ # Path to the project containing the result. In most cases this should be
39
+ # identical to +project.path+, but this function is provided for safety,
40
+ # so the path referencing is identical to that of +self.path+ whenever they
41
+ # need to be compared.
42
+ def project_path
43
+ path[ 0 .. path.rindex('/data/') ]
44
+ end
45
+ end
46
+
@@ -0,0 +1,157 @@
1
+
2
+ require 'miga/result/base'
3
+
4
+ ##
5
+ # Helper module including stats-specific functions for results
6
+ module MiGA::Result::Stats
7
+
8
+ ##
9
+ # (Re-)calculate and save the statistics for the result
10
+ def compute_stats
11
+ method = :"compute_stats_#{key}"
12
+ stats = self.respond_to?(method, true) ? send(method) : nil
13
+ unless stats.nil?
14
+ self[:stats] = stats
15
+ save
16
+ end
17
+ self[:stats]
18
+ end
19
+
20
+ private
21
+
22
+ def compute_stats_raw_reads
23
+ stats = {}
24
+ if self[:files][:pair1].nil?
25
+ s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, gc: true)
26
+ stats = {
27
+ reads: s[:n],
28
+ length_average: [s[:avg], 'bp'],
29
+ length_standard_deviation: [s[:sd], 'bp'],
30
+ g_c_content: [s[:gc], '%']}
31
+ else
32
+ s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, gc: true)
33
+ s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, gc: true)
34
+ stats = {
35
+ read_pairs: s1[:n],
36
+ forward_length_average: [s1[:avg], 'bp'],
37
+ forward_length_standard_deviation: [s1[:sd], 'bp'],
38
+ forward_g_c_content: [s1[:gc], '%'],
39
+ reverse_length_average: [s2[:avg], 'bp'],
40
+ reverse_length_standard_deviation: [s2[:sd], 'bp'],
41
+ reverse_g_c_content: [s2[:gc], '%']}
42
+ end
43
+ stats
44
+ end
45
+
46
+ def compute_stats_trimmed_fasta
47
+ f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
48
+ s = MiGA::MiGA.seqs_length(f, :fasta, gc: true)
49
+ {
50
+ reads: s[:n],
51
+ length_average: [s[:avg], 'bp'],
52
+ length_standard_deviation: [s[:sd], 'bp'],
53
+ g_c_content: [s[:gc], '%']
54
+ }
55
+ end
56
+
57
+ def compute_stats_assembly
58
+ s = MiGA::MiGA.seqs_length(file_path(:largecontigs), :fasta,
59
+ n50: true, gc: true)
60
+ {
61
+ contigs: s[:n],
62
+ n50: [s[:n50], 'bp'],
63
+ total_length: [s[:tot], 'bp'],
64
+ g_c_content: [s[:gc], '%']
65
+ }
66
+ end
67
+
68
+ def compute_stats_cds
69
+ s = MiGA::MiGA.seqs_length(file_path(:proteins), :fasta)
70
+ stats = {
71
+ predicted_proteins: s[:n],
72
+ average_length: [s[:avg], 'aa']}
73
+ asm = source.result(:assembly)
74
+ unless asm.nil? or asm[:stats][:total_length].nil?
75
+ stats[:coding_density] =
76
+ [300.0 * s[:tot] / asm[:stats][:total_length][0], '%']
77
+ end
78
+ stats
79
+ end
80
+
81
+ def compute_stats_essential_genes
82
+ stats = {}
83
+ if source.is_multi?
84
+ stats = {median_copies: 0, mean_copies: 0}
85
+ File.open(file_path(:report), 'r') do |fh|
86
+ fh.each_line do |ln|
87
+ if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
88
+ stats["#{$1.downcase}_copies".to_sym] = $2.to_f
89
+ end
90
+ end
91
+ end
92
+ else
93
+ # Fix estimate by domain
94
+ if !(tax = source.metadata[:tax]).nil? &&
95
+ %w[Archaea Bacteria].include?(tax[:d]) &&
96
+ file_path(:raw_report).nil?
97
+ scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
98
+ rep = file_path(:report)
99
+ rc_p = File.expand_path('.miga_rc', ENV['HOME'])
100
+ rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
101
+ $stderr.print `#{rc} ruby '#{scr}' \
102
+ '#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
103
+ add_file(:raw_report, "#{source.name}.ess/log")
104
+ add_file(:report, "#{source.name}.ess/log.domain")
105
+ end
106
+ # Extract/compute quality values
107
+ stats = {completeness: [0.0, '%'], contamination: [0.0, '%']}
108
+ File.open(file_path(:report), 'r') do |fh|
109
+ fh.each_line do |ln|
110
+ if /^! (Completeness|Contamination): (.*)%/.match(ln)
111
+ stats[$1.downcase.to_sym][0] = $2.to_f
112
+ end
113
+ end
114
+ end
115
+ stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
116
+ source.metadata[:quality] = case stats[:quality]
117
+ when 80..100 ; :excellent
118
+ when 50..80 ; :high
119
+ when 20..50 ; :intermediate
120
+ else ; :low
121
+ end
122
+ source.save
123
+ end
124
+ stats
125
+ end
126
+
127
+ def compute_stats_ssu
128
+ stats = {ssu: 0, complete_ssu: 0}
129
+ Zlib::GzipReader.open(file_path(:gff)) do |fh|
130
+ fh.each_line do |ln|
131
+ next if ln =~ /^#/
132
+ rl = ln.chomp.split("\t")
133
+ len = (rl[4].to_i - rl[3].to_i).abs + 1
134
+ stats[:max_length] = [stats[:max_length] || 0, len].max
135
+ stats[:ssu] += 1
136
+ stats[:complete_ssu] += 1 unless rl[8] =~ /\(partial\)/
137
+ end
138
+ end
139
+ stats
140
+ end
141
+
142
+ def compute_stats_taxonomy
143
+ stats = {}
144
+ File.open(file_path(:intax_test), 'r') do |fh|
145
+ fh.gets.chomp =~ /Closest relative: (\S+) with AAI: (\S+)\.?/
146
+ stats[:closest_relative] = $1
147
+ stats[:aai] = [$2.to_f, '%']
148
+ 3.times { fh.gets }
149
+ fh.each_line do |ln|
150
+ row = ln.chomp.gsub(/^\s*/,'').split(/\s+/)
151
+ break if row.empty?
152
+ stats[:"#{row[0]}_pvalue"] = row[2].to_f unless row[0] == 'root'
153
+ end
154
+ end
155
+ stats
156
+ end
157
+ end