miga-base 0.3.1.3 → 0.3.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
##
# Class-level helpers of MiGA::Project: existence check, loading, and readers
# for the project-wide definitions stored in class variables.
class MiGA::Project < MiGA::MiGA

  class << self
    ##
    # Does the project at +path+ exist? True only when +path+ is a directory
    # containing a +miga.project.json+ file.
    def exist?(path)
      Dir.exist?(path) && File.exist?("#{path}/miga.project.json")
    end

    ##
    # Load the project at +path+. Returns MiGA::Project if project exists, nil
    # otherwise.
    def load(path)
      return nil unless exist? path
      new path
    end

    # Readers returning the class variable of the same name.
    def INCLADE_TASKS ; @@INCLADE_TASKS ; end
    def DISTANCE_TASKS ; @@DISTANCE_TASKS ; end
    def KNOWN_TYPES ; @@KNOWN_TYPES ; end
    def RESULT_DIRS ; @@RESULT_DIRS ; end

  end

end
29
+
30
##
# Project-wide definitions (folders, result directories, supported project
# types, and task lists), stored as class variables.
module MiGA::Project::Base

  ##
  # Top-level folders inside a project.
  @@FOLDERS = %w[data metadata daemon]

  ##
  # Folders for results.
  @@DATA_FOLDERS = %w[
    01.raw_reads 02.trimmed_reads 03.read_quality 04.trimmed_fasta
    05.assembly 06.cds
    07.annotation 07.annotation/01.function 07.annotation/02.taxonomy
    07.annotation/01.function/01.essential
    07.annotation/01.function/02.ssu
    07.annotation/02.taxonomy/01.mytaxa
    07.annotation/03.qa 07.annotation/03.qa/01.checkm
    07.annotation/03.qa/02.mytaxa_scan
    08.mapping 08.mapping/01.read-ctg 08.mapping/02.read-gene
    09.distances 09.distances/01.haai 09.distances/02.aai
    09.distances/03.ani 09.distances/04.ssu 09.distances/05.taxonomy
    10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
    10.clades/04.phylogeny 10.clades/04.phylogeny/01.essential
    10.clades/04.phylogeny/02.core 10.clades/05.metadata
    90.stats
  ]

  ##
  # Directories containing the results from project-wide tasks. Keys are the
  # task names (Symbol) and values are paths relative to the data folder.
  # Commented-out entries are tasks that are currently not registered.
  @@RESULT_DIRS = {
    project_stats: "90.stats",
    # Distances
    haai_distances: "09.distances/01.haai",
    aai_distances: "09.distances/02.aai",
    ani_distances: "09.distances/03.ani",
    #ssu_distances: "09.distances/04.ssu",
    # Clade identification
    clade_finding: "10.clades/01.find",
    # Clade analysis
    subclades: "10.clades/02.ani",
    ogs: "10.clades/03.ogs"
    #ess_phylogeny: "10.clades/04.phylogeny/01.essential",
    #core_phylogeny: "10.clades/04.phylogeny/02.core",
    #clade_metadata: "10.clades/05.metadata"
  }

  ##
  # Supported types of projects. Each type has a +description+ and the
  # Boolean flags +single+ and +multi+.
  @@KNOWN_TYPES = {
    mixed: {
      description: "Mixed collection of genomes, metagenomes, and viromes.",
      single: true, multi: true},
    genomes: {description: "Collection of genomes.",
      single: true, multi: false},
    clade: {description: "Collection of closely-related genomes (ANI >= 90%).",
      single: true, multi: false},
    metagenomes: {description: "Collection of metagenomes and/or viromes.",
      single: false, multi: true}
  }

  ##
  # Project-wide distance estimations.
  @@DISTANCE_TASKS = [:project_stats,
    :haai_distances, :aai_distances, :ani_distances, :clade_finding]

  ##
  # Project-wide tasks for :clade projects.
  @@INCLADE_TASKS = [:subclades, :ogs]

end
99
+
@@ -0,0 +1,148 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ ##
5
+ # Helper module including specific functions to handle datasets.
6
module MiGA::Project::Dataset

  ##
  # Returns Array of MiGA::Dataset.
  def datasets
    metadata[:datasets].map { |name| dataset(name) }
  end

  ##
  # Returns Array of String (without evaluating dataset objects).
  def dataset_names
    metadata[:datasets]
  end

  ##
  # Returns the MiGA::Dataset identified by +name+, or nil if
  # MiGA::Dataset.exist? reports that it doesn't exist in this project.
  # Dataset objects are cached per name in @datasets.
  def dataset(name)
    name = name.miga_name
    return nil unless MiGA::Dataset.exist?(self, name)
    @datasets ||= {}
    @datasets[name] ||= MiGA::Dataset.new(self, name)
  end

  ##
  # Iterate through datasets, with one or two variables passed to +blk+.
  # If one, the dataset MiGA::Dataset object is passed. If two, the name and
  # the dataset object are passed.
  def each_dataset(&blk)
    metadata[:datasets].each do |name|
      if blk.arity == 1
        blk.call(dataset(name))
      else
        blk.call(name, dataset(name))
      end
    end
  end

  ##
  # Add dataset identified by +name+ and return MiGA::Dataset. The dataset is
  # only registered (and the project saved) if +name+ isn't listed yet.
  def add_dataset(name)
    unless metadata[:datasets].include? name
      MiGA::Dataset.new(self, name)
      @metadata[:datasets] << name
      save
    end
    dataset(name)
  end

  ##
  # Unlink dataset identified by +name+ and return MiGA::Dataset, or nil if
  # the dataset cannot be found.
  def unlink_dataset(name)
    d = dataset(name)
    return nil if d.nil?
    self.metadata[:datasets].delete(name)
    save
    d
  end

  ##
  # Import the dataset +ds+, a MiGA::Dataset, using +method+ which is any method
  # supported by File.generic_transfer. Raises an error if a dataset with the
  # same name already exists in this project.
  def import_dataset(ds, method=:hardlink)
    if MiGA::Dataset.exist?(self, ds.name)
      raise "Impossible to import dataset, it already exists: #{ds.name}."
    end
    # Import dataset results
    ds.each_result do |task, result|
      target_dir = "#{path}/data/#{MiGA::Dataset.RESULT_DIRS[task]}"
      # import result files
      result.each_file do |file|
        File.generic_transfer("#{result.dir}/#{file}",
          "#{target_dir}/#{file}", method)
      end
      # import result metadata
      %w(json start done).each do |suffix|
        source = "#{result.dir}/#{ds.name}.#{suffix}"
        next unless File.exist? source
        File.generic_transfer(source,
          "#{target_dir}/#{ds.name}.#{suffix}", method)
      end
    end
    # Import dataset metadata
    File.generic_transfer("#{ds.project.path}/metadata/#{ds.name}.json",
      "#{self.path}/metadata/#{ds.name}.json", method)
    # Save dataset
    self.add_dataset(ds.name)
  end

  ##
  # Find all datasets with (potential) result files but are yet unregistered.
  # Returns Array of String.
  def unregistered_datasets
    # Named +found+ (not +datasets+) to avoid shadowing the #datasets method.
    found = []
    MiGA::Dataset.RESULT_DIRS.values.each do |dir|
      dir_p = "#{path}/data/#{dir}"
      next unless Dir.exist? dir_p
      Dir.entries(dir_p).each do |file|
        next unless file =~ %r{
          \.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
        }x
        # The candidate name is everything before the first dot.
        m = /([^\.]+)/.match(file)
        found << m[1] unless m.nil? || m[1] == "miga-project"
      end
    end
    found.uniq - metadata[:datasets]
  end

  ##
  # Are all the datasets in the project preprocessed? Save intermediate results
  # if +save+ (until the first incomplete dataset is reached). Only datasets
  # for which +is_ref?+ is true are checked.
  def done_preprocessing?(save=true)
    dataset_names.each do |dn|
      ds = dataset(dn)
      return false if ds.is_ref? && !ds.done_preprocessing?(save)
    end
    true
  end

  ##
  # Returns a two-dimensional matrix (Array of Array) where the first index
  # corresponds to the dataset, the second index corresponds to the dataset
  # task, and the value corresponds to:
  # - 0: Before execution.
  # - 1: Done (or not required).
  # - 2: To do.
  def profile_datasets_advance
    advance = []
    self.each_dataset_profile_advance { |ds_adv| advance << ds_adv }
    advance
  end

  ##
  # Call +blk+ passing the result of MiGA::Dataset#profile_advance for each
  # registered dataset.
  def each_dataset_profile_advance(&blk)
    each_dataset { |ds| blk.call(ds.profile_advance) }
  end

end
148
+
@@ -0,0 +1,41 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ ##
5
+ # Helper module including specific functions to handle plugins.
6
module MiGA::Project::Plugins

  ##
  # Installs the plugin in the specified path. The absolute path is appended
  # to the +:plugins+ metadata entry and the project is saved. Raises an error
  # if the plugin is already registered or if the path doesn't contain a
  # +miga-plugin.json+ file.
  def install_plugin(path)
    abs_path = File.absolute_path(path)
    installed = metadata[:plugins]
    if !installed.nil? && installed.include?(abs_path)
      raise "Plugin already installed in project: #{abs_path}."
    end
    unless File.exist?(File.expand_path("miga-plugin.json", abs_path))
      raise "Malformed MiGA plugin: #{abs_path}."
    end
    self.metadata[:plugins] ||= []
    self.metadata[:plugins] << abs_path
    save
  end

  ##
  # Uninstall the plugin in the specified path. Raises an error if the
  # absolute path isn't registered in the +:plugins+ metadata entry.
  def uninstall_plugin(path)
    abs_path = File.absolute_path(path)
    installed = metadata[:plugins]
    if installed.nil? || !installed.include?(abs_path)
      raise "Plugin not currently installed: #{abs_path}."
    end
    self.metadata[:plugins].delete(abs_path)
    save
  end

  ##
  # List plugins installed in the project. Initializes the +:plugins+ metadata
  # entry to an empty Array if it isn't set.
  def plugins ; metadata[:plugins] ||= [] ; end

  ##
  # Loads the plugins installed in the project, requiring +lib-plugin.rb+ from
  # each registered plugin path.
  def load_plugins
    plugins.each { |pl| require File.expand_path("lib-plugin.rb", pl) }
  end

end
@@ -1,10 +1,75 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
+ require "miga/result"
5
+ require "miga/project/base"
6
+
4
7
  ##
5
8
  # Helper module including specific functions to add project results.
6
- module MiGA::ProjectResult
9
+ module MiGA::Project::Result
10
+
11
+ include MiGA::Project::Base
12
+
13
+ ##
14
+ # Get result identified by Symbol +name+, returns MiGA::Result.
15
def result(name)
  # Keys of @@RESULT_DIRS are Symbols, hence the conversion of +name+.
  dir = @@RESULT_DIRS[name.to_sym]
  # Unknown result names yield nil.
  return nil if dir.nil?
  MiGA::Result.load("#{path}/data/#{dir}/miga-project.json")
end
7
20
 
21
+ ##
22
+ # Get all results, an Array of MiGA::Result.
23
def results
  # Results that cannot be loaded (nil) are dropped from the list.
  @@RESULT_DIRS.keys.map { |k| result(k) }.compact
end
26
+
27
+ ##
28
+ # Add the result identified by Symbol +name+, and return MiGA::Result. Save
29
+ # the result if +save+. The +opts+ hash controls result creation (if
30
+ # necessary).
31
+ # Supported values include:
32
+ # - +force+: A Boolean indicating if the result must be re-indexed. If true,
33
+ # it implies save=true.
34
def add_result(name, save=true, opts={})
  # Keys of @@RESULT_DIRS are Symbols; accept String names like #result does.
  name = name.to_sym if name.respond_to?(:to_sym)
  dir = @@RESULT_DIRS[name]
  return nil if dir.nil?
  base = "#{path}/data/#{dir}/miga-project"
  unless opts[:force]
    r_pre = MiGA::Result.load("#{base}.json")
    # Return the stored result if there is one. If there is none, only go on
    # to (re-)index when +save+ is set.
    return r_pre unless r_pre.nil? && save
  end
  # The result is only built once the task's ".done" file is in place.
  r = result_files_exist?(base, ".done") ?
    send("add_result_#{name}", base) : nil
  r.save unless r.nil?
  r
end
46
+
47
+ ##
48
+ # Get the next distances task, saving intermediate results if +save+. Returns
49
+ # a Symbol.
50
def next_distances(save=true)
  # Restrict the search to the project-wide distance tasks.
  next_task(@@DISTANCE_TASKS, save)
end
51
+
52
+ ##
53
+ # Get the next inclade task, saving intermediate results if +save+. Returns a
54
+ # Symbol.
55
def next_inclade(save=true)
  # Restrict the search to the tasks for :clade projects.
  next_task(@@INCLADE_TASKS, save)
end
56
+
57
+ ##
58
+ # Get the next task from +tasks+, saving intermediate results if +save+.
59
+ # Returns a Symbol.
60
def next_task(tasks=@@DISTANCE_TASKS+@@INCLADE_TASKS, save=true)
  tasks.find do |t|
    run_flag = metadata["run_#{t}"]
    # Tasks explicitly turned off in the metadata are never pending.
    next false if run_flag == false
    # In non-clade projects, in-clade tasks are skipped unless they are
    # explicitly turned on in the metadata.
    next false if !is_clade? && @@INCLADE_TASKS.include?(t) && run_flag != true
    # A task is pending when its result cannot be registered yet.
    add_result(t, save).nil?
  end
end
71
+
72
+
8
73
  private
9
74
 
10
75
  ##
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 1, 3]
13
+ VERSION = [0.3, 1, 4]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
data/scripts/cds.bash CHANGED
@@ -11,6 +11,13 @@ cd "$PROJECT/data/06.cds"
11
11
  # Initialize
12
12
  miga date > "$DATASET.start"
13
13
 
14
+ # Gunzip (if necessary)
15
+ if [[ -e "../05.assembly/$DATASET.LargeContigs.fna.gz" \
16
+ && ! -e "../05.assembly/$DATASET.LargeContigs.fna" ]] ; then
17
+ gzip -d "../05.assembly/$DATASET.LargeContigs.fna.gz"
18
+ miga add_result -P "$PROJECT" -D "$DATASET" -r assembly -f
19
+ fi
20
+
14
21
  # Run Prodigal
15
22
  TYPE=$(miga list_datasets -P "$PROJECT" -D "$DATASET" \
16
23
  --metadata "type" | awk '{print $2}')
@@ -28,14 +28,16 @@ class RemoteDatasetTest < Test::Unit::TestCase
28
28
  assert_raise { MiGA::RemoteDataset.new("ids", :google, :ebi) }
29
29
  end
30
30
 
31
- def test_ebi
31
+ def test_rest
32
32
  hiv2 = "M30502.1"
33
- rd = MiGA::RemoteDataset.new(hiv2, :embl, :ebi)
34
- assert_equal([hiv2], rd.ids)
35
- omit_if(!$remote_tests, "Remote access is error-prone.")
36
- tx = rd.get_ncbi_taxonomy
37
- assert_equal(MiGA::Taxonomy, tx.class)
38
- assert_equal("Lentivirus", tx[:g])
33
+ {embl: :ebi, nuccore: :ncbi}.each do |db, universe|
34
+ rd = MiGA::RemoteDataset.new(hiv2, db, universe)
35
+ assert_equal([hiv2], rd.ids)
36
+ omit_if(!$remote_tests, "Remote access is error-prone.")
37
+ tx = rd.get_ncbi_taxonomy
38
+ assert_equal(MiGA::Taxonomy, tx.class, "Failed on #{universe}:#{db}")
39
+ assert_equal("Lentivirus", tx[:g], "Failed on #{universe}:#{db}")
40
+ end
39
41
  end
40
42
 
41
43
  def test_net_ftp
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'miga'
4
+
5
# The first two command-line arguments are the project path and the dataset
# name; both objects are kept in globals so the required scripts can use them.
$project = MiGA::Project.load(ARGV.shift)
$dataset = $project.dataset(ARGV.shift)
# NOTE(review): Hash[] with a single Array expects [key, value] pairs, but
# ARGV is a flat list of Strings -- confirm the intended format of the
# remaining arguments (+opts+ is not used yet).
opts = Hash[ARGV]

# Nothing to do for datasets where +is_multi?+ is true.
exit(0) if $dataset.is_multi?

# Load the helpers matching the dataset kind (reference or not).
if $dataset.is_ref?
  require_relative 'distances/ref-nomulti.rb'
else
  require_relative 'distances/noref-nomulti.rb'
end
# TODO run_distances!!!
@@ -0,0 +1,58 @@
1
+
2
+ require 'sqlite3'
3
+
4
# Default options shared by the distance helpers defined in this file.
$opts = {}
# Flag name (without the leading "--") handed to aai.rb. Taken from the
# MIGA_AAI_SAVE_RBM environment variable when set; otherwise it depends on
# whether the project is a clade project.
$opts[:aai_save_rbm] = ENV.fetch("MIGA_AAI_SAVE_RBM") do
  $project.is_clade? ? "save-rbm" : "no-save-rbm"
end
# Value passed as -t to ani.rb and aai.rb. Taken from the CORES environment
# variable, with 2 as default.
$opts[:thr] = ENV["CORES"].nil? ? 2 : ENV["CORES"].to_i
11
+
12
##
# Run +ani.rb+ on the files +f1+ and +f2+, passing +db+ as its -S argument,
# and return the value it prints as Float, or 0 if it prints nothing.
# +opts+ overrides the defaults in $opts; only +:thr+ (-t) is used here.
def ani(f1, f2, db, opts={})
  opts = $opts.merge(opts)
  v = `ani.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" --name1 "#{ds_name f1}" --name2 "#{ds_name f2}" \
    -t "#{opts[:thr]}" -a --no-save-regions --no-save-rbm --lookup-first`
  # Parenthesized on purpose: `a or b ? x : y` parses as `a or (b ? x : y)`,
  # which would return +true+ instead of 0 for a nil value.
  (v.nil? || v.empty?) ? 0 : v.to_f
end
18
+
19
##
# Create the +aai+ table in the SQLite3 database at path +db+, unless the
# file already exists with non-zero size. The table has six columns (seq1,
# seq2, aai, sd, n, omega), matching the six values inserted by #haai.
def make_empty_aai_db(db)
  return if File.size?(db)
  SQLite3::Database.new(db) do |conn|
    conn.execute "create table if not exists aai(" +
      "seq1 varchar(256), seq2 varchar(256), " +
      "aai float, sd float, n int, omega int" +
      ")"
  end
end
27
+
28
##
# Run +aai.rb+ on the files +f1+ and +f2+, passing +db+ as its -S argument,
# and return the value it prints as Float, or 0 if it prints nothing.
# +opts+ overrides the defaults in $opts: +:thr+ is passed as -t, and
# +:aai_save_rbm+ is passed as a "--<value>" flag.
def aai(f1, f2, db, opts={})
  opts = $opts.merge(opts)
  v = `aai.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" --name1 "#{ds_name f1}" --name2 "#{ds_name f2}" \
    -t "#{opts[:thr]}" -a --lookup-first "--#{opts[:aai_save_rbm]}"`.chomp
  # Parenthesized on purpose: `a or b ? x : y` parses as `a or (b ? x : y)`.
  (v.nil? || v.empty?) ? 0 : v.to_f
end
34
+
35
##
# Estimate AAI from the hAAI between the files +f1+ and +f2+. The hAAI is
# obtained with #aai using the database +db+ and forcing "no-save-rbm".
# Returns 0 if the hAAI is nil, zero, or above 90. Otherwise, the estimate is
# stored in the +aai+ table of the database +aai_db+ (created if necessary)
# and returned. +opts+ is forwarded to #aai.
def haai(f1, f2, db, aai_db, opts={})
  haai_value = aai(f1, f2, db, opts.merge(aai_save_rbm: "no-save-rbm"))
  return 0 if haai_value.nil? || haai_value == 0 || haai_value > 90.0
  estimate = 100.0 - Math.exp(2.435076 + 0.4275193 * Math.log(100.0 - haai_value))
  make_empty_aai_db(aai_db)
  # The estimate belongs in the AAI database (+aai_db+), not in the hAAI
  # database (+db+) used above.
  SQLite3::Database.new(aai_db) do |conn|
    conn.execute "insert into aai values(?, ?, ?, 0, 0, 0)",
      [ds_name(f1), ds_name(f2), estimate]
  end
  estimate
end
47
+
48
##
# Return the AAI estimated by #haai (using the files +f1_h+ and +f2_h+ and
# the database +db_h+). If that estimate is nil or zero, return instead the
# value of #aai for the files +f1+ and +f2+. In both cases +db+ is the AAI
# database and +opts+ is passed along.
def haai_or_aai(f1_h, f2_h, db_h, f1, f2, db, opts={})
  estimate = haai(f1_h, f2_h, db_h, db, opts)
  return estimate unless estimate.nil? || estimate.zero?
  aai(f1, f2, db, opts)
end
53
+
54
##
# Return the value of +metric+ stored for the pair +n1+, +n2+ in the table
# +metric+ of the SQLite3 database at path +db+. Returns nil if the file
# doesn't exist (or is empty) or if there is no row for the pair.
def val_from_db(n1, n2, db, metric)
  return nil unless File.size? db
  SQLite3::Database.new(db) do |conn|
    # NOTE(review): +metric+ is interpolated as table and column name, which
    # cannot be bound as a parameter -- confirm callers only pass fixed names.
    row = conn.execute(
      "select #{metric} from #{metric} where seq1=? and seq2=?", [n1, n2]
    ).first
    return row.nil? ? nil : row.first
  end
end