miga-base 0.3.1.3 → 0.3.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,99 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
class MiGA::Project < MiGA::MiGA

  class << self

    ##
    # Is there a project at +path+? True only when +path+ is an existing
    # directory that contains a +miga.project.json+ file.
    def exist?(path)
      manifest = "#{path}/miga.project.json"
      Dir.exist?(path) && File.exist?(manifest)
    end

    ##
    # Load the project at +path+. Returns a MiGA::Project when #exist? holds
    # for +path+, and nil otherwise.
    def load(path)
      exist?(path) ? new(path) : nil
    end

    ##
    # Class-level readers for the class variables of the same name.
    # NOTE(review): these variables are assigned in MiGA::Project::Base; this
    # presumably relies on that module being mixed into MiGA::Project
    # elsewhere -- confirm.
    def INCLADE_TASKS
      @@INCLADE_TASKS
    end

    def DISTANCE_TASKS
      @@DISTANCE_TASKS
    end

    def KNOWN_TYPES
      @@KNOWN_TYPES
    end

    def RESULT_DIRS
      @@RESULT_DIRS
    end

  end

end
29
+
30
module MiGA::Project::Base

  ##
  # Top-level folders inside a project.
  @@FOLDERS = %w[data metadata daemon]

  ##
  # Folders for results. Parent folders are listed before their subfolders.
  @@DATA_FOLDERS = %w[
    01.raw_reads
    02.trimmed_reads
    03.read_quality
    04.trimmed_fasta
    05.assembly
    06.cds
    07.annotation
    07.annotation/01.function
    07.annotation/02.taxonomy
    07.annotation/01.function/01.essential
    07.annotation/01.function/02.ssu
    07.annotation/02.taxonomy/01.mytaxa
    07.annotation/03.qa
    07.annotation/03.qa/01.checkm
    07.annotation/03.qa/02.mytaxa_scan
    08.mapping
    08.mapping/01.read-ctg
    08.mapping/02.read-gene
    09.distances
    09.distances/01.haai
    09.distances/02.aai
    09.distances/03.ani
    09.distances/04.ssu
    09.distances/05.taxonomy
    10.clades
    10.clades/01.find
    10.clades/02.ani
    10.clades/03.ogs
    10.clades/04.phylogeny
    10.clades/04.phylogeny/01.essential
    10.clades/04.phylogeny/02.core
    10.clades/05.metadata
    90.stats
  ]

  ##
  # Directories containing the results from project-wide tasks, keyed by the
  # task Symbol. Entries that are commented out are not registered.
  @@RESULT_DIRS = {
    project_stats:  "90.stats",
    # Distances
    haai_distances: "09.distances/01.haai",
    aai_distances:  "09.distances/02.aai",
    ani_distances:  "09.distances/03.ani",
    #ssu_distances: "09.distances/04.ssu",
    # Clade identification
    clade_finding:  "10.clades/01.find",
    # Clade analysis
    subclades:      "10.clades/02.ani",
    ogs:            "10.clades/03.ogs"
    #ess_phylogeny: "10.clades/04.phylogeny/01.essential",
    #core_phylogeny: "10.clades/04.phylogeny/02.core",
    #clade_metadata: "10.clades/05.metadata"
  }

  ##
  # Supported types of projects. Each type maps to a Hash with a
  # +description+ String and the Boolean flags +single+ and +multi+.
  @@KNOWN_TYPES = {
    mixed: {
      description: "Mixed collection of genomes, metagenomes, and viromes.",
      single: true,
      multi: true
    },
    genomes: {
      description: "Collection of genomes.",
      single: true,
      multi: false
    },
    clade: {
      description: "Collection of closely-related genomes (ANI >= 90%).",
      single: true,
      multi: false
    },
    metagenomes: {
      description: "Collection of metagenomes and/or viromes.",
      single: false,
      multi: true
    }
  }

  ##
  # Project-wide distance estimations.
  @@DISTANCE_TASKS = %i[
    project_stats haai_distances aai_distances ani_distances clade_finding
  ]

  ##
  # Project-wide tasks for :clade projects.
  @@INCLADE_TASKS = %i[subclades ogs]

end
99
+
@@ -0,0 +1,148 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ ##
5
+ # Helper module including specific functions to handle datasets.
6
+ module MiGA::Project::Dataset
7
+
8
+
9
##
# Returns an Array with the result of #dataset for every name registered in
# +metadata[:datasets]+, in the same order.
def datasets
  metadata[:datasets].collect do |ds_name|
    dataset(ds_name)
  end
end
14
+
15
##
# Returns the registered dataset names exactly as stored in
# +metadata[:datasets]+ (an Array of String), without instantiating any
# dataset object.
def dataset_names
  registered = metadata[:datasets]
  registered
end
20
+
21
##
# Returns the MiGA::Dataset called +name+ (after normalizing it with
# +miga_name+), or nil if MiGA::Dataset.exist? reports that it does not exist
# in this project. Objects are memoized per normalized name in +@datasets+.
def dataset(name)
  key = name.miga_name
  return nil unless MiGA::Dataset.exist?(self, key)
  cache = (@datasets ||= {})
  cache[key] ||= MiGA::Dataset.new(self, key)
end
30
+
31
##
# Iterate through the registered datasets calling +blk+ once per dataset.
# A block with arity 1 receives the MiGA::Dataset object only; any other
# block receives the name followed by the dataset object.
def each_dataset(&blk)
  metadata[:datasets].each do |name|
    args = blk.arity == 1 ? [dataset(name)] : [name, dataset(name)]
    blk.call(*args)
  end
end
44
+
45
##
# Register the dataset identified by +name+ and return the result of
# #dataset for it. If +name+ is not yet listed in +metadata[:datasets]+, a
# MiGA::Dataset is instantiated for it, the name is appended to the list and
# the project is saved; otherwise nothing is modified.
def add_dataset(name)
  return dataset(name) if metadata[:datasets].include?(name)
  MiGA::Dataset.new(self, name)
  @metadata[:datasets] << name
  save
  dataset(name)
end
55
+
56
##
# Unlink the dataset identified by +name+: remove +name+ from
# +metadata[:datasets]+ and save the project. Returns the MiGA::Dataset that
# was unlinked, or nil (without saving) if #dataset finds no such dataset.
def unlink_dataset(name)
  ds = dataset(name)
  unless ds.nil?
    metadata[:datasets].delete(name)
    save
  end
  ds
end
65
+
66
##
# Import the dataset +ds+, a MiGA::Dataset, using +method+ which is any method
# supported by File#generic_transfer. For every result of +ds+, its files and
# any existing +json+, +start+ and +done+ companions are transferred into the
# matching result folder of this project; then the dataset metadata file is
# transferred and the dataset is registered with #add_dataset.
#
# Raises a RuntimeError if a dataset with the same name already exists here.
def import_dataset(ds, method=:hardlink)
  if MiGA::Dataset.exist?(self, ds.name)
    raise "Impossible to import dataset, it already exists: #{ds.name}."
  end
  ds.each_result do |task, result|
    target_dir = "#{path}/data/#{MiGA::Dataset.RESULT_DIRS[task]}"
    # Result files
    result.each_file do |file|
      File.generic_transfer("#{result.dir}/#{file}", "#{target_dir}/#{file}",
        method)
    end
    # Result metadata (only the companion files that are present)
    %w(json start done).each do |suffix|
      source = "#{result.dir}/#{ds.name}.#{suffix}"
      next unless File.exist? source
      File.generic_transfer(source, "#{target_dir}/#{ds.name}.#{suffix}",
        method)
    end
  end
  # Dataset metadata
  File.generic_transfer("#{ds.project.path}/metadata/#{ds.name}.json",
    "#{path}/metadata/#{ds.name}.json", method)
  # Register the dataset
  add_dataset(ds.name)
end
94
+
95
##
# Find all datasets with (potential) result files that are not yet registered.
# Scans every existing dataset result folder for entries with a known result
# extension, takes the part of the file name before the first dot as the
# dataset name (ignoring "miga-project"), and returns the unique names that
# are not listed in +metadata[:datasets]+.
def unregistered_datasets
  result_ext = %r{
    \.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
  }x
  found = MiGA::Dataset.RESULT_DIRS.values.flat_map do |dir|
    folder = "#{path}/data/#{dir}"
    next [] unless Dir.exist? folder
    candidates = Dir.entries(folder).map do |file|
      next unless file =~ result_ext
      prefix = file[/([^\.]+)/, 1]
      prefix unless prefix == "miga-project"
    end
    candidates.compact
  end
  found.uniq - metadata[:datasets]
end
113
+
114
##
# Are all the datasets in the project preprocessed? Only datasets for which
# +is_ref?+ holds are checked, and the check stops at the first one that is
# incomplete. +save+ is forwarded to MiGA::Dataset#done_preprocessing? so
# intermediate results are saved if it is true.
def done_preprocessing?(save=true)
  dataset_names.all? do |dn|
    ds = dataset(dn)
    !ds.is_ref? || ds.done_preprocessing?(save)
  end
end
124
+
125
##
# Returns a two-dimensional matrix (Array of Array) where the first index
# corresponds to the dataset, the second index corresponds to the dataset
# task, and the value corresponds to:
# - 0: Before execution.
# - 1: Done (or not required).
# - 2: To do.
# The rows are collected, in order, from #each_dataset_profile_advance.
def profile_datasets_advance
  [].tap do |rows|
    each_dataset_profile_advance { |ds_adv| rows << ds_adv }
  end
end
139
+
140
##
# Call +blk+ once per registered dataset, passing the value returned by
# MiGA::Dataset#profile_advance for that dataset.
def each_dataset_profile_advance(&blk)
  each_dataset do |ds|
    blk.call(ds.profile_advance)
  end
end
146
+
147
+ end
148
+
@@ -0,0 +1,41 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ ##
5
+ # Helper module including specific functions to handle plugins.
6
+ module MiGA::Project::Plugins
7
+
8
##
# Installs the plugin located at +path+: its absolute path is appended to
# +metadata[:plugins]+ and the project is saved.
#
# Raises a RuntimeError if that absolute path is already listed, or if the
# folder does not contain a +miga-plugin.json+ file.
def install_plugin(path)
  abs_path = File.absolute_path(path)
  installed = metadata[:plugins]
  if !installed.nil? && installed.include?(abs_path)
    raise "Plugin already installed in project: #{abs_path}."
  end
  manifest = File.expand_path("miga-plugin.json", abs_path)
  raise "Malformed MiGA plugin: #{abs_path}." unless File.exist?(manifest)
  metadata[:plugins] ||= []
  metadata[:plugins] << abs_path
  save
end
20
+
21
##
# Uninstall the plugin located at +path+: its absolute path is removed from
# +metadata[:plugins]+ and the project is saved.
#
# Raises a RuntimeError if that absolute path is not currently listed.
def uninstall_plugin(path)
  abs_path = File.absolute_path(path)
  installed = metadata[:plugins]
  if installed.nil? || !installed.include?(abs_path)
    raise "Plugin not currently installed: #{abs_path}."
  end
  metadata[:plugins].delete(abs_path)
  save
end
30
+
31
##
# List the plugins installed in the project. Initializes
# +metadata[:plugins]+ to an empty Array the first time it is unset.
def plugins
  metadata[:plugins] ||= []
end
34
+
35
##
# Loads the plugins installed in the project by requiring the file
# +lib-plugin.rb+ found inside each folder listed by #plugins.
def load_plugins
  plugins.each do |plugin_dir|
    entry_point = File.expand_path("lib-plugin.rb", plugin_dir)
    require entry_point
  end
end
40
+
41
+ end
@@ -1,10 +1,75 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
+ require "miga/result"
5
+ require "miga/project/base"
6
+
4
7
  ##
5
8
  # Helper module including specific functions to add project results.
6
- module MiGA::ProjectResult
9
+ module MiGA::Project::Result
10
+
11
+ include MiGA::Project::Base
12
+
13
##
# Get the result identified by +name+ (converted with +to_sym+). Returns the
# value of MiGA::Result.load for the +miga-project.json+ file in the folder
# registered for that task, or nil if +name+ is not a key of @@RESULT_DIRS.
def result(name)
  rel_dir = @@RESULT_DIRS[name.to_sym]
  unless rel_dir.nil?
    MiGA::Result.load("#{path}/data/#{rel_dir}/miga-project.json")
  end
end
7
20
 
21
##
# Get all results, an Array of MiGA::Result. Calls #result for every key of
# @@RESULT_DIRS and drops the tasks for which it returns nil.
def results
  @@RESULT_DIRS.keys.map { |task| result(task) }.compact
end
26
+
27
##
# Add the result identified by Symbol +name+, and return MiGA::Result. Save
# the result if +save+. The +opts+ hash controls result creation (if
# necessary).
# Supported values include:
# - +force+: A Boolean indicating if the result must be re-indexed. If true,
#   it implies save=true.
#
# Returns nil if +name+ is not a key of @@RESULT_DIRS, or if the result had to
# be built but its +.done+ file is not in place.
def add_result(name, save=true, opts={})
  rel_dir = @@RESULT_DIRS[name]
  return nil if rel_dir.nil?
  base = "#{path}/data/#{rel_dir}/miga-project"
  unless opts[:force]
    r_pre = MiGA::Result.load("#{base}.json")
    # Keep going only when nothing is indexed yet and saving was requested
    return r_pre unless r_pre.nil? && save
  end
  return nil unless result_files_exist?(base, ".done")
  r = send("add_result_#{name}", base)
  r.save unless r.nil?
  r
end
46
+
47
##
# Get the next distances task, i.e. the result of #next_task restricted to
# @@DISTANCE_TASKS, saving intermediate results if +save+. Returns a Symbol.
def next_distances(save=true)
  next_task(@@DISTANCE_TASKS, save)
end
51
+
52
##
# Get the next inclade task, i.e. the result of #next_task restricted to
# @@INCLADE_TASKS, saving intermediate results if +save+. Returns a Symbol.
def next_inclade(save=true)
  next_task(@@INCLADE_TASKS, save)
end
56
+
57
##
# Get the next task from +tasks+, saving intermediate results if +save+.
# Returns the first task (a Symbol) for which #add_result returns nil, or nil
# if there is none. A task is skipped when +metadata["run_<task>"]+ is false,
# and also when it belongs to @@INCLADE_TASKS, the project is not a clade and
# +metadata["run_<task>"]+ is not true.
def next_task(tasks=@@DISTANCE_TASKS+@@INCLADE_TASKS, save=true)
  tasks.find do |t|
    next false if metadata["run_#{t}"] == false
    inclade_elsewhere = !is_clade? && @@INCLADE_TASKS.include?(t)
    next false if inclade_elsewhere && metadata["run_#{t}"] != true
    add_result(t, save).nil?
  end
end
71
+
72
+
8
73
  private
9
74
 
10
75
  ##
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 1, 3]
13
+ VERSION = [0.3, 1, 4]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
data/scripts/cds.bash CHANGED
@@ -11,6 +11,13 @@ cd "$PROJECT/data/06.cds"
11
11
  # Initialize
12
12
  miga date > "$DATASET.start"
13
13
 
14
+ # Gunzip (if necessary)
15
+ if [[ -e "../05.assembly/$DATASET.LargeContigs.fna.gz" \
16
+ && ! -e "../05.assembly/$DATASET.LargeContigs.fna" ]] ; then
17
+ gzip -d "../05.assembly/$DATASET.LargeContigs.fna.gz"
18
+ miga add_result -P "$PROJECT" -D "$DATASET" -r assembly -f
19
+ fi
20
+
14
21
  # Run Prodigal
15
22
  TYPE=$(miga list_datasets -P "$PROJECT" -D "$DATASET" \
16
23
  --metadata "type" | awk '{print $2}')
@@ -28,14 +28,16 @@ class RemoteDatasetTest < Test::Unit::TestCase
28
28
  assert_raise { MiGA::RemoteDataset.new("ids", :google, :ebi) }
29
29
  end
30
30
 
31
- def test_ebi
31
+ def test_rest
32
32
  hiv2 = "M30502.1"
33
- rd = MiGA::RemoteDataset.new(hiv2, :embl, :ebi)
34
- assert_equal([hiv2], rd.ids)
35
- omit_if(!$remote_tests, "Remote access is error-prone.")
36
- tx = rd.get_ncbi_taxonomy
37
- assert_equal(MiGA::Taxonomy, tx.class)
38
- assert_equal("Lentivirus", tx[:g])
33
+ {embl: :ebi, nuccore: :ncbi}.each do |db, universe|
34
+ rd = MiGA::RemoteDataset.new(hiv2, db, universe)
35
+ assert_equal([hiv2], rd.ids)
36
+ omit_if(!$remote_tests, "Remote access is error-prone.")
37
+ tx = rd.get_ncbi_taxonomy
38
+ assert_equal(MiGA::Taxonomy, tx.class, "Failed on #{universe}:#{db}")
39
+ assert_equal("Lentivirus", tx[:g], "Failed on #{universe}:#{db}")
40
+ end
39
41
  end
40
42
 
41
43
  def test_net_ftp
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'miga'
4
+
5
# Load the project and the dataset named by the first two command-line
# arguments. Both are kept in globals, which is how the helper files required
# below reach them.
$project = MiGA::Project.load(ARGV.shift)
$dataset = $project.dataset(ARGV.shift)
# NOTE(review): Hash[] given a single Array expects [key, value] pairs, not a
# flat list of Strings -- confirm the intended format of the extra arguments.
opts = Hash[ARGV]

# Nothing to do for datasets where is_multi? holds.
exit(0) if $dataset.is_multi?

# Pick the helper set depending on whether the dataset is a reference.
if $dataset.is_ref?
  require_relative 'distances/ref-nomulti.rb'
else
  require_relative 'distances/noref-nomulti.rb'
end
# TODO run_distances!!!
@@ -0,0 +1,58 @@
1
+
2
+ require 'sqlite3'
3
+
4
# Default options shared by the distance helpers:
# - aai_save_rbm: value of the MIGA_AAI_SAVE_RBM environment variable when
#   set; otherwise "save-rbm" for clade projects and "no-save-rbm" for the
#   rest.
# - thr: value of the CORES environment variable as Integer, 2 when unset.
$opts = {
  aai_save_rbm: ENV.fetch("MIGA_AAI_SAVE_RBM") do
    $project.is_clade? ? "save-rbm" : "no-save-rbm"
  end,
  thr: ENV.fetch("CORES", 2).to_i
}
11
+
12
##
# Runs the external +ani.rb+ command on the files +f1+ and +f2+, passing +db+
# through -S and the dataset names (from +ds_name+) through --name1/--name2.
# +opts+ is merged over the global +$opts+; only +:thr+ is used here.
#
# Returns the printed value as Float, or 0 if the command printed nothing
# (an empty or blank line counts as nothing).
def ani(f1, f2, db, opts={})
  opts = $opts.merge(opts)
  v = `ani.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" --name1 "#{ds_name f1}" --name2 "#{ds_name f2}" \
    -t "#{opts[:thr]}" -a --no-save-regions --no-save-rbm --lookup-first`.chomp
  # Backticks always return a String, so emptiness is the only "no value" case
  v.empty? ? 0 : v.to_f
end
18
+
19
##
# Creates the +aai+ table (seq1, seq2, aai, sd, n, omega) in the SQLite3
# database file +db+, unless that file already exists with a non-zero size.
def make_empty_aai_db(db)
  return if File.size?(db)
  SQLite3::Database.new(db) do |conn|
    # Six columns, matching the six values inserted per row by #haai
    conn.execute "create table if not exists aai(" +
      "seq1 varchar(256), seq2 varchar(256), " +
      "aai float, sd float, n int, omega int" +
      ")"
  end
end
27
+
28
##
# Runs the external +aai.rb+ command on the files +f1+ and +f2+, passing +db+
# through -S and the dataset names (from +ds_name+) through --name1/--name2.
# +opts+ is merged over the global +$opts+; +:thr+ and +:aai_save_rbm+ are
# used to build the command line.
#
# Returns the printed value as Float, or 0 if the command printed nothing.
def aai(f1, f2, db, opts={})
  opts = $opts.merge(opts)
  printed = `aai.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" --name1 "#{ds_name f1}" --name2 "#{ds_name f2}" \
    -t "#{opts[:thr]}" -a --lookup-first "--#{opts[:aai_save_rbm]}"`.chomp
  return 0 if printed.empty?
  printed.to_f
end
34
+
35
##
# Estimates AAI from hAAI. Runs #aai on +f1+ and +f2+ with +db+ as store
# (forcing "no-save-rbm") to obtain the hAAI value, converts it with
#   100 - exp(2.435076 + 0.4275193 * ln(100 - hAAI))
# and records the estimate in the +aai+ table of +aai_db+ (created first if
# needed). +opts+ is merged over the global +$opts+ and forwarded to #aai.
#
# Returns the estimated AAI as Float, or 0 (writing nothing) when the hAAI
# value is missing, zero, or above 90.
def haai(f1, f2, db, aai_db, opts={})
  opts = $opts.merge(opts)
  haai_val = aai(f1, f2, db, opts.merge(aai_save_rbm: "no-save-rbm"))
  return 0 if haai_val.nil? or haai_val == 0 or haai_val > 90.0
  aai_est = 100.0 - Math.exp(2.435076 + 0.4275193 * Math.log(100.0 - haai_val))
  make_empty_aai_db(aai_db)
  # Store the estimate in the database that was just prepared (aai_db), not
  # in the hAAI store
  SQLite3::Database.new(aai_db) do |conn|
    conn.execute "insert into aai values(?, ?, ?, 0, 0, 0)",
      [ds_name(f1), ds_name(f2), aai_est]
  end
  aai_est
end
47
+
48
##
# Returns the AAI estimated from hAAI (see #haai, called with the +_h+ files
# and +db_h+, and with +db+ as the database receiving the estimate). Falls
# back to running #aai on +f1+, +f2+ and +db+ only when that estimate is nil
# or zero. +opts+ is forwarded to both.
def haai_or_aai(f1_h, f2_h, db_h, f1, f2, db, opts={})
  # A distinct local name is required: a local called +aai+ would be nil in
  # the condition below and the estimate would always be discarded
  val = haai(f1_h, f2_h, db_h, db, opts)
  val = aai(f1, f2, db, opts) if val.nil? or val.zero?
  val
end
53
+
54
##
# Looks up the value of +metric+ for the pair (+n1+, +n2+) in the SQLite3
# database file +db+. +metric+ is used as both the table and the column name.
#
# Returns the first column of the first matching row, or nil if +db+ is
# missing or empty, or if no row matches the pair.
def val_from_db(n1, n2, db, metric)
  return nil unless File.size? db
  SQLite3::Database.new(db) do |conn|
    # +metric+ is interpolated because identifiers cannot be bound; it must
    # never come from untrusted input
    row = conn.execute(
      "select #{metric} from #{metric} where seq1=? and seq2=?", [n1, n2]
    ).first
    return row.nil? ? nil : row.first
  end
end