rput 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +31 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/lib/experiment.rb +113 -0
- data/lib/starter.rb +307 -0
- data/rput.gemspec +66 -0
- data/test/helper.rb +18 -0
- data/test/test_rput.rb +7 -0
- metadata +167 -0
data/.document
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime dependencies required to use the gem.
# NOTE(review): lib/starter.rb and lib/experiment.rb reference classes that are
# not defined in this gem (RaxmlLight, Parsimonator, GammaScorer, NewickFile,
# Phylip); they are presumably provided by rraxml -- confirm. Declaring it only
# in the :development group meant it was never installed for users of the gem.
gem "rraxml", "~> 0.1.2"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "rdoc", "~> 3.12"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.8.3"
  gem "rcov", ">= 0"
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
git (1.2.5)
|
5
|
+
jeweler (1.8.3)
|
6
|
+
bundler (~> 1.0)
|
7
|
+
git (>= 1.2.5)
|
8
|
+
rake
|
9
|
+
rdoc
|
10
|
+
json (1.6.6)
|
11
|
+
rake (0.9.2.2)
|
12
|
+
rcov (1.0.0)
|
13
|
+
rdoc (3.12)
|
14
|
+
json (~> 1.4)
|
15
|
+
rraxml (0.1.2)
|
16
|
+
shoulda (3.0.1)
|
17
|
+
shoulda-context (~> 1.0.0)
|
18
|
+
shoulda-matchers (~> 1.0.0)
|
19
|
+
shoulda-context (1.0.0)
|
20
|
+
shoulda-matchers (1.0.0)
|
21
|
+
|
22
|
+
PLATFORMS
|
23
|
+
ruby
|
24
|
+
|
25
|
+
DEPENDENCIES
|
26
|
+
bundler (~> 1.0.0)
|
27
|
+
jeweler (~> 1.8.3)
|
28
|
+
rcov
|
29
|
+
rdoc (~> 3.12)
|
30
|
+
rraxml (~> 0.1.2)
|
31
|
+
shoulda
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Fernando Izquierdo-Carrasco
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
= rput
|
2
|
+
|
3
|
+
Raxml-based perpetually updated tree: automated iterations of raxml runs over externally-extended phylip files.
|
4
|
+
|
5
|
+
== Contributing to rput
|
6
|
+
|
7
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
8
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
|
9
|
+
* Fork the project.
|
10
|
+
* Start a feature/bugfix branch.
|
11
|
+
* Commit and push until you are happy with your contribution.
|
12
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
13
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2012 Fernando Izquierdo-Carrasco. See LICENSE.txt for
|
18
|
+
further details.
|
19
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# encoding: utf-8

# Build, test, coverage and documentation tasks for the rput gem.

require 'rubygems'
require 'bundler'
begin
  # Resolve the default and development groups of the Gemfile before any
  # other gem is loaded.
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "rput"
  gem.homepage = "http://github.com/fizquierdo/rput"
  gem.license = "MIT"
  gem.summary = "Raxml-based perpetually updated tree"
  gem.description = "Automated iterations of raxml runs over externally-extended phylip files"
  gem.email = "fer.izquierdo@gmail.com"
  gem.authors = ["Fernando Izquierdo-Carrasco"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

require 'rcov/rcovtask'
Rcov::RcovTask.new do |test|
  test.libs << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
  test.rcov_opts << '--exclude "gems/*"'
end

task :default => :test

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up
  # inside the generated documentation title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "rput #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/lib/experiment.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#require 'rphylip'
|
3
|
+
require 'fileutils'
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
# Directory layout helper for one named experiment.
#
# Every path is built as <base_dir>/experiments/<name>/<subdir>, where
# base_dir is the parent of the directory containing this file.
class Experiment
  # name:: experiment name, used as a directory component.
  def initialize(name)
    @name = name
    @base_dir = File.join File.expand_path(File.dirname(__FILE__)), '../'
  end

  # Returns the path of sub-directory +a+ inside this experiment.
  def dirname(a)
    File.join @base_dir, 'experiments', @name, a
  end

  # Returns the name (not the full path) of the entry in the "output"
  # directory that starts with "bunch" and has the highest integer after its
  # last underscore, or nil when there is none.
  # Raises a SystemCallError if the output directory does not exist.
  def last_bunch_dir
    bunch_dirs = Dir.entries(dirname("output")).select { |f| f =~ /^bunch/ }
    bunch_dirs.sort_by { |s| s.split("_").last.to_i }.last
  end

  # Creates the "alignment" and "output" directories when missing.
  # Returns true only if neither existed before the call; an existing
  # directory is reported on stdout and makes the result false so callers
  # can avoid overwriting a previous experiment.
  def setup_dirs
    setup_ready = true
    %w(alignment output).map { |n| dirname(n) }.each do |dir|
      if File.exist?(dir)
        puts "Exists #{dir}"
        setup_ready = false # to avoid overwrite
      else
        FileUtils.mkdir_p dir
      end
    end
    setup_ready
  end

  # Copies the phylip file given in opts[:phylip] into the alignment directory
  # and calls Phylip#subdivide_random(opts) on the copy, returning its result.
  #
  # opts:: option hash; :phylip is required, the whole hash is forwarded.
  # updates_are_full_alignments:: accepted for interface compatibility; not
  #                               used by this method.
  # Raises RuntimeError when the phylip file is missing.
  def expand_with_updates(opts, updates_are_full_alignments = true)
    ali = dirname("alignment")
    phy = opts[:phylip]
    raise "phylip file not available" if phy.nil? || !File.exist?(phy)
    # Make sure the target is a directory: without it FileUtils.copy either
    # fails or creates a plain file called "alignment".
    FileUtils.mkdir_p ali
    FileUtils.copy phy, ali
    alignment = Phylip.new(File.join(ali, File.basename(phy)))
    alignment.subdivide_random opts
  end
end
|
39
|
+
|
40
|
+
# Persistent storage, in a .yml file, of the status of every experiment.
#
# The list is an Array of Hashes. Each entry has the symbol keys :name and
# :date, optionally :fake_phy and :initial_phy, plus whatever step keys are
# recorded through #update.
class ExperimentList
  attr_accessor :name, :date

  # expfile:: path of the YAML file; it is created when it does not exist.
  def initialize(expfile)
    @expfile = expfile
    File.open(expfile, "w") { |f| f.puts "---" } unless File.exist?(expfile)
    @list = load_list || []
  end

  # Registers a new experiment and saves the list.
  #
  # opts:: hash with :name and optionally :fake_phy / :initial_phy.
  # Returns true when added, nil when the name is already taken.
  def add(opts)
    return unless name_available?(opts[:name])
    newexp = { :name => opts[:name], :date => Time.now }
    [:fake_phy, :initial_phy].each do |key|
      newexp[key] = opts[key] unless opts[key].nil?
    end
    @list.push newexp
    save
    true
  end

  # Records +state+ under key +step+ for every entry called +name+. A repeated
  # step is appended to the stored value, separated by a space. The list is
  # saved even when no entry matches.
  def update(name, step, state)
    @list.each do |item|
      next unless item[:name] == name
      if item[step].nil?
        item[step] = state
      else
        # Interpolation instead of String#+ so a non-String state (for
        # example a numeric likelihood) does not raise a TypeError.
        item[step] = "#{item[step]} #{state}"
      end
    end
    save
  end

  # Prints one line per experiment, followed by its update steps (keys of the
  # form u<number>) in numeric order.
  def show
    puts "Current Experiments"
    puts "ID\tname"
    @list.each_with_index do |item, i|
      fake_name = item[:fake_phy].nil? ? "-" : File.basename(item[:fake_phy])
      initial_name = item[:initial_phy].nil? ? "-" : File.basename(item[:initial_phy])
      puts "#{i}\t#{item[:name]}\t fake: #{fake_name} \t init: #{initial_name} Best LH: #{item["bestLH"]}"
      update_keys = item.keys.select { |k| k =~ /^u\d+$/ }
      # Sort on the numeric part: a plain sort would list u10 before u2.
      update_keys.sort_by { |k| k.to_s[1..-1].to_i }.each do |update_key|
        puts " #{update_key}: #{item[update_key]}"
      end
    end
  end

  # Deletes the entry called +name+, if any, and saves the list.
  def remove(name)
    item = find_by_name(name)
    unless item.nil?
      @list.delete(item)
      save
    end
  end

  # Writes the whole list back to the YAML file.
  def save
    File.open(@expfile, "w") { |f| f.write @list.to_yaml }
    puts "Experiment list updated"
  end

  # Returns true when no entry is called +name+; otherwise reports the
  # clashing entry on stdout and returns false.
  def name_available?(name)
    existing = find_by_name(name)
    return true if existing.nil?
    puts "An experiment with this name exists already"
    p existing
    false
  end

  protected

  # Returns the first entry called +name+, or nil.
  def find_by_name(name)
    @list.find { |e| e[:name] == name }
  end

  private

  # Reads the list from disk. The file is written by #save and contains
  # Symbol keys and Time values; Psych 4 refuses Time in YAML.load /
  # YAML.load_file, so the full loader is requested explicitly where it exists.
  # NOTE: only point this class at files it wrote itself; a full YAML load must
  # never be used on untrusted input.
  def load_list
    content = File.read(@expfile)
    YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(content) : YAML.load(content)
  end
end
|
data/lib/starter.rb
ADDED
@@ -0,0 +1,307 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
#gem 'rraxml','0.1.2'
|
4
|
+
#require 'rraxml'
|
5
|
+
#require 'rnewick'
|
6
|
+
#require 'rphylip'
|
7
|
+
require 'fileutils'
require 'logger'
|
8
|
+
|
9
|
+
# Hands one iteration over to the batch script raxml_batch_cycle.sh, which
# must be present in the current working directory.
class CycleController
  # opts:: hash with :phy (path of the phylip file), :iter,
  #        :num_parsi_trees, :num_bestML_trees, :exp_name, :base_dir and
  #        optionally :num_ptrees (defaults to :num_parsi_trees).
  # Raises RuntimeError when the phylip file is empty or the script is
  # missing; a missing phylip file raises a SystemCallError.
  def initialize(opts)
    # Work on a copy so the :num_ptrees default does not leak into the
    # caller's hash.
    @opts = opts.dup
    @opts[:num_ptrees] ||= @opts[:num_parsi_trees]
    @script = "raxml_batch_cycle.sh"
    # Only the first line is needed; the block form also closes the handle.
    header = File.open(@opts[:phy]) { |f| f.gets }
    raise "#{@opts[:phy]} has no phylip header line" if header.nil?
    # The first two whitespace-separated fields of the header are read as
    # number of taxa and sequence length.
    @numtaxa, @seqlen = header.split.map { |w| w.to_i }
    raise "#{@script} missing" unless File.exist?(@script)
  end

  # Memory estimate passed to the script for the parsimony step, as a String:
  # taxa * sites * 3.0 * 1E-6, never below 16. The untruncated value is also
  # printed to stdout.
  def parsimonator_requirements
    required_megabytes(1.0, 3.0)
  end

  # Memory estimate passed to the script for the RAxML-Light step, as a
  # String: taxa * sites * 8 * 4 * 1.3 * 1E-6, never below 16. The untruncated
  # value is also printed to stdout.
  def raxmllight_requirements
    #(n-2) * m * ( 8 * 4 )
    required_megabytes(8 * 4, 1.3)
  end

  # Prints and runs the batch script with the nine positional arguments:
  # iteration, phylip path, both memory estimates, trees per start (-N), total
  # parsimony trees, best ML trees, experiment name and base directory.
  # Returns the result of Kernel#system.
  # Raises RuntimeError when a tree count is missing or not positive.
  def run_as_batch
    raise "User Number of parsimony trees not set" unless @opts[:num_parsi_trees].to_i > 0
    raise "Total Number of parsimony trees not set" unless @opts[:num_ptrees].to_i > 0
    args = [
      @opts[:iter].to_s,
      File.expand_path(@opts[:phy]),
      parsimonator_requirements,
      raxmllight_requirements,
      @opts[:num_parsi_trees].to_s, # -N parameter -p for user
      @opts[:num_ptrees].to_s, # total number of parsimony trees
      @opts[:num_bestML_trees].to_s,
      @opts[:exp_name].to_s,
      @opts[:base_dir].to_s
    ]
    puts "./#{@script} #{args.join(' ')}"
    # Argument-list form: no shell is involved, so paths containing spaces or
    # shell metacharacters stay one argument and an empty value keeps its
    # position instead of shifting the following ones.
    system "./#{@script}", *args
  end

  private

  # Shared estimate: taxa * sites * bytes_per_site * security_factor in units
  # of 1E6, with a floor of 16. Prints the value and returns its integer part
  # as a String.
  def required_megabytes(bytes_per_site, security_factor)
    bytes_inner = @numtaxa.to_f * @seqlen.to_f * bytes_per_site
    required_mb = bytes_inner * security_factor * 1E-6
    required_mb = 16 unless required_mb > 16
    puts required_mb
    required_mb.to_i.to_s
  end
end
|
50
|
+
|
51
|
+
# Given an initial alignment, creates an initial bunch of ML trees, or updates
# the bunch of a previous iteration, inside +base_dir+. Every step is written
# to <base_dir>/starter.log and echoed on stdout.
#
# The heavy lifting is delegated to classes that are not defined in this file
# (Parsimonator, RaxmlLight, GammaScorer, RaxmlGammaSearch, NewickFile) and to
# CycleController when running on a cluster.
class TreeBunchStarter
  attr_reader :cluster

  # opts:: hash with :phylip and :base_dir, and optionally :prev_dir
  #        (directory of the previous iteration), :update_id (default 0),
  #        :num_threads (default 0) and :cluster (default false).
  def initialize(opts)
    @phylip = opts[:phylip]
    @base_dir = opts[:base_dir]
    @prev_dir = opts[:prev_dir]
    @update_id = opts[:update_id] || 0
    @num_threads = opts[:num_threads] || 0
    # working directories; they are created by #ready?
    @alignment_dir = File.join @base_dir, "alignments"
    @parsimony_trees_dir = File.join @base_dir, "parsimony_trees"
    @parsimony_trees_out_dir = File.join @parsimony_trees_dir, "output"
    @ml_trees_dir = File.join @base_dir, "ml_trees"
    @bestML_trees_dir = File.join @base_dir, "best_ml_trees"
    # the new phylip
    @phylip_updated = File.join @alignment_dir, "phy_#{@update_id.to_s}"
    # defaults
    @num_parsi_trees = 4
    @num_bestML_trees = @num_parsi_trees / 2
    @CAT_topology_bunch = File.join @ml_trees_dir, "CAT_topology_bunch.nw"
    @CAT_topology_bunch_order = File.join @ml_trees_dir, "CAT_topology_bunch_order.txt"
    @bestML_bunch = File.join @bestML_trees_dir, "best_bunch.nw"
    @prev_bestML_bunch = File.join @prev_dir, "best_ml_trees", "best_bunch.nw" unless @prev_dir.nil?
    @cluster = opts[:cluster] || false
  end

  # Writes +msg+ to the log file (as an error when +error+ is true, as info
  # otherwise) and prints it. The logger is created on first use, so base_dir
  # must exist by then.
  def logput(msg, error = false)
    @logger ||= Logger.new(File.join @base_dir, "starter.log")
    if error
      @logger.error msg
    else
      @logger.info msg
    end
    puts msg
  end

  # Creates the working directories and copies the alignment into place.
  # For update 0 the phylip file is copied into the alignment directory; for
  # later updates it is copied to phy_<update_id>.
  # Returns true only if none of the directories existed before the call.
  def ready?
    ready = true
    dirs = [@alignment_dir, @parsimony_trees_dir, @parsimony_trees_out_dir, @ml_trees_dir, @bestML_trees_dir]
    dirs.each do |d|
      if File.exist?(d)
        logput "Exists #{d}"
        ready = false
      else
        FileUtils.mkdir_p d
        logput "Created #{d}"
      end
    end
    if @update_id == 0
      FileUtils.cp @phylip, @alignment_dir
    else
      logput "Copying new update alignment (not expanding) from #{@phylip} to #{@phylip_updated}"
      FileUtils.cp @phylip, @phylip_updated
    end
    ready
  end

  # Runs one update iteration starting from the best trees of the previous
  # iteration.
  #
  # opts:: optional :num_parsi_trees, :num_bestML_trees and :exp_name.
  # Returns "cluster" when the work was handed to the cluster, otherwise the
  # best likelihood reported by the scoring step.
  # Raises RuntimeError when the previous bunch is missing or more best trees
  # are requested than will be produced. Any failure is logged and re-raised.
  def add_update(opts)
    check_options(opts)
    begin
      num_parsi_trees = opts[:num_parsi_trees] || @num_parsi_trees
      num_bestML_trees = opts[:num_bestML_trees] || @num_bestML_trees
      # prepare the parsimony starting trees
      # (@prev_bestML_bunch is nil when no :prev_dir was given)
      unless @prev_bestML_bunch && File.exist?(@prev_bestML_bunch)
        raise "prev bunch not ready #{@prev_bestML_bunch}"
      end
      last_best_bunch = NewickFile.new(@prev_bestML_bunch)
      last_best_bunch.save_each_newick_as(File.join(@parsimony_trees_dir, 'prev_parsi_tree'), "nw")
      prev_trees = Dir.entries(@parsimony_trees_dir).select { |f| f =~ /^prev_parsi_tree/ }
      if num_bestML_trees > num_parsi_trees * prev_trees.size
        raise "#bestML trees (#{num_bestML_trees}) is too higher than trees from previous round"
      end
      if @cluster
        logput "Exp #{opts[:exp_name]}, your cluster will take care of this update no #{@update_id}. stay tuned"
        c = CycleController.new(:iter => @update_id,
                                :phy => @phylip_updated,
                                :num_parsi_trees => num_parsi_trees,
                                :num_ptrees => num_parsi_trees * prev_trees.size,
                                :num_bestML_trees => num_bestML_trees,
                                :base_dir => @base_dir,
                                :exp_name => opts[:exp_name])
        c.run_as_batch
        "cluster"
      else
        logput "****** Start update no #{@update_id} ********"
        logput "step 1 of 3 : Parsimony starting trees #{num_parsi_trees} each\n----"
        update_parsimony_trees(num_parsi_trees, prev_trees)
        # raxml light phase (2/3): use threads here? / each tree could be computed in parallel
        logput "step 2 of 3 : ML trees\n----"
        generate_ML_trees(@parsimony_trees_out_dir, @phylip_updated)
        # raxml scoring of initial bunch (needs to be done after step 2, or not?)
        logput "step 3 of 3 : Score bunch of initial ML trees and select best #{num_bestML_trees}\n----"
        best_lh = score_ML_trees(num_bestML_trees, @phylip_updated)
        logput "Bunch of initial ML trees #{num_bestML_trees}, ready at #{@bestML_bunch}\n----"
        best_lh
      end
    rescue Exception => e
      # Exception (not StandardError) is rescued on purpose so that every
      # failure reaches the log; it is always re-raised unchanged.
      logput(e, true)
      raise
    end
  end

  # Runs a standard GAMMA ML search from scratch on the initial alignment.
  #
  # num_gamma_trees:: number of trees to search; 1 when nil.
  # Returns the text following the last "tree" in the line of the search's
  # stdout file that starts with "Final GAMMA-based Score of best".
  # Raises RuntimeError when that line is missing.
  def search_std(num_gamma_trees = nil)
    # Resolve the default first so the log reports the number actually used.
    num_trees = num_gamma_trees || 1
    search_opts = {
      :phylip => @phylip,
      :outdir => @ml_trees_dir,
      :num_gamma_trees => num_trees,
      :stderr => File.join(@ml_trees_dir, "err"),
      :stdout => File.join(@ml_trees_dir, "info"),
      :name => "std_GAMMA_search"
    }
    search_opts.merge!({:num_threads => @num_threads}) if @num_threads.to_i > 0
    r = RaxmlGammaSearch.new(search_opts)
    logput "Start ML search from scratch with #{num_trees} trees"
    r.run
    score_line = File.readlines(r.stdout).find { |l| l =~ /^Final GAMMA-based Score of best/ }
    raise "no final GAMMA score found in #{r.stdout}" if score_line.nil?
    best_lh = score_line.chomp.split("tree").last
    logput "Done ML search from scratch with #{num_trees} trees"
    best_lh
  end

  # Builds the initial bunch: parsimony trees, one ML search per parsimony
  # tree, then scoring and selection of the best trees.
  #
  # opts:: optional :num_parsi_trees, :num_bestML_trees and :exp_name.
  # Returns "cluster" when the work was handed to the cluster, otherwise the
  # best likelihood reported by the scoring step.
  # Raises RuntimeError when more best trees than parsimony trees are
  # requested. Any failure is logged and re-raised.
  def generate_initial_bunch(opts)
    check_options(opts)
    begin
      num_parsi_trees = opts[:num_parsi_trees] || @num_parsi_trees
      num_bestML_trees = opts[:num_bestML_trees] || @num_bestML_trees
      if num_bestML_trees > num_parsi_trees
        raise "#bestML trees (#{num_bestML_trees}) cant be higher than #parsi trees(#{num_parsi_trees})"
      end
      # phases 2 and 3 can be done in parallel
      if @cluster
        logput "your cluster will take care of this start #{@update_id}. Stay tuned"
        c = CycleController.new(:iter => 0,
                                :phy => @phylip,
                                :num_parsi_trees => num_parsi_trees,
                                :num_bestML_trees => num_bestML_trees,
                                :base_dir => @base_dir,
                                :exp_name => opts[:exp_name])
        c.run_as_batch
        "cluster"
      else
        logput "Start generating initial bunch"
        logput "step 1 of 3 : Parsimony starting trees #{num_parsi_trees}\n----"
        generate_parsimony_trees(num_parsi_trees)
        # raxml light phase (2/3): use threads here? / each tree could be computed in parallel
        logput "step 2 of 3 : ML trees\n----"
        generate_ML_trees(@parsimony_trees_dir, @phylip)
        # raxml scoring of initial bunch (needs to be done after step 2, or not?)
        logput "step 3 of 3 : Score bunch of initial ML trees and select best #{num_bestML_trees}\n----"
        best_lh = score_ML_trees(num_bestML_trees, @phylip)
        logput "Bunch of initial ML trees #{num_bestML_trees}, ready at #{@bestML_bunch}\n----"
        best_lh
      end
    rescue Exception => e
      # Exception (not StandardError) is rescued on purpose so that every
      # failure reaches the log; it is always re-raised unchanged.
      logput(e, true)
      raise
    end
  end

  private

  # Logs every key of +opts+ that is not a supported option. Unknown keys are
  # only reported, never rejected.
  def check_options(opts)
    supported_opts = [:num_parsi_trees, :num_bestML_trees, :exp_name]
    opts.keys.each do |key|
      logput "Option #{key} is unknown" unless supported_opts.include?(key)
    end
  end

  # Computes +num_parsi_trees+ parsimony trees for the initial alignment into
  # the parsimony trees directory.
  def generate_parsimony_trees(num_parsi_trees)
    logput "Starting parsimony with #{num_parsi_trees} trees"
    parsimonator_opts = {
      :phylip => @phylip,
      :num_trees => num_parsi_trees,
      :outdir => @parsimony_trees_dir,
      :stderr => File.join(@parsimony_trees_dir, "err"),
      :stdout => File.join(@parsimony_trees_dir, "info"),
      :name => "parsimony_initial"
    }
    parsi = Parsimonator.new(parsimonator_opts)
    logput "Start computing parsimony trees of initial bunch"
    parsi.run
    logput "Done with parsimony trees of initial bunch"
  end

  # Runs one parsimony computation of +num_parsi_trees+ trees on the updated
  # alignment for every file name in +trees+ (looked up in the parsimony trees
  # directory), writing to the parsimony output directory.
  def update_parsimony_trees(num_parsi_trees, trees)
    trees.each_with_index do |parsi_start_tree, i|
      logput "Starting new parsimony tree with #{parsi_start_tree} trees"
      parsimonator_opts = {
        :phylip => @phylip_updated,
        :num_trees => num_parsi_trees,
        :newick => File.join(@parsimony_trees_dir, parsi_start_tree),
        :outdir => @parsimony_trees_out_dir,
        :stderr => File.join(@parsimony_trees_out_dir, "err_#{parsi_start_tree}"),
        :stdout => File.join(@parsimony_trees_out_dir, "info_#{parsi_start_tree}"),
        :name => "u#{@update_id}_#{parsi_start_tree}"
      }
      parsi = Parsimonator.new(parsimonator_opts)
      logput "Start computing parsimony trees of #{parsi_start_tree}, #{i+1} of #{trees.size}"
      parsi.run
      logput "run with options #{parsi.ops.to_s}"
      logput "Done with parsimony trees of #{parsi_start_tree}, #{i+1} of #{trees.size}"
    end
  end

  # Runs one RAxML-Light search per RAxML_parsimonyTree* file found in
  # +starting_trees_dir+ and appends each resulting newick string to the CAT
  # topology bunch file and the run name to the matching order file.
  # Raises RuntimeError when there is no starting tree.
  def generate_ML_trees(starting_trees_dir, phylip)
    starting_trees = Dir.entries(starting_trees_dir).select { |f| f =~ /^RAxML_parsimonyTree/ }
    raise "no starting trees available" if starting_trees.empty?
    starting_trees.each_with_index do |parsimony_tree, i|
      # ideally we just submit here to the cluster...and start phase 3 when all are done
      tree_id = parsimony_tree.split("parsimonyTree.").last
      light_opts = {
        :phylip => phylip,
        :outdir => @ml_trees_dir,
        :flags => " -D ", # default to a RF convergence criterion
        :starting_newick => File.join(starting_trees_dir, parsimony_tree),
        :stderr => File.join(@ml_trees_dir, "err#{tree_id}"),
        :stdout => File.join(@ml_trees_dir, "info#{tree_id}"),
        :name => "starting_parsimony_tree_" + tree_id
      }
      light_opts.merge!({:num_threads => @num_threads}) if @num_threads.to_i > 0
      r = RaxmlLight.new(light_opts)
      logput "Start ML search for #{parsimony_tree} (#{i+1} of #{starting_trees.size})"
      r.run
      logput "Done ML search for #{parsimony_tree} (#{i+1} of #{starting_trees.size})"
      # add the result to the bunch
      newick_str = NewickFile.new(File.join(r.outdir, "RAxML_result.#{r.name}")).newickStrings[0].str
      append_to_file(@CAT_topology_bunch, newick_str)
      append_to_file(@CAT_topology_bunch_order, r.name)
    end
  end

  # Scores the CAT topology bunch and writes the first +num_bestML_trees+
  # ranked trees to the best bunch file.
  #
  # The ranking is read from RAxML_info.<scorer name>: lines of the form
  # "<tree index> -<likelihood>" are taken in file order, and the index selects
  # the (0-based) line of the bunch file.
  # Returns the likelihood of the first ranked line, as a String.
  # Raises RuntimeError when the info file has no ranking line.
  def score_ML_trees(num_bestML_trees, phylip)
    logput "Starting scoring of ML trees"
    scorer_opts = {
      :phylip => phylip,
      :outdir => @ml_trees_dir,
      :starting_newick => @CAT_topology_bunch,
      :stderr => File.join(@ml_trees_dir, "err_scores"),
      :stdout => File.join(@ml_trees_dir, "info_scores"),
      :name => "SCORES"
    }
    scorer_opts.merge!({:num_threads => @num_threads}) if @num_threads.to_i > 0
    scorer = GammaScorer.new(scorer_opts)
    scorer.run
    logput "Done scoring of ML trees, selecting the best #{num_bestML_trees}..."
    rank_file = File.join @ml_trees_dir, "RAxML_info.#{scorer.name}"
    # The decimal point is escaped: an unescaped "." accepted any character
    # between the integer and fractional digits.
    lh_lines = File.readlines(rank_file).select { |l| l =~ /^[0-9]+ -[0-9]+\.[0-9]+$/ }
    raise "no likelihood scores found in #{rank_file}" if lh_lines.empty?
    rank_id = lh_lines.map { |l| l.split.first }
    best_lh = lh_lines.first.split.last
    newick_bunch = File.readlines(@CAT_topology_bunch)
    File.open(@bestML_bunch, "w") do |f|
      rank_id[0...num_bestML_trees].each_with_index do |newick_id, i|
        f.puts newick_bunch[newick_id.to_i]
        logput "#{i+1}: Selected tree with id #{newick_id}"
      end
    end
    best_lh
  end

  # Appends +str+ as one line to +file+, creating the file when needed.
  def append_to_file(file, str)
    File.open(file, "a+") do |f|
      f.puts str
    end
  end
end
|
data/rput.gemspec
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{rput}
|
8
|
+
s.version = "0.1.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = [%q{Fernando Izquierdo-Carrasco}]
|
12
|
+
s.date = %q{2012-03-29}
|
13
|
+
s.description = %q{Automated iterations of raxml runs over externally-extended phylip files}
|
14
|
+
s.email = %q{fer.izquierdo@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
"Gemfile",
|
22
|
+
"Gemfile.lock",
|
23
|
+
"LICENSE.txt",
|
24
|
+
"README.rdoc",
|
25
|
+
"Rakefile",
|
26
|
+
"VERSION",
|
27
|
+
"lib/experiment.rb",
|
28
|
+
"lib/starter.rb",
|
29
|
+
"rput.gemspec",
|
30
|
+
"test/helper.rb",
|
31
|
+
"test/test_rput.rb"
|
32
|
+
]
|
33
|
+
s.homepage = %q{http://github.com/fizquierdo/rput}
|
34
|
+
s.licenses = [%q{MIT}]
|
35
|
+
s.require_paths = [%q{lib}]
|
36
|
+
s.rubygems_version = %q{1.8.6}
|
37
|
+
s.summary = %q{Raxml-based perpetually updated tree}
|
38
|
+
|
39
|
+
if s.respond_to? :specification_version then
|
40
|
+
s.specification_version = 3
|
41
|
+
|
42
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
43
|
+
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
44
|
+
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
45
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
46
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
|
47
|
+
s.add_development_dependency(%q<rcov>, [">= 0"])
|
48
|
+
s.add_development_dependency(%q<rraxml>, ["~> 0.1.2"])
|
49
|
+
else
|
50
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
51
|
+
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
52
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
53
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
|
54
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
55
|
+
s.add_dependency(%q<rraxml>, ["~> 0.1.2"])
|
56
|
+
end
|
57
|
+
else
|
58
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
59
|
+
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
60
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
61
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
|
62
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
63
|
+
s.add_dependency(%q<rraxml>, ["~> 0.1.2"])
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Shared setup for the test suite: activates the bundled gems, loads the test
# libraries and puts lib/ and test/ on the load path.
require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'test/unit'
require 'shoulda'

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
# The gem ships lib/experiment.rb and lib/starter.rb and no lib/rput.rb, so
# `require 'rput'` raised a LoadError before any test could run.
require 'experiment'
require 'starter'

class Test::Unit::TestCase
end
|
data/test/test_rput.rb
ADDED
metadata
ADDED
@@ -0,0 +1,167 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rput
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Fernando Izquierdo-Carrasco
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2012-03-29 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
type: :development
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
+
none: false
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
hash: 3
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
version_requirements: *id001
|
32
|
+
name: shoulda
|
33
|
+
prerelease: false
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
type: :development
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ~>
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
hash: 31
|
42
|
+
segments:
|
43
|
+
- 3
|
44
|
+
- 12
|
45
|
+
version: "3.12"
|
46
|
+
version_requirements: *id002
|
47
|
+
name: rdoc
|
48
|
+
prerelease: false
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
type: :development
|
51
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ~>
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
hash: 23
|
57
|
+
segments:
|
58
|
+
- 1
|
59
|
+
- 0
|
60
|
+
- 0
|
61
|
+
version: 1.0.0
|
62
|
+
version_requirements: *id003
|
63
|
+
name: bundler
|
64
|
+
prerelease: false
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
type: :development
|
67
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ~>
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
hash: 49
|
73
|
+
segments:
|
74
|
+
- 1
|
75
|
+
- 8
|
76
|
+
- 3
|
77
|
+
version: 1.8.3
|
78
|
+
version_requirements: *id004
|
79
|
+
name: jeweler
|
80
|
+
prerelease: false
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
type: :development
|
83
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
84
|
+
none: false
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
hash: 3
|
89
|
+
segments:
|
90
|
+
- 0
|
91
|
+
version: "0"
|
92
|
+
version_requirements: *id005
|
93
|
+
name: rcov
|
94
|
+
prerelease: false
|
95
|
+
- !ruby/object:Gem::Dependency
|
96
|
+
type: :development
|
97
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
98
|
+
none: false
|
99
|
+
requirements:
|
100
|
+
- - ~>
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
hash: 31
|
103
|
+
segments:
|
104
|
+
- 0
|
105
|
+
- 1
|
106
|
+
- 2
|
107
|
+
version: 0.1.2
|
108
|
+
version_requirements: *id006
|
109
|
+
name: rraxml
|
110
|
+
prerelease: false
|
111
|
+
description: Automated iterations of raxml runs over externally-extended phylip files
|
112
|
+
email: fer.izquierdo@gmail.com
|
113
|
+
executables: []
|
114
|
+
|
115
|
+
extensions: []
|
116
|
+
|
117
|
+
extra_rdoc_files:
|
118
|
+
- LICENSE.txt
|
119
|
+
- README.rdoc
|
120
|
+
files:
|
121
|
+
- .document
|
122
|
+
- Gemfile
|
123
|
+
- Gemfile.lock
|
124
|
+
- LICENSE.txt
|
125
|
+
- README.rdoc
|
126
|
+
- Rakefile
|
127
|
+
- VERSION
|
128
|
+
- lib/experiment.rb
|
129
|
+
- lib/starter.rb
|
130
|
+
- rput.gemspec
|
131
|
+
- test/helper.rb
|
132
|
+
- test/test_rput.rb
|
133
|
+
homepage: http://github.com/fizquierdo/rput
|
134
|
+
licenses:
|
135
|
+
- MIT
|
136
|
+
post_install_message:
|
137
|
+
rdoc_options: []
|
138
|
+
|
139
|
+
require_paths:
|
140
|
+
- lib
|
141
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
142
|
+
none: false
|
143
|
+
requirements:
|
144
|
+
- - ">="
|
145
|
+
- !ruby/object:Gem::Version
|
146
|
+
hash: 3
|
147
|
+
segments:
|
148
|
+
- 0
|
149
|
+
version: "0"
|
150
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
|
+
none: false
|
152
|
+
requirements:
|
153
|
+
- - ">="
|
154
|
+
- !ruby/object:Gem::Version
|
155
|
+
hash: 3
|
156
|
+
segments:
|
157
|
+
- 0
|
158
|
+
version: "0"
|
159
|
+
requirements: []
|
160
|
+
|
161
|
+
rubyforge_project:
|
162
|
+
rubygems_version: 1.8.6
|
163
|
+
signing_key:
|
164
|
+
specification_version: 3
|
165
|
+
summary: Raxml-based perpetually updated tree
|
166
|
+
test_files: []
|
167
|
+
|