seqtrimnext 2.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# $: << '/Users/dariogf/progs/ruby/gems/scbi_plot/lib'
|
|
2
|
+
|
|
3
|
+
require 'scbi_plot'
|
|
4
|
+
# require 'gnu_plot_graph'
|
|
5
|
+
|
|
6
|
+
# Draws one histogram per plugin statistic into the local 'graphs' directory.
#
# All the work happens in the constructor: building a GraphStats object
# immediately plots every statistic found in +stats+.
class GraphStats

  # stats:: hash-like object mapping a plugin name to a hash of
  #         { stats_name => values } (it is iterated with +each+ and +keys+).
  # initial_stats:: statistics handed to each plugin's +plot_setup+. When nil
  #                 they are read from OUTPUT_PATH/initial_stats.json.
  def initialize(stats,initial_stats=nil)
    init_stats = initial_stats

    if init_stats.nil?
      raw_json = File.read(File.join(OUTPUT_PATH,'initial_stats.json'))
      init_stats = JSON::parse(raw_json)
    end

    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    Dir.mkdir('graphs') unless File.exist?('graphs')

    @stats = stats

    @stats.each do |plugin_name,plugin_value|
      plugin_class = resolve_plugin_class(plugin_name)

      plugin_value.keys.each do |stats_name|
        puts "Plotting #{stats_name} from #{plugin_name}"

        next if plugin_class.graph_ignored?(stats_name)

        plot_stat(plugin_class, plugin_name, stats_name, plugin_value[stats_name], init_stats)
      end
    end
  end

  private

  # Looks up the class named +plugin_name+. Falls back to the generic Plugin
  # class when the constant cannot be resolved, or when it resolves to
  # something that does not offer the plugin graph interface.
  def resolve_plugin_class(plugin_name)
    candidate = Object.const_get(plugin_name)
    candidate.respond_to?(:graph_ignored?) ? candidate : Plugin
  rescue StandardError, LoadError
    # NameError / TypeError for unknown or malformed names, LoadError for a
    # failing autoload. Signals and SystemExit are deliberately not swallowed.
    Plugin
  end

  # Builds and renders the histogram for a single statistic.
  def plot_stat(plugin_class, plugin_name, stats_name, values, init_stats)
    x = []
    y = []

    file_name = File.join('graphs',plugin_class.get_graph_filename(plugin_name,stats_name)+'.png')
    plot = ScbiPlot::Histogram.new(file_name,plugin_class.get_graph_title(plugin_name,stats_name))

    # auto_setup receives x and y so that it can fill them in place.
    plugin_class.auto_setup(values,stats_name,x,y)

    # plot_setup returns true when the plugin has already handled the plot
    # itself; otherwise the default histogram is drawn here.
    return if plugin_class.plot_setup(values,stats_name,x,y,init_stats,plot)
    return if x.empty? || y.empty? || x.length != y.length

    plot.x_label = "Length"
    plot.y_label = "Count"

    plot.add_x(x)
    plot.add_y(y)

    plot.do_graph
  end

end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
require 'open-uri'
|
|
2
|
+
|
|
3
|
+
# Downloads one of the predefined database bundles from the SCBI server and
# unpacks it into a local directory. The download starts from the constructor.
class InstallDatabase

  # type:: name of the bundle to install; must be one of the known types below.
  # db_path:: directory that receives the unpacked databases (created if missing).
  #
  # For an unknown +type+ nothing is downloaded; the list of valid types is
  # printed instead.
  def initialize(type,db_path)
    types = ['core','cont_bacteria','cont_fungi','cont_mitochondrias','cont_plastids','cont_ribosome']

    unless types.include?(type)
      puts "Unknown database #{type}"
      puts "Available databases:"
      puts types.join("\n")
      return
    end

    # mkdir_p is a no-op for an existing directory, so no existence check is
    # needed (File.exists? was removed in Ruby 3.2).
    FileUtils.mkdir_p(db_path)

    remote_db_url = "http://www.scbi.uma.es/downloads/#{type}_db.zip"
    # Name the temporary archive after the requested bundle rather than always
    # calling it core_db.zip.
    local_path = File.join(db_path,"#{type}_db.zip")
    puts "Install databases: #{type}"

    download_and_unzip(remote_db_url,local_path)
  end

  # Fetches +from_url+ into +to_file+, unzips it inside the directory that
  # contains +to_file+ and finally deletes the archive.
  def download_and_unzip(from_url,to_file)
    puts "Downloading databases from #{from_url} to #{to_file}"

    # Kernel#open no longer opens URLs on Ruby 3; URI.open (open-uri) does.
    # The archive is binary data, so it is written in binary mode.
    URI.open(from_url) do |remote|
      File.open(to_file, 'wb') { |local| IO.copy_stream(remote, local) }
    end

    puts "Unzipping #{to_file}"

    # Absolute path + argument list: safe for relative paths and for paths
    # containing spaces or shell metacharacters.
    archive = File.expand_path(to_file)
    unless system('unzip', archive, chdir: File.dirname(archive))
      $stderr.puts "Could not unzip #{to_file}"
    end

    # The archive is removed whether or not unzip succeeded, as before.
    FileUtils.rm_f(archive)
  end

end
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
#########################################
|
|
2
|
+
# Author:: Almudena Bocinos Rioboo
|
|
3
|
+
# This class provides methods to check whether the necessary software is installed on the user's system
|
|
4
|
+
#########################################
|
|
5
|
+
|
|
6
|
+
# Checks that the external programs and Ruby gems needed by seqtrim are
# available on this machine.
class InstallRequirements

  # Builds the requirement tables (command or gem name => help message).
  def initialize
    @external_requirements = {}
    @ruby_requirements = {}
    load_requirements
  end

  # Runs the external-program check and then the gem check, printing every
  # problem found to stderr.
  #
  # Returns true when neither check reported an error, false otherwise.
  def check_install_requirements
    system_errors = check_system_requirements
    report_errors(' Unable to find these external requeriments:', system_errors)

    ruby_errors = check_ruby_requirements
    report_errors(' Unable to find these Ruby requeriments:', ruby_errors)

    system_errors.empty? && ruby_errors.empty?
  end

  private

  # Prints +header+ followed by one line per error; prints nothing when
  # +errors+ is empty.
  def report_errors(header, errors)
    return if errors.empty?

    $stderr.puts header
    errors.each { |error| $stderr.puts ' -' + error }
  end

  # Returns one message per external command that cannot be found.
  def check_system_requirements
    errors = []

    @external_requirements.each do |cmd,msg|
      next if command_available?(cmd)

      errors.push "It's necessary to install #{cmd}. " + msg
    end

    errors
  end

  # Returns one message per missing gem when +install+ is false. When
  # +install+ is true the user is asked about each missing gem instead and
  # no error is recorded for it.
  def check_ruby_requirements(install=true)
    errors = []

    @ruby_requirements.each do |cmd,msg|
      next if gem_installed?(cmd)

      if install
        puts "Are you sure you wan't to install #{cmd} gem? ([Y/n]):"
        # $stdin, not the undefined local `stdin`. gets returns nil on EOF,
        # which is treated like an empty answer (the default, yes).
        answer = $stdin.gets.to_s
        if answer.chomp.upcase != 'N'
          # NOTE(review): this only echoes the install command, it does not
          # run it - confirm whether a real `gem install` is intended.
          system('echo', 'gem', 'install', cmd)
        end
      else
        errors.push "It's necessary to install #{cmd}. Issue a: gem install #{cmd} " + msg
      end
    end

    errors
  end

  # True when +cmd+ is found by `which`.
  def command_available?(cmd)
    system('which', cmd, out: File::NULL) ? true : false
  end

  # True when a gem called exactly +name+ is installed. The anchored pattern
  # avoids matching gems whose name merely contains +name+.
  def gem_installed?(name)
    system('gem', 'list', '-i', "^#{Regexp.escape(name)}$", out: File::NULL) ? true : false
  end

  # seqtrim's requirements are specified here
  def load_requirements

    @external_requirements['blastn']= "You need to install Blast+ 2.2.24 or greater and make sure it is available in your path (export PATH=$PATH:path_to_blast).\nYou can download it from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/"
    @external_requirements['cd-hit-454']= "Download from http://code.google.com/p/cdhit/downloads/list"
    @external_requirements['gnuplot']= "Download from http://www.gnuplot.info/download.html"

    # @external_requirements['pepe']= ""

    # @ruby_requirements = { 'n2array' => "" ,
    @ruby_requirements['narray'] = ''
    @ruby_requirements['gnuplot'] = ''
    @ruby_requirements['term-ansicolor'] = ''
    @ruby_requirements['xml-simple'] = ''
    @ruby_requirements['scbi_blast'] = ''
    @ruby_requirements['scbi_drb'] = ''
    @ruby_requirements['scbi_fasta'] = ''
    @ruby_requirements['scbi_fastq'] = ''
    @ruby_requirements['scbi_plot'] = ''
    @ruby_requirements['scbi_math'] = ''
    # @ruby_requirements['scbi_fastq2'] = ''

  end # end def

  # Placeholder: no installation is performed yet.
  def install

    # gem install gnuplot
    # gem install narray
    # gem install scbi_blast
    # gem install scbi_drb
    # gem install scbi_fasta
    # gem install scbi_fastq
    # gem install term-ansicolor
    # gem install xml-simple

  end

end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
|
|
2
|
+
#List all entries in a DB, by name
|
|
3
|
+
#
|
|
4
|
+
#list all DB names if db is ALL
|
|
5
|
+
|
|
6
|
+
# Prints the names of the entries stored in a formatted database file.
# When the requested file is not found, the names of the available databases
# are printed instead.
class ListDb

  # path:: base directory of the databases; files are looked up under
  #        path/formatted.
  # db:: file name of the database inside path/formatted.
  def initialize(path,db)
    filename = File.join(path,'formatted',db)

    # File.file? (rather than the removed File.exists?) also rejects
    # directories, e.g. when +db+ is an empty string.
    if File.file?(filename)
      # The block form closes the file even if printing fails.
      File.foreach(filename) do |line|
        # Print the text that follows '>' on header lines, without the line end.
        entry_name = line[/^>(.*)$/, 1]
        puts entry_name if entry_name
      end
    else
      puts "File #{filename} doesn't exists"
      puts ''
      puts "Available databases:"
      puts '-'*20
      ListDb.list_databases(path).each { |name| puts name }
    end
  end

  # Returns the base names of every *.fasta file found in path/formatted, or
  # an empty array when +path+ does not exist.
  def self.list_databases(path)
    return [] unless File.exist?(path)

    Dir.glob(File.join(path,'formatted','*.fasta')).map { |fasta| File.basename(fasta) }
  end

end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
|
|
2
|
+
# Keeps the formatted Blast databases found under a directory up to date.
# Building an instance immediately checks every database and re-creates the
# ones whose source files changed.
class MakeBlastDb

  # dir:: main database directory. Each sub-directory (other than
  #       'status_info' and 'formatted') is treated as one database.
  def initialize(dir)

    @db_folder = dir
    @status_folder = File.join(@db_folder,'status_info')
    @formatted_folder = File.join(@db_folder,'formatted')

    update_dbs
  end

  # Concatenates every regular, non-hidden file found (recursively) under
  # +path_start+ into +path_end+, replacing any previous content. Two newline
  # characters are written after each file, and a "cat of <file>" line is
  # printed for each one.
  #
  # Done in Ruby rather than through a shell loop so that paths containing
  # spaces or shell metacharacters are handled correctly.
  def catFasta(path_start,path_end)
    $LOG.debug("Cat of #{path_start}")

    # Collect the sources before the output file is created, and never feed
    # the output file into itself.
    target = File.expand_path(path_end)
    sources = files_to_merge(path_start).reject { |source| File.expand_path(source) == target }

    File.open(path_end, 'wb') do |merged|
      sources.each do |source|
        puts "cat of #{source}"
        File.open(source, 'rb') { |input| IO.copy_stream(input, merged) }
        merged.write("\n\n")
      end
    end
  end

  # True when +path_db+ contains nothing but '.', '..' and '.DS_Store'.
  def dirEmpty?(path_db)
    # Dir.entries does not leave a directory handle open.
    (Dir.entries("#{path_db}") - ['.','..','.DS_Store']).empty?
  end

  # Merges the files of +path_db+ into formatted_folder/db_name.fasta.
  # Does nothing when +path_db+ is empty.
  def merge_db_files(path_db, db_name, formatted_folder)
    return if dirEmpty?(path_db)

    # the merge only needs to happen when the files in subfolder1 change
    formatted_file = File.join(formatted_folder, db_name+'.fasta')
    catFasta(File.join(path_db),formatted_file)
  end

  # Runs makeblastdb on formatted_folder/db_name.fasta, appending its output
  # to logs/formatdb.log, and logs the command. +path_db+ is not used.
  def self.format_db(path_db, db_name, formatted_folder)
    formatted_file = File.join(formatted_folder, db_name+'.fasta')
    log_file = 'logs/formatdb.log'

    # Without the logs directory the redirection fails and makeblastdb
    # never runs, so make sure it exists.
    FileUtils.mkdir_p(File.dirname(log_file))

    cmd = ['makeblastdb', '-in', formatted_file, '-parse_seqids', '-dbtype', 'nucl']
    # Argument list instead of a shell string: safe for paths with spaces.
    system(*cmd, out: [log_file, 'a'])
    $LOG.info("#{cmd.join(' ')} >> #{log_file}")
  end

  #---------------------------------------------------------------------------------------------------
  # Checks whether the files of each database have been updated, and only when that has happened
  # makeblastdb is run again.
  # Considers the following directory structure:
  #
  #   @db_folder               is the main directory
  #   @db_folder/formatted     is where the created/updated databases are stored
  #   @db_folder/<db_name>     is where all the fasta files of database <db_name> are stored
  #   @db_folder/status_info   is where the listing of each <db_name> is kept, to detect changes
  #---------------------------------------------------------------------------------------------------
  def update_dbs

    FileUtils.mkdir_p(@status_folder)
    FileUtils.mkdir_p(@formatted_folder)

    ignore_folders = ['.','..','status_info','formatted']

    $LOG.info("Checking Blast databases at #{@db_folder} for updates")

    Dir.entries(@db_folder).each do |db_name|

      db_folder = File.join(@db_folder,db_name)
      next if ignore_folders.include?(db_name) || !File.directory?(db_folder)

      # set status files
      new_status_file = File.join(@status_folder,'new_'+db_name+'.txt')
      old_status_file = File.join(@status_folder,'old_'+db_name+'.txt')

      # write the current recursive listing to new_status_file
      $LOG.debug("ls -lR #{db_folder} > #{new_status_file}")
      system('ls', '-lR', db_folder, out: new_status_file)

      # reformat only when the listing changed or the formatted file is missing
      next unless database_changed?(db_name, new_status_file, old_status_file)

      $LOG.info("Database #{db_name} modified. Merging and formatting")

      merge_db_files(db_folder,db_name,@formatted_folder)

      MakeBlastDb.format_db(db_folder,db_name,@formatted_folder)

      # the new listing becomes the reference for the next run
      FileUtils.mv(new_status_file, old_status_file) if File.exist?(new_status_file)
    end

  end

  private

  # True when there is no previous listing, the listings differ (or the new
  # one could not be written), or the formatted fasta file does not exist.
  def database_changed?(db_name, new_status_file, old_status_file)
    return true unless File.exist?(old_status_file)
    return true unless File.exist?(new_status_file)
    return true unless FileUtils.compare_file(new_status_file, old_status_file)

    !File.exist?(File.join(@formatted_folder,db_name+'.fasta'))
  end

  # Returns, in sorted depth-first order, the regular files below +path+
  # whose name does not start with a dot. Symbolic links are neither
  # followed nor included.
  def files_to_merge(path)
    return [path] if File.file?(path) && !File.symlink?(path)
    return [] unless File.directory?(path)

    Dir.entries(path).sort.flat_map do |entry|
      next [] if entry == '.' || entry == '..'

      full_path = File.join(path, entry)
      if File.symlink?(full_path)
        []
      elsif File.directory?(full_path)
        files_to_merge(full_path)
      elsif File.file?(full_path) && !entry.start_with?('.')
        [full_path]
      else
        []
      end
    end
  end

end
|
|
112
|
+
|
|
113
|
+
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
# Runs a single sequence through the legacy blastall program, using auxiliary
# files in the current working directory.
class OneBlast

  # database:: value passed to blastall's -d option.
  # blast_type:: value passed to blastall's -p option.
  def initialize(database, blast_type = 'blastp')
    @database = database
    @blast_type = blast_type
    # number of sequences blasted so far, used to name each query
    @c = 0
  end

  # Writes +seq_fasta+ to one_blast_aux.fasta under the name SEQNAME_<n>,
  # runs blastall on it and then reads one_blast_aux.out line by line.
  #
  # NOTE(review): the value returned is the still-open File object for
  # one_blast_aux.out (the result of each_line), not the last line kept in
  # +res+ - confirm what callers expect before changing it.
  def do_blast(seq_fasta)
    @f = File.new('one_blast_aux.fasta','w+')
    @f.puts ">SEQNAME_#{@c}"
    @f.puts seq_fasta
    @c += 1
    @f.close

    blastall = '~blast/programs/x86_64/bin/blastall'
    cmd = blastall + ' -p ' + @blast_type + ' -d ' + @database + ' -i one_blast_aux.fasta -o one_blast_aux.out'
    system(cmd)

    res = ''
    report = File.open('one_blast_aux.out')
    report.each_line do |line|
      res = line
    end
  end

  # Nothing to release; present for interface completeness.
  def close
  end

end
|
|
40
|
+
|
|
41
|
+
|