seqtrimnext_report 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ === 0.0.2 2011-06-15
2
+
3
+ Some small fixes
4
+
5
+ === 0.0.1 2011-06-14
6
+
7
+ * Initial release
@@ -0,0 +1,24 @@
1
+ History.txt
2
+ Manifest.txt
3
+ PostInstall.txt
4
+ README.rdoc
5
+ Rakefile
6
+ lib/seqtrimnext_report.rb
7
+ script/console
8
+ script/destroy
9
+ script/generate
10
+ test/test_helper.rb
11
+ test/test_seqtrimnext_report.rb
12
+ bin/generate_report.rb
13
+ lib/seqtrimnext_report/classes/params_report.rb
14
+ lib/seqtrimnext_report/classes/rejected_report.rb
15
+ lib/seqtrimnext_report/classes/stats_report.rb
16
+ lib/seqtrimnext_report/config/plugin_nts.json
17
+ lib/seqtrimnext_report/config/plugin_seqs.json
18
+ lib/seqtrimnext_report/latex_src/input_graph.tex
19
+ lib/seqtrimnext_report/latex_src/main.tex
20
+ lib/seqtrimnext_report/latex_src/output_files.tex
21
+ lib/seqtrimnext_report/latex_src/output_graph.tex
22
+ lib/seqtrimnext_report/latex_src/piescbi.jpg
23
+ lib/seqtrimnext_report/latex_src/qv_graph.tex
24
+ lib/seqtrimnext_report/latex_src/ref_seqs.png
@@ -0,0 +1,7 @@
1
+
2
+ For more information on seqtrimnext_report, see http://seqtrimnext_report.rubyforge.org
3
+
4
+ NOTE: Change this information in PostInstall.txt
5
+ You can also delete it if you don't want it.
6
+
7
+
@@ -0,0 +1,49 @@
1
+ = seqtrimnext_report
2
+
3
+ * http://www.scbi.uma.es/downloads
4
+
5
+ == DESCRIPTION:
6
+
7
+ A PDF report generator for SeqtrimNEXT preprocessing software from SCBI.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ * Generates an eye-catching PDF report with the output produced by SeqtrimNEXT
12
+
13
+ == SYNOPSIS:
14
+
15
+ generate_report.rb seqtrim_output_folder
16
+
17
+ == REQUIREMENTS:
18
+
19
+ * seqtrimnext gem
20
+ * pdflatex binary
21
+
22
+ == INSTALL:
23
+
24
+ gem install seqtrimnext_report
25
+
26
+ == LICENSE:
27
+
28
+ (The MIT License)
29
+
30
+ Copyright (c) 2011 Noe Fernandez & Dario Guerrero
31
+
32
+ Permission is hereby granted, free of charge, to any person obtaining
33
+ a copy of this software and associated documentation files (the
34
+ 'Software'), to deal in the Software without restriction, including
35
+ without limitation the rights to use, copy, modify, merge, publish,
36
+ distribute, sublicense, and/or sell copies of the Software, and to
37
+ permit persons to whom the Software is furnished to do so, subject to
38
+ the following conditions:
39
+
40
+ The above copyright notice and this permission notice shall be
41
+ included in all copies or substantial portions of the Software.
42
+
43
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
44
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
45
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
46
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
47
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
48
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
49
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,26 @@
1
# Rakefile for the seqtrimnext_report gem: declares the gem specification
# through Hoe and loads the newgem helper tasks.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/seqtrimnext_report'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'seqtrimnext_report' do
  self.developer 'Noe Fernandez & Dario Guerrero', 'noefp@gmail.com, dariogf@gmail.com'
  self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
  self.rubyforge_name = self.name # TODO this is default value
  self.extra_deps = [['seqtrimnext','>= 2.0.31']]
end

require 'newgem/tasks'
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
#
# The original list named a task `:featuresm`, which nothing in this file
# defines (and the cucumber plugin above is commented out), so a plain `rake`
# aborted with "Don't know how to build task". Only tasks that are actually
# defined are added as prerequisites of the default task.
default_prerequisites = [:spec, :features, :redocs].select do |task_name|
  Rake::Task.task_defined?(task_name)
end
task :default => default_prerequisites
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Noe Fdez Pozo 2011-05-11. To build a PDF with latex parsing SeqTrimNext output files
4
+
5
+ #---------------------------------------------- tells Ruby where the classes are located
6
+ ROOT_PATH=File.dirname(__FILE__)
7
+ # $: << File.expand_path(File.join(ROOT_PATH, "classes"))
8
+ # $: << '/Users/dariogf/progs/ruby/gems/seqtrimnext/lib'
9
+
10
+ #---------------------------------------------- gems
11
+ require 'json'
12
+ #---------------------------------------------- classes
13
+
14
+ require 'seqtrimnext'
15
+ require 'seqtrimnext_report'
16
+ require 'params'
17
+ require 'scbi_stats'
18
+ require 'params_report'
19
+ require 'stats_report'
20
+ require 'rejected_report'
21
+
22
+ #---------------------------------------------- method to parse a json
23
# Loads a JSON document from +file+, ignoring comment lines.
#
# Lines whose first non-blank character is '#' are removed before parsing, and
# so are the empty lines left behind.
#
# file - path of the file to read.
#
# Returns the parsed JSON data, or nil when the file does not exist or is
# empty once the comment lines have been removed.
# Raises JSON::ParserError when the remaining text is not valid JSON.
def get_json_data(file)
  # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
  return nil unless File.exist?(file)

  # File.read closes the handle even when reading fails.
  text = File.read(file).gsub(/^\s*#.*$/,'').gsub(/^\n$/,'')
  return nil if text.empty?

  JSON.parse(text)
end
40
+
41
# Main script: validates the SeqTrimNext output folder given on the command
# line, writes the LaTeX fragments of the report and compiles them into
# statistics_report.pdf inside that folder.
require 'fileutils'

if ARGV.count != 1
  puts "Usage: #{File.basename($0)} output_files_folder"
  exit(-1)
end

#---------------------------------------------- check if files exist
output_files = ARGV.shift

# File.directory?/File.exist? replace Dir.exists?/File.exists?, which were
# removed in Ruby 3.2.
unless File.directory?(output_files)
  puts "Directory #{output_files} doesn't exists"
  exit(-1)
end

# Stop at the first required input that is missing.
['used_params.txt', 'initial_stats.json', 'stats.json'].each do |required_file|
  next if File.exist?(File.join(output_files, required_file))

  puts "File #{required_file} not found.\n"
  exit(-1)
end

puts "Generating report"
puts "="*50
puts "used_params.txt, initial_stats.json and stats.json files were found"

#---------------------------------------------- MAIN --------------------------------
begin
  # load used params
  all_params = Params.new(File.join(output_files, 'used_params.txt'))

  # load initial and final stats
  initial_stats = get_json_data(File.join(output_files, 'initial_stats.json'))
  stats = get_json_data(File.join(output_files, 'stats.json'))

  # load json configuration shipped with the gem
  report_lib = File.join(File.dirname(__FILE__), '..', 'lib', 'seqtrimnext_report')
  plugin_fix_hash = get_json_data(File.join(report_lib, 'config', 'plugin_seqs.json'))
  plugin_nts_hash = get_json_data(File.join(report_lib, 'config', 'plugin_nts.json'))

  output_latex = File.join(output_files, 'latex')

  # Copy the LaTeX sources and the graphs next to them. FileUtils is used
  # instead of shelling out to `cp`, so folder names containing spaces or shell
  # metacharacters are handled safely.
  FileUtils.cp_r(File.join(report_lib, 'latex_src'), output_latex)
  graphs = File.join(output_files, '..', 'graphs')
  # A missing graphs folder was not fatal with `cp`; keep it that way.
  FileUtils.cp_r(graphs, output_latex) if File.exist?(graphs)

  #---------------------------------------------- Parameters
  ParamsReport.new(all_params, output_files, output_latex)

  #---------------------------------------------- Statistics
  StatsReport.new(all_params, initial_stats, stats, plugin_nts_hash, output_files, output_latex)

  #---------------------------------------------- Rejected
  RejectedReport.new(stats, plugin_fix_hash, output_files, output_latex)

  #---------------------------------------------- Build pdf
  # The command runs inside the latex folder through the :chdir option; the
  # previous `pushd`/`popd` wrappers are bash builtins that /bin/sh may lack.
  pdflatex = ['pdflatex', '-halt-on-error', 'main.tex']
  puts "Running pdflatex: #{pdflatex.join(' ')} (in #{output_latex})"

  # it must be repeated to solve links in latex
  2.times { system(*pdflatex, :chdir => output_latex, :out => File::NULL) }

  pdf = File.join(output_latex, 'main.pdf')
  if File.exist?(pdf)
    FileUtils.cp(pdf, File.join(output_files, 'statistics_report.pdf'))
    system('zip', '-r', File.join(output_files, 'latex.zip'), output_latex, :out => File::NULL)
    FileUtils.rm_r(output_latex)
  else
    # pdflatex failed or is not installed; say so instead of ending silently.
    puts "output PDF couldn't be created for this dataset"
  end

rescue => e

  puts "output PDF couldn't be created for this dataset"
  # Report the cause so that the failure can be diagnosed.
  warn "#{e.class}: #{e.message}"

end
@@ -0,0 +1,12 @@
1
# Entry point of the seqtrimnext_report library: puts the library folders on
# the load path and declares the gem version.

# Make this directory requirable, unless it is already on the load path in
# either its relative or its absolute form.
lib_dir = File.dirname(__FILE__)
unless $LOAD_PATH.include?(lib_dir) || $LOAD_PATH.include?(File.expand_path(lib_dir))
  $LOAD_PATH.unshift(lib_dir)
end

# require 'seqtrimnext'

# Folder holding the resources of this library.
ROOT_PATH = File.join(File.dirname(__FILE__), 'seqtrimnext_report')

# The report classes are required by bare name (e.g. require 'params_report').
# The entry is guarded like the one above, so loading this file through two
# different paths does not add the folder twice.
classes_dir = File.expand_path(File.join(ROOT_PATH, 'classes'))
$LOAD_PATH << classes_dir unless $LOAD_PATH.include?(classes_dir)

module SeqtrimnextReport
  VERSION = '0.0.2'.freeze
end
@@ -0,0 +1,84 @@
1
# Writes the "used parameters" section of the report: a LaTeX fragment named
# UsedParams.tex listing selected parameters, grouped in subsections.
class ParamsReport

  # Parameters shown in the report, keyed by group name.
  PARAMS_IN_PDF = {
    'general' => ['plugin_list','remove_clonality','min_insert_size_trimmed','min_insert_size_paired','seqtrim_version','min_sequence_size_raw'],
    'quality' => ['min_quality','window_width'],
    'contaminants' => ['blast_evalue_contaminants','blast_percent_contaminants','min_contam_seq_presence','genus','contaminants_reject','contaminants_db']
  }

  # Creates UsedParams.tex inside +output_latex+.
  #
  # all_params   - object answering get_param(name) and
  #                get_comment('params', name).
  # output_files - folder with the SeqTrimNext output (not used here).
  # output_latex - folder where the LaTeX fragment is written.
  #
  # The "Quality" and "Contaminants" subsections are written only when the
  # 'min_quality' and 'blast_evalue_contaminants' parameters are set.
  def initialize(all_params,output_files, output_latex)

    # all_params.print_parameters

    # Block form: the file is closed even if a parameter lookup raises.
    File.open(File.join(output_latex,'UsedParams.tex'), 'w') do |output|
      output.puts "%!TEX root = FinalReport.tex"

      output.puts '\subsection{General}'
      print_each_group_of_params(output,all_params,PARAMS_IN_PDF,'general')

      if all_params.get_param('min_quality')
        output.puts '\subsection{Quality}'
        print_each_group_of_params(output,all_params,PARAMS_IN_PDF,'quality')
      end

      if all_params.get_param('blast_evalue_contaminants')
        output.puts '\subsection{Contaminants}'
        print_each_group_of_params(output,all_params,PARAMS_IN_PDF,'contaminants')
      end
    end

    puts "used parameters information was added to the report"

  end

  # Writes every parameter of the group +my_group+ to +output+.
  #
  # output        - IO-like object receiving the LaTeX lines.
  # all_params    - parameter source (see #initialize).
  # params_in_pdf - Hash mapping group names to arrays of parameter names.
  # my_group      - name of the group to print.
  #
  # Neither the parameter values nor their comments are modified: the previous
  # in-place substitutions altered the caller's strings, so a second run
  # escaped them twice.
  def print_each_group_of_params(output,all_params,params_in_pdf,my_group)

    params_in_pdf[my_group].each do |param_name|

      description = description_for(all_params, param_name)
      value = all_params.get_param(param_name)

      case param_name
      when 'plugin_list'
        puts value
        output.puts description + '\\\\'

        # One numbered line per plugin; a missing value yields an empty list.
        value.to_s.split(",").each_with_index do |plugin, index|
          output.puts '\indent' + "#{index + 1}. #{latex_escape(plugin)}" + '\\\\'
        end

        output.puts '\\\\'
      when 'contaminants_db'
        output.puts description + '\\\\'

        # The value is a quoted, space separated list of database paths; only
        # the file names are shown.
        value.to_s.delete('"').split(" ").each do |database|
          output.puts '\indent \texttt{' + " #{latex_escape(File.basename(database))}" + '}\\\\'
        end
      else
        output.puts description + '\\\\'
        output.puts '\indent \texttt{' + "#{latex_escape(param_name)}: #{latex_escape(value)}" + '}\\\\'
      end
    end
  end

  private

  # Returns the LaTeX-escaped description of +param_name+: the last element of
  # its comment, or '' when there is no comment or when it starts with '##'.
  def description_for(all_params, param_name)
    comment = all_params.get_comment('params',param_name)
    return '' if comment.nil?

    description = latex_escape(comment.last)
    description =~ /^\#\#+/ ? '' : description
  end

  # Returns a copy of +text+ with '%' and '_' escaped; both are special
  # characters in LaTeX text mode. The argument is never modified.
  def latex_escape(text)
    text.to_s.gsub('%','\%').gsub('_','\_')
  end

end
84
+
@@ -0,0 +1,207 @@
1
# Writes the "rejected sequences" section of the report: a LaTeX fragment
# named rejected.tex with the sequence counts, the number of reads rejected in
# every plugin and a highlighted box for every warning raised.
class RejectedReport

  # Creates rejected.tex inside +output_latex+.
  #
  # stats           - Hash parsed from stats.json; stats['sequences']['count']
  #                   and stats['sequences']['rejected'] are read.
  # plugin_fix_hash - Hash with the per-plugin 'name', 'threshold', 'warning'
  #                   and 'msg' settings.
  # output_files    - folder with the SeqTrimNext output (not used here).
  # output_latex    - folder where the LaTeX fragment is written.
  def initialize(stats,plugin_fix_hash,output_files, output_latex)

    # write_plugin_json

    counts = stats['sequences']['count']
    input_seqs = counts['input_count'].to_i
    rejected_seqs = counts['rejected'].to_i
    output_seqs = counts['output_seqs'].to_i
    # only present when there are paired sequences (nil.to_i is 0)
    output_seqs_paired = counts['output_seqs_paired'].to_i
    # not present for genomic data
    low_complex = counts['output_seqs_low_complexity'].to_i

    rejected_hash = {
      'rejected' => {
        'value' => rejected_seqs,
        'warning' => 'OK',
        'warning_msg' => '',
        'percent' => sprintf("%0.3f", (rejected_seqs.to_f*100/input_seqs.to_f))
      }
    }

    # Block form: the file is closed even if building a table raises.
    File.open(File.join(output_latex,'rejected.tex'), 'w') do |output3|
      output3.puts "%!TEX root = FinalReport.tex\n\n"

      if stats['sequences']['rejected'].nil?
        output3.puts 'There are not rejected sequences\\\\'
      else
        rejected_hash = load_plugins_info(stats, rejected_hash, input_seqs, plugin_fix_hash)

        write_counts_table(output3, input_seqs, output_seqs, rejected_seqs, output_seqs_paired, low_complex)

        # the hash of hashes is ordered by value (number of sequences rejected)
        rejected_ordered = rejected_hash.sort {|a,b| b[1]['value']<=>a[1]['value']}

        write_summary_table(output3, rejected_ordered, rejected_hash['rejected'])
        write_warnings(output3, rejected_ordered)
      end
    end

    puts "Information about rejected sequences was added to the report"
  end

  # Adds one entry per rejection cause to +rejected_hash+ and flags the
  # entries whose percentage reaches the configured threshold.
  #
  # stats           - Hash parsed from stats.json.
  # rejected_hash   - Hash to fill; it is modified and returned.
  # input_seqs      - number of input sequences, used for the percentages.
  # plugin_fix_hash - per-plugin settings; causes without an entry are ignored.
  #
  # Returns +rejected_hash+.
  def load_plugins_info(stats, rejected_hash, input_seqs, plugin_fix_hash)
    stats['sequences']['rejected'].each do |cause, count|
      settings = plugin_fix_hash[cause]
      next unless settings

      rejected_hash[cause] = {
        'name' => settings['name'],
        'value' => count,
        'warning' => 'OK',
        'warning_msg' => '',
        'percent' => sprintf("%0.3f", (count.to_f*100/input_seqs.to_f))
      }
    end

    rejected_hash.each do |cause, info|
      settings = plugin_fix_hash[cause]
      # Entries without settings or threshold (e.g. a configuration lacking
      # 'rejected') keep their 'OK' status instead of raising on nil.
      next unless settings && settings['threshold']

      if info['percent'].to_f >= settings['threshold']
        info['warning'] = settings['warning']
        info['warning_msg'] = settings['msg']
      end
    end

    return rejected_hash
  end

  # Prints to standard output, as pretty JSON, a template of the warning
  # settings of every rejection cause.
  #
  # NOTE(review): the 'rejected' message starts with its own code ('WT'), which
  # the report prints again in front of the message - confirm it is intended.
  def write_plugin_json

    plugin_fix_hash = {
      'repeated' => {
        'msg' => "Warning!, there are a my_percent \\% of repeated sequences",
        'threshold' => 9,
        'warning' => 'W1'
      },
      'short insert' => {
        'msg' => "Warning!, a my_percent \\% of your sequences are too short",
        'threshold' => 10,
        'warning' => 'W2'
      },
      'empty insert' => {
        'msg' => "Warning!, a my_percent \\% of your sequences are empty (without an insert)",
        'warning' => 'W3',
        'threshold' => 1
      },
      'contaminated' => {
        'msg' => "Warning!, a my_percent \\% of your sequences are from a contaminant organism or from organelles",
        'warning' => 'W4',
        'threshold' => 1
      },
      'No valid inserts found' => {
        'msg' => "Warning!, a my_percent \\% of your sequences are no valid sequences",
        'threshold' => 0.1,
        'warning' => 'W5'
      },
      'low complexity by polyt' => {
        'msg' => "Warning!, a my_percent \\% of your sequences are low complexity sequences",
        'warning' => 'W6',
        'threshold' => 1
      },
      'unexpected vector' => {
        'msg' => "Warning!, a my_percent \\% of your sequences contain a vector in an unexpected position",
        'warning' => 'W7',
        'threshold' => 1
      },
      'Indeterminations in middle of sequence' => {
        'msg' => "Warning!, a my_percent \\% of your sequences contain too much indeterminations",
        'threshold' => 0.1,
        'warning' => 'W8'
      },
      'rejected' => {
        'msg' => "WT Warning!, a my_percent \\% of your sequences were rejected!",
        'threshold' => 25,
        'warning' => 'WT'
      }
    }

    puts JSON.pretty_generate(plugin_fix_hash)

  end

  private

  # Writes the table with the input, output and rejected sequence counts. The
  # paired and low complexity rows are written only when their count is not 0.
  def write_counts_table(output, input_seqs, output_seqs, rejected_seqs, output_seqs_paired, low_complex)
    output.puts '\begin{table}[H]'
    output.puts '\begin{center}'
    output.puts '\begin{tabular}{r r}'

    output.puts "Input sequences & #{input_seqs}\\\\"
    output.puts "Output sequences & #{output_seqs}\\\\"
    output.puts "Rejected sequences & #{rejected_seqs}\\\\"
    if output_seqs_paired != 0
      output.puts "Output paired sequences & #{output_seqs_paired} \\\\"
      output.puts "Total output sequences & #{output_seqs_paired+output_seqs} \\\\"
    end
    if low_complex != 0
      output.puts "Low complexity sequences & #{low_complex} \\\\"
    end

    output.puts '\end{tabular}'
    output.puts '\label{table:input_seqs}'
    output.puts '\end{center}'
    output.puts '\end{table}'+"\n\n"
  end

  # Writes the table with one row per rejection cause, followed by the total
  # (+total+ is the 'rejected' entry, which is left out of the rows).
  def write_summary_table(output, rejected_ordered, total)
    output.puts '\begin{table}[H]'
    output.puts '\caption{Summary of reads removed in every plugin.}'
    output.puts '\begin{center}'
    output.puts '\begin{tabular}{l r r c}'
    output.puts '\hline'
    output.puts 'Case & Number of sequences & Percent & Warnings \\\\ [0.5ex]'
    output.puts '\hline'

    rejected_ordered.each do |cause, info|
      next if cause == 'rejected'

      output.puts "#{info['name']}&#{info['value']}&#{info['percent']} \\%&#{info['warning']}\\\\"
    end

    output.puts '\hline'
    output.puts "Total rejected&#{total['value']}&#{total['percent']} \\%&#{total['warning']}\\\\ [1ex]"
    output.puts '\hline'
    output.puts '\end{tabular}'
    output.puts '\end{center}'
    output.puts '\label{table:reads_removed}'
    output.puts '\end{table}'+"\n\n"
  end

  # Writes a highlighted box for every entry whose warning is not 'OK'.
  #
  # The percentage is read from the entry itself. The previous code looked it
  # up again by a key that had been rewritten while printing the table, which
  # raised NoMethodError for 'Indeterminations in middle of sequence'. The
  # message is substituted on a copy, so the settings keep their 'my_percent'
  # placeholder.
  def write_warnings(output, rejected_ordered)
    rejected_ordered.each do |_cause, info|
      next if info['warning'] == 'OK'

      message = info['warning_msg'].to_s.gsub('my_percent', info['percent'].to_s)
      output.puts [
        '\noindent\fcolorbox{black}{yellow}{',
        '\begin{minipage}{\linewidth}{',
        '\textbf{' + "#{info['warning']} #{message}" + '}',
        '}',
        '\end{minipage}',
        '}\\\\\\\\'
      ].join("\n")
    end
  end

end