seqtrimnext_report 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ === 0.0.2 2011-06-15
2
+
3
+ Some small fixes
4
+
5
+ === 0.0.1 2011-06-14
6
+
7
+ * Initial release
@@ -0,0 +1,24 @@
1
+ History.txt
2
+ Manifest.txt
3
+ PostInstall.txt
4
+ README.rdoc
5
+ Rakefile
6
+ lib/seqtrimnext_report.rb
7
+ script/console
8
+ script/destroy
9
+ script/generate
10
+ test/test_helper.rb
11
+ test/test_seqtrimnext_report.rb
12
+ bin/generate_report.rb
13
+ lib/seqtrimnext_report/classes/params_report.rb
14
+ lib/seqtrimnext_report/classes/rejected_report.rb
15
+ lib/seqtrimnext_report/classes/stats_report.rb
16
+ lib/seqtrimnext_report/config/plugin_nts.json
17
+ lib/seqtrimnext_report/config/plugin_seqs.json
18
+ lib/seqtrimnext_report/latex_src/input_graph.tex
19
+ lib/seqtrimnext_report/latex_src/main.tex
20
+ lib/seqtrimnext_report/latex_src/output_files.tex
21
+ lib/seqtrimnext_report/latex_src/output_graph.tex
22
+ lib/seqtrimnext_report/latex_src/piescbi.jpg
23
+ lib/seqtrimnext_report/latex_src/qv_graph.tex
24
+ lib/seqtrimnext_report/latex_src/ref_seqs.png
@@ -0,0 +1,7 @@
1
+
2
+ For more information on seqtrimnext_report, see http://seqtrimnext_report.rubyforge.org
3
+
4
+ NOTE: Change this information in PostInstall.txt
5
+ You can also delete it if you don't want it.
6
+
7
+
@@ -0,0 +1,49 @@
1
+ = seqtrimnext_report
2
+
3
+ * http://www.scbi.uma.es/downloads
4
+
5
+ == DESCRIPTION:
6
+
7
+ A PDF report generator for SeqtrimNEXT preprocessing software from SCBI.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ * Generates an eye-catching PDF report with the output produced by SeqtrimNEXT
12
+
13
+ == SYNOPSIS:
14
+
15
+ generate_report.rb seqtrim_output_folder
16
+
17
+ == REQUIREMENTS:
18
+
19
+ * seqtrimnext gem
20
+ * pdflatex binary
21
+
22
+ == INSTALL:
23
+
24
+ gem install seqtrimnext_report
25
+
26
+ == LICENSE:
27
+
28
+ (The MIT License)
29
+
30
+ Copyright (c) 2011 Noe Fernandez & Dario Guerrero
31
+
32
+ Permission is hereby granted, free of charge, to any person obtaining
33
+ a copy of this software and associated documentation files (the
34
+ 'Software'), to deal in the Software without restriction, including
35
+ without limitation the rights to use, copy, modify, merge, publish,
36
+ distribute, sublicense, and/or sell copies of the Software, and to
37
+ permit persons to whom the Software is furnished to do so, subject to
38
+ the following conditions:
39
+
40
+ The above copyright notice and this permission notice shall be
41
+ included in all copies or substantial portions of the Software.
42
+
43
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
44
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
45
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
46
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
47
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
48
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
49
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,26 @@
1
# Rakefile for the seqtrimnext_report gem.
# Declares the gem specification through Hoe (with the newgem plugin) and
# loads any project-local rake tasks found under tasks/.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/seqtrimnext_report'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'seqtrimnext_report' do
  self.developer 'Noe Fernandez & Dario Guerrero', 'noefp@gmail.com, dariogf@gmail.com'
  self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
  self.rubyforge_name = self.name # TODO this is default value
  self.extra_deps = [['seqtrimnext','>= 2.0.31']]
end

require 'newgem/tasks'
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default

# Extra prerequisites for the default task. The list used to name
# `:featuresm`, a typo for `:features`. The cucumber plugin is commented out
# above, so each name is only added when a task with that name really exists;
# an unknown prerequisite would make a bare `rake` abort.
extra_default_tasks = [:spec, :features, :redocs].select do |task_name|
  Rake::Task.task_defined?(task_name.to_s)
end
task :default => extra_default_tasks
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Noe Fdez Pozo 2011-05-11. To build a PDF with latex parsing SeqTrimNext output files
4
+
5
+ #---------------------------------------------- tells Ruby where the classes are located
6
+ ROOT_PATH=File.dirname(__FILE__)
7
+ # $: << File.expand_path(File.join(ROOT_PATH, "classes"))
8
+ # $: << '/Users/dariogf/progs/ruby/gems/seqtrimnext/lib'
9
+
10
+ #---------------------------------------------- gems
11
+ require 'json'
12
+ #---------------------------------------------- classes
13
+
14
+ require 'seqtrimnext'
15
+ require 'seqtrimnext_report'
16
+ require 'params'
17
+ require 'scbi_stats'
18
+ require 'params_report'
19
+ require 'stats_report'
20
+ require 'rejected_report'
21
+
22
+ #---------------------------------------------- method to parse a json
23
# Reads a JSON file that may contain '#' comment lines and parses it.
#
# Lines whose first non-blank character is '#' are stripped before parsing,
# because plain JSON has no comment syntax.
#
# file - path of the file to read.
#
# Returns the parsed JSON value, or nil when the file does not exist or holds
# nothing but comments and whitespace.
# Raises JSON::ParserError when the remaining text is not valid JSON.
def get_json_data(file)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return nil unless File.exist?(file)

  # File.read opens and closes the file itself, so no handle is leaked if
  # reading fails part-way.
  text = File.read(file).gsub(/^\s*#.*$/, '').gsub(/^\n$/, '')

  # Whitespace left over after removing comments is not a JSON document.
  return nil if text.strip.empty?

  JSON.parse(text)
end
40
+
41
# The script takes exactly one argument: the SeqtrimNEXT output folder.
if ARGV.count != 1
  puts "Usage: #{File.basename($0)} output_files_folder"
  exit(-1)
end

#---------------------------------------------- check that the inputs exist
output_files = ARGV.shift

# Dir.exists? / File.exists? were removed in Ruby 3.2; use the singular forms.
unless Dir.exist?(output_files)
  puts "Directory #{output_files} doesn't exists"
  exit(-1)
end

# Files the report is built from, checked in this order; the first one that
# is missing is reported and the script stops.
required_files = ['used_params.txt', 'initial_stats.json', 'stats.json']
missing_file = required_files.find { |name| !File.exist?(File.join(output_files, name)) }

if missing_file
  puts "File #{missing_file} not found.\n"
  exit(-1)
end

puts "Generating report"
puts "="*50
puts "used_params.txt, initial_stats.json and stats.json files were found"
68
+
69
+ #---------------------------------------------- MAIN --------------------------------
70
# Builds the LaTeX sources inside <output_files>/latex, runs pdflatex on them
# and, if a PDF was produced, copies it to <output_files>/statistics_report.pdf,
# zips the LaTeX folder and removes it.
#
# External commands are started with the argument-list form of Kernel#system,
# so no shell is involved: paths containing spaces or shell metacharacters are
# passed through untouched. Their standard output is discarded, as it was when
# the commands were run through backticks.
begin

  # load used params
  all_params = Params.new(File.join(output_files, 'used_params.txt'))

  # load initial and final stats
  initial_stats = get_json_data(File.join(output_files, 'initial_stats.json'))
  stats = get_json_data(File.join(output_files, 'stats.json'))

  # load json configuration shipped under lib/seqtrimnext_report
  report_lib = File.join(File.dirname(__FILE__), '..', 'lib', 'seqtrimnext_report')
  plugin_fix_hash = get_json_data(File.join(report_lib, 'config', 'plugin_seqs.json'))
  plugin_nts_hash = get_json_data(File.join(report_lib, 'config', 'plugin_nts.json'))

  output_latex = File.join(output_files, 'latex')

  # copy latex required files to output_folder
  system('cp', '-r', File.join(report_lib, 'latex_src'), output_latex, :out => File::NULL)
  system('cp', '-r', File.join(output_files, '..', 'graphs'), output_latex, :out => File::NULL)

  #---------------------------------------------- Parameters
  ParamsReport.new(all_params, output_files, output_latex)

  #---------------------------------------------- Statistics
  StatsReport.new(all_params, initial_stats, stats, plugin_nts_hash, output_files, output_latex)

  #---------------------------------------------- Rejected
  RejectedReport.new(stats, plugin_fix_hash, output_files, output_latex)

  #---------------------------------------------- Build pdf
  pdflatex = ['pdflatex', '-halt-on-error', 'main.tex']
  puts "Running pdflatex: cd #{output_latex}; #{pdflatex.join(' ')}"

  # it must be run twice so that latex can resolve its internal links;
  # :chdir replaces the former pushd/cd/popd sequence, which relied on
  # builtins that a plain POSIX sh does not provide.
  2.times { system(*pdflatex, :chdir => output_latex, :out => File::NULL) }

  pdf_file = File.join(output_latex, 'main.pdf')
  if File.exist?(pdf_file)
    system('cp', pdf_file, File.join(output_files, 'statistics_report.pdf'), :out => File::NULL)
    system('zip', '-r', File.join(output_files, 'latex.zip'), output_latex, :out => File::NULL)
    system('rm', '-r', output_latex, :out => File::NULL)
  end

rescue StandardError => e

  puts "output PDF couldn't be created for this dataset"
  # Report the cause on stderr instead of discarding it silently.
  warn "#{e.class}: #{e.message}"

end
118
+ # system(cmd)
@@ -0,0 +1,12 @@
1
# Entry point of the seqtrimnext_report library: puts the library folder and
# its classes folder on the load path and declares the gem version.

# Make this folder requirable unless it is already on the load path, either
# as given or in expanded form.
lib_dir = File.dirname(__FILE__)
$:.unshift(lib_dir) unless $:.include?(lib_dir) || $:.include?(File.expand_path(lib_dir))

# require 'seqtrimnext'

# Folder holding the report classes, configuration and LaTeX sources.
# NOTE(review): bin/generate_report.rb also assigns a top-level ROOT_PATH
# before requiring this file, which presumably triggers Ruby's "already
# initialized constant" warning -- confirm before renaming either one.
ROOT_PATH=File.join(File.dirname(__FILE__),'seqtrimnext_report')

# The report classes are required by bare name (e.g. 'params_report'), so
# their folder must be on the load path. It is added only once, matching the
# duplicate guard used for the library folder above.
classes_dir = File.expand_path(File.join(ROOT_PATH, 'classes'))
$: << classes_dir unless $:.include?(classes_dir)

module SeqtrimnextReport
  # Gem version; frozen so it cannot be modified in place.
  VERSION = '0.0.2'.freeze
end
@@ -0,0 +1,84 @@
1
# Writes UsedParams.tex, the "used parameters" section of the LaTeX report.
#
# All the work is done by the constructor: creating an instance writes the
# file into the LaTeX output folder.
class ParamsReport

  # Names of the parameters shown in the report, grouped by subsection.
  PARAMS_IN_PDF = {
    'general' => ['plugin_list','remove_clonality','min_insert_size_trimmed','min_insert_size_paired','seqtrim_version','min_sequence_size_raw'],
    'quality' => ['min_quality','window_width'],
    'contaminants' => ['blast_evalue_contaminants','blast_percent_contaminants','min_contam_seq_presence','genus','contaminants_reject','contaminants_db']
  }.freeze

  # all_params   - object answering get_param(name) and
  #                get_comment('params', name), the latter returning a list of
  #                comment strings or nil.
  # output_files - SeqtrimNEXT output folder (not used by this class).
  # output_latex - folder where UsedParams.tex is written.
  def initialize(all_params,output_files, output_latex)
    # The block form closes the file even if writing a section raises.
    File.open(File.join(output_latex,'UsedParams.tex'), 'w') do |output|
      output.puts "%!TEX root = FinalReport.tex"

      output.puts '\subsection{General}'
      print_each_group_of_params(output,all_params,PARAMS_IN_PDF,'general')

      # The optional subsections are only written when their key parameter is set.
      if all_params.get_param('min_quality')
        output.puts '\subsection{Quality}'
        print_each_group_of_params(output,all_params,PARAMS_IN_PDF,'quality')
      end

      if all_params.get_param('blast_evalue_contaminants')
        output.puts '\subsection{Contaminants}'
        print_each_group_of_params(output,all_params,PARAMS_IN_PDF,'contaminants')
      end
    end

    puts "used parameters information was added to the report"
  end

  # Writes one description line plus the value line(s) for every parameter of
  # a group.
  #
  # output        - IO the LaTeX text is written to.
  # all_params    - parameters object (see #initialize).
  # params_in_pdf - Hash mapping group names to lists of parameter names.
  # my_group      - name of the group to print.
  #
  # Neither the parameter names nor the strings returned by all_params are
  # modified; a missing (nil) value is printed as an empty string.
  def print_each_group_of_params(output,all_params,params_in_pdf,my_group)
    params_in_pdf[my_group].each do |param_name|
      description = param_description(all_params, param_name)
      value = all_params.get_param(param_name).to_s

      output.puts description + '\\\\'

      case param_name
      when 'plugin_list'
        # Echo the raw plugin list on the console, then number each plugin.
        puts value
        value.split(",").each_with_index do |plugin, index|
          output.puts '\indent' + "#{index + 1}. #{latex_escape(plugin)}" + '\\\\'
        end
        output.puts '\\\\'
      when 'contaminants_db'
        # Value is a quoted, space separated list of paths; show file names only.
        value.delete('"').split(" ").each do |db_path|
          output.puts '\indent \texttt{' + " #{latex_escape(File.basename(db_path))}" + '}\\\\'
        end
      else
        output.puts '\indent \texttt{' + "#{latex_escape(param_name)}: #{latex_escape(value)}" + '}\\\\'
      end
    end
  end

  private

  # Returns the LaTeX-escaped last comment of a parameter, or '' when there is
  # no comment or the comment is a '##...' separator line.
  def param_description(all_params, param_name)
    comments = all_params.get_comment('params', param_name)
    return '' if comments.nil?

    description = latex_escape(comments.last)
    description =~ /^\#\#+/ ? '' : description
  end

  # Returns a copy of text with '%' and '_' backslash-escaped so that LaTeX
  # prints them literally. The argument itself is left untouched.
  def latex_escape(text)
    text.to_s.gsub(/[%_]/) { |char| "\\#{char}" }
  end

end
84
+
@@ -0,0 +1,207 @@
1
# Writes rejected.tex, the "rejected sequences" section of the LaTeX report.
#
# All the work is done by the constructor: creating an instance writes the
# file into the LaTeX output folder.
class RejectedReport

  # stats           - Hash parsed from stats.json; reads
  #                   stats['sequences']['count'] and
  #                   stats['sequences']['rejected'].
  # plugin_fix_hash - Hash parsed from the plugin configuration: per rejection
  #                   reason a Hash with 'name', 'msg', 'threshold', 'warning'.
  # output_files    - SeqtrimNEXT output folder (not used by this class).
  # output_latex    - folder where rejected.tex is written.
  def initialize(stats,plugin_fix_hash,output_files, output_latex)

    # write_plugin_json

    counts = stats['sequences']['count']
    input_seqs = counts['input_count'].to_i
    rejected_seqs = counts['rejected'].to_i
    output_seqs = counts['output_seqs'].to_i
    # The next two keys are optional (paired output only exists for paired
    # reads, low complexity is absent for genomic data); nil.to_i is 0.
    output_seqs_paired = counts['output_seqs_paired'].to_i
    low_complex = counts['output_seqs_low_complexity'].to_i

    rejected_hash = {
      'rejected' => {
        'value' => rejected_seqs,
        'warning' => 'OK',
        'warning_msg' => '',
        'percent' => percent_of(rejected_seqs, input_seqs)
      }
    }

    # The block form closes the file even if building the report raises.
    File.open(File.join(output_latex,'rejected.tex'), 'w') do |tex|
      tex.puts "%!TEX root = FinalReport.tex\n\n"

      if stats['sequences']['rejected'].nil?
        tex.puts 'There are not rejected sequences\\\\'
      else
        rejected_hash = load_plugins_info(stats, rejected_hash, input_seqs, plugin_fix_hash)

        write_counts_table(tex, input_seqs, output_seqs, rejected_seqs, output_seqs_paired, low_complex)

        # [reason, info] pairs, highest number of rejected sequences first
        rejected_ordered = rejected_hash.sort {|a,b| b[1]['value']<=>a[1]['value']}

        write_plugins_table(tex, rejected_ordered, rejected_hash['rejected'])
        write_warnings(tex, rejected_ordered)
      end
    end

    puts "Information about rejected sequences was added to the report"
  end

  # Adds one entry per known rejection reason to rejected_hash and raises the
  # warning of every entry whose percentage reaches its configured threshold.
  #
  # stats           - Hash with stats['sequences']['rejected'] (reason => count).
  # rejected_hash   - Hash to fill; modified in place.
  # input_seqs      - number of input sequences, the base of the percentages.
  # plugin_fix_hash - per-reason configuration (see #initialize).
  #
  # Reasons without an entry in plugin_fix_hash are skipped. Entries whose
  # configuration is missing or has no threshold keep the 'OK' warning.
  #
  # Returns rejected_hash.
  def load_plugins_info(stats, rejected_hash, input_seqs, plugin_fix_hash)
    stats['sequences']['rejected'].each do |reason, count|
      config = plugin_fix_hash[reason]
      next unless config

      rejected_hash[reason] = {
        'name' => config['name'],
        'value' => count,
        'warning' => 'OK',
        'warning_msg' => '',
        'percent' => percent_of(count, input_seqs)
      }
    end

    rejected_hash.each do |reason, info|
      config = plugin_fix_hash[reason]
      next unless config && config['threshold']
      next unless info['percent'].to_f >= config['threshold']

      info['warning'] = config['warning']
      info['warning_msg'] = config['msg']
    end

    return rejected_hash
  end

  # Prints on stdout a JSON template of the warning configuration
  # (message, threshold and warning code per rejection reason).
  # Needs the json library to be loaded by the caller.
  def write_plugin_json

    plugin_fix_hash = {
      'repeated' => {
        'msg' => "Warning!, there are a my_percent \\% of repeated sequences",
        'threshold' => 9,
        'warning' => 'W1'
      },
      'short insert' => {
        'msg' => "Warning!, a my_percent \\% of your sequences are too short",
        'threshold' => 10,
        'warning' => 'W2'
      },
      'empty insert' => {
        'msg' => "Warning!, a my_percent \\% of your sequences are empty (without an insert)",
        'warning' => 'W3',
        'threshold' => 1
      },
      'contaminated' => {
        'msg' => "Warning!, a my_percent \\% of your sequences are from a contaminant organism or from organelles",
        'warning' => 'W4',
        'threshold' => 1
      },
      'No valid inserts found' => {
        'msg' => "Warning!, a my_percent \\% of your sequences are no valid sequences",
        'threshold' => 0.1,
        'warning' => 'W5'
      },
      'low complexity by polyt' => {
        'msg' => "Warning!, a my_percent \\% of your sequences are low complexity sequences",
        'warning' => 'W6',
        'threshold' => 1
      },
      'unexpected vector' => {
        'msg' => "Warning!, a my_percent \\% of your sequences contain a vector in an unexpected position",
        'warning' => 'W7',
        'threshold' => 1
      },
      'Indeterminations in middle of sequence' => {
        'msg' => "Warning!, a my_percent \\% of your sequences contain too much indeterminations",
        'threshold' => 0.1,
        'warning' => 'W8'
      },
      'rejected' => {
        'msg' => "WT Warning!, a my_percent \\% of your sequences were rejected!",
        'threshold' => 25,
        'warning' => 'WT'
      }
    }

    puts JSON.pretty_generate(plugin_fix_hash)

  end

  private

  # Returns count as a percentage of total, formatted with three decimals.
  # A total of zero gives '0.000' instead of a NaN/Inf string.
  def percent_of(count, total)
    return '0.000' if total.to_f.zero?

    sprintf("%0.3f", (count.to_f*100/total.to_f))
  end

  # Writes the table with the global sequence counts. The paired and low
  # complexity rows only appear when their count is not zero.
  def write_counts_table(tex, input_seqs, output_seqs, rejected_seqs, output_seqs_paired, low_complex)
    tex.puts '\begin{table}[H]'
    tex.puts '\begin{center}'
    tex.puts '\begin{tabular}{r r}'

    tex.puts "Input sequences & #{input_seqs}\\\\"
    tex.puts "Output sequences & #{output_seqs}\\\\"
    tex.puts "Rejected sequences & #{rejected_seqs}\\\\"
    if output_seqs_paired != 0
      tex.puts "Output paired sequences & #{output_seqs_paired} \\\\"
      tex.puts "Total output sequences & #{output_seqs_paired + output_seqs} \\\\"
    end
    tex.puts "Low complexity sequences & #{low_complex} \\\\" if low_complex != 0

    tex.puts '\end{tabular}'
    tex.puts '\label{table:input_seqs}'
    tex.puts '\end{center}'
    tex.puts '\end{table}'+"\n\n"
  end

  # Writes the table with one row per rejection reason followed by the total.
  #
  # rejected_ordered - sorted [reason, info] pairs, including 'rejected'.
  # total            - info Hash of the 'rejected' (total) entry.
  def write_plugins_table(tex, rejected_ordered, total)
    tex.puts '\begin{table}[H]'
    tex.puts '\caption{Summary of reads removed in every plugin.}'
    tex.puts '\begin{center}'
    tex.puts '\begin{tabular}{l r r c}'
    tex.puts '\hline'
    tex.puts 'Case & Number of sequences & Percent & Warnings \\\\ [0.5ex]'
    tex.puts '\hline'

    rejected_ordered.each do |reason, info|
      # the total gets its own row below the rule
      next if reason == 'rejected'

      tex.puts "#{info['name']}&#{info['value']}&#{info['percent']} \\%&#{info['warning']}\\\\"
    end

    tex.puts '\hline'
    tex.puts "Total rejected&#{total['value']}&#{total['percent']} \\%&#{total['warning']}\\\\ [1ex]"
    tex.puts '\hline'
    tex.puts '\end{tabular}'
    tex.puts '\end{center}'
    tex.puts '\label{table:reads_removed}'
    tex.puts '\end{table}'+"\n\n"
  end

  # Writes a highlighted box for every entry whose warning is not 'OK'.
  #
  # The percentage is read from the entry itself. It used to be looked up in
  # the hash by a reason name that had been shortened in the meantime, which
  # failed for 'Indeterminations in middle of sequence'. The configured
  # message is copied, not modified, when 'my_percent' is filled in.
  def write_warnings(tex, rejected_ordered)
    rejected_ordered.each do |_reason, info|
      next if info['warning'] == 'OK'

      message = info['warning_msg'].to_s.gsub('my_percent') { info['percent'].to_s }
      box = [
        '\noindent\fcolorbox{black}{yellow}{',
        '\begin{minipage}{\linewidth}{',
        '\textbf{' + "#{info['warning']} #{message}" + '}',
        '}',
        '\end{minipage}',
        '}\\\\\\\\'
      ]
      tex.puts box.join("\n")
    end
  end

end