seqtrimnext_report 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +7 -0
- data/Manifest.txt +24 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +49 -0
- data/Rakefile +26 -0
- data/bin/generate_report.rb +118 -0
- data/lib/seqtrimnext_report.rb +12 -0
- data/lib/seqtrimnext_report/classes/params_report.rb +84 -0
- data/lib/seqtrimnext_report/classes/rejected_report.rb +207 -0
- data/lib/seqtrimnext_report/classes/stats_report.rb +323 -0
- data/lib/seqtrimnext_report/config/plugin_nts.json +65 -0
- data/lib/seqtrimnext_report/config/plugin_seqs.json +69 -0
- data/lib/seqtrimnext_report/latex_src/input_graph.tex +21 -0
- data/lib/seqtrimnext_report/latex_src/main.tex +111 -0
- data/lib/seqtrimnext_report/latex_src/output_files.tex +29 -0
- data/lib/seqtrimnext_report/latex_src/output_graph.tex +22 -0
- data/lib/seqtrimnext_report/latex_src/piescbi.jpg +0 -0
- data/lib/seqtrimnext_report/latex_src/qv_graph.tex +21 -0
- data/lib/seqtrimnext_report/latex_src/ref_seqs.png +0 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext_report.rb +11 -0
- metadata +103 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
%!TEX root = FinalReport.tex
|
2
|
+
\IfFileExists{graphs/size_stats.png}{
|
3
|
+
|
4
|
+
\begin{figure}[H]
|
5
|
+
\begin{minipage}{\textwidth}
|
6
|
+
|
7
|
+
The following figure shows the size distribution of the reads in the input data.
|
8
|
+
|
9
|
+
\begin{center}
|
10
|
+
\includegraphics[width=0.95\textwidth]{graphs/size_stats.png}
|
11
|
+
\includegraphics[width=0.9\textwidth]{ref_seqs.png}
|
12
|
+
% \caption{Distribution of your input sequences (Up). Reference 1 (Down left). Reference 2 (Down right)}
|
13
|
+
\caption{Upper plot: Size distribution of the reads analysed by SeqTrimNext. If they come from GS-FLX (454 technology), the profile should be close to the one shown in the bottom image, where an example of an appropriate read length distribution is shown. Peak position (modal read length) will depend on the pyrosequencing technology used. [size\_stats.png]}
|
14
|
+
\label{input_graph}
|
15
|
+
\end{center}
|
16
|
+
\end{minipage}
|
17
|
+
\end{figure}
|
18
|
+
}
|
19
|
+
{
|
20
|
+
|
21
|
+
}
|
@@ -0,0 +1,111 @@
|
|
1
|
+
|
2
|
+
% Created by noefp on 2011-5-10.
|
3
|
+
|
4
|
+
%-------------------------------- define latex style
|
5
|
+
\documentclass[11pt,oneside,a4paper]{article}
|
6
|
+
|
7
|
+
% Use UTF-8 encoding for foreign characters and accented letters
|
8
|
+
\usepackage[utf8]{inputenc}
|
9
|
+
% el paquete Babel que sirve para separar correctamente las palabras de multitud de idiomas%%%
|
10
|
+
\usepackage[english,activeacute]{babel}
|
11
|
+
|
12
|
+
% para colorear el texto
|
13
|
+
\usepackage{xcolor}
|
14
|
+
\usepackage{colortbl}
|
15
|
+
|
16
|
+
% reconoce las URL marcadas con \url{}
|
17
|
+
\usepackage{url}
|
18
|
+
% convierte las URL en hiperenlaces reales
|
19
|
+
\usepackage{hyperref}
|
20
|
+
|
21
|
+
% para incluir imagenes
|
22
|
+
\usepackage{eso-pic}
|
23
|
+
\usepackage{graphicx}
|
24
|
+
|
25
|
+
% Surround parts of graphics with box
|
26
|
+
\usepackage{boxedminipage}
|
27
|
+
|
28
|
+
% para que la figura o la tabla se inserten donde se dice
|
29
|
+
\usepackage{float}
|
30
|
+
|
31
|
+
% para que no indente
|
32
|
+
% \setlength{\parindent}{0in}
|
33
|
+
%------------------------------------------ imagen en el pie de cada pagina
|
34
|
+
\newcommand{\fondoPie}{
|
35
|
+
\ClearShipoutPicture % Borra el fondo
|
36
|
+
|
37
|
+
% añade fondo para todas las páginas
|
38
|
+
\AddToShipoutPicture{
|
39
|
+
\includegraphics[width=\paperwidth]{piescbi.jpg}
|
40
|
+
}
|
41
|
+
}
|
42
|
+
%-------------------------------------------------------------------------------------------------------
|
43
|
+
%-------------------------------------------------------------------------------------------------------
|
44
|
+
|
45
|
+
%-------------------------------- begin document
|
46
|
+
\begin{document}
|
47
|
+
|
48
|
+
%-------------------------------- front page
|
49
|
+
\title{
|
50
|
+
\Huge{\textbf{Seq\color{red}T\color{black}rim\color{orange}Next\color{black}\\ Statistics of pre-processing}}
|
51
|
+
\newline
|
52
|
+
}
|
53
|
+
|
54
|
+
\author{\textbf{Plataforma Andaluza de Bioinformática}\\
|
55
|
+
\small Universidad de Málaga\\}
|
56
|
+
|
57
|
+
\date{\today}
|
58
|
+
|
59
|
+
% \fondoTitulo
|
60
|
+
|
61
|
+
% \cabeceraTitulo
|
62
|
+
\maketitle
|
63
|
+
|
64
|
+
% pone el fondo de pie de página
|
65
|
+
\fondoPie
|
66
|
+
\newpage
|
67
|
+
%-------------------------------- output files
|
68
|
+
\section{Output Files}
|
69
|
+
\label{sec:files}
|
70
|
+
|
71
|
+
\input{output_files}
|
72
|
+
|
73
|
+
%-------------------------------- Parameters
|
74
|
+
\section{Relevant parameters}
|
75
|
+
\label{sec:params}
|
76
|
+
|
77
|
+
In this section, the relevant parameters used in your experiment are shown. Full information about the parameters can be obtained from the file \texttt{used\_params.txt}.
|
78
|
+
|
79
|
+
% introducimos el texto de un fichero
|
80
|
+
\input{UsedParams}
|
81
|
+
|
82
|
+
|
83
|
+
%-------------------------------- statistics
|
84
|
+
% \newpage
|
85
|
+
\section{Pre-processing statistics}
|
86
|
+
\label{sec:summary}
|
87
|
+
|
88
|
+
|
89
|
+
% introducimos de un fichero
|
90
|
+
\input{stats}
|
91
|
+
|
92
|
+
%-------------------------------- Rejected sequences
|
93
|
+
\section{Rejected reads}
|
94
|
+
\label{sec:rejected}
|
95
|
+
|
96
|
+
\input{rejected}
|
97
|
+
|
98
|
+
%-------------------------------- bibliografia
|
99
|
+
\begin{thebibliography}{100}
|
100
|
+
\bibitem{seqtrim} Falgueras et al. SeqTrim: a high-throughput pipeline for preprocessing any type of sequence reads. \emph{BMC Bioinformatics} 11:38 (2010)
|
101
|
+
|
102
|
+
\bibitem{cdhit} Weizhong Li \& Adam Godzik. Cd-hit: a fast program for clustering and comparing large sets of protein or nucleotide sequences. \emph{Bioinformatics} (2006) 22:1658-9
|
103
|
+
|
104
|
+
\end{thebibliography}
|
105
|
+
|
106
|
+
%-------------------------------- contact
|
107
|
+
Thank you for using Seq\color{red}T\color{black}rim\color{orange}Next\color{black}!
|
108
|
+
Please send any comments to \href{mailto:soporte@scbi.uma.es}{scbi support}.
|
109
|
+
|
110
|
+
%-------------------------------- end document
|
111
|
+
\end{document}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
%!TEX root = FinalReport.tex
|
2
|
+
|
3
|
+
|
4
|
+
\noindent SeqTrimNext provides several files; the most interesting ones are in the following directories:
|
5
|
+
|
6
|
+
\begin{itemize}
|
7
|
+
\item{output\_files}
|
8
|
+
\begin{itemize}
|
9
|
+
\item \small {\texttt{output.less}, containing extensive information about the trimming of each sequence. It can be viewed in a terminal using the command \texttt{less -R}.}
|
10
|
+
\item \small {\texttt{used\_params.txt}, containing the complete set of parameters used for execution of SeqTrimNext with your data}
|
11
|
+
\item \small {\texttt{rejected.txt}, containing a list of rejected sequences together with the reason for their removal.}
|
12
|
+
\item \small {\texttt{initial\_stats.json}, containing statistics for raw sequences.}
|
13
|
+
\item \small {\texttt{stats.json}, containing the statistics of the cleaning process.}
|
14
|
+
\item \small {There is a collection of \texttt{folders} that gather sequences with the same MID; each folder contains a \texttt{sequences} file (in \textsc{FastQ} format) with useful reads. There may also exist a file with reads containing low-complexity regions. If you want to reconstruct an \textsc{SFF} with the useful segment of each pre-processed read, use the \texttt{sff\_info} file in combination with the original \textsc{SFF} file for the \texttt{sfffile} tool.}
|
15
|
+
\end{itemize}
|
16
|
+
\item{graphs}
|
17
|
+
\begin{itemize}
|
18
|
+
\item \small {\texttt{size\_stats.png}, a graph with the distribution of read lengths in raw data (see Fig. \ref{input_graph}).}
|
19
|
+
\item \small {\texttt{qualities.png}, a graph to inspect read qualities in raw data (see Fig. \ref{qv_graph}).}
|
20
|
+
\item \small {\texttt{PluginExtractInserts\_insert\_size.png}, a graph with the distribution of read lengths after SeqTrimNext pre-processing (see Fig. \ref{output_graph}).}
|
21
|
+
\item \small {There are other graphs (mostly bar plots) that illustrate the quality of pre-processed reads. All are in PNG format.}
|
22
|
+
\end{itemize}
|
23
|
+
\item {latex}
|
24
|
+
\begin{itemize}
|
25
|
+
\item \small {It is provided as a compressed file \texttt{latex.zip} containing all ``.tex'' files required to compile this document. Graphs are taken from the \texttt{graphs} folder.}
|
26
|
+
\end{itemize}
|
27
|
+
\end{itemize}
|
28
|
+
|
29
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
%!TEX root = FinalReport.tex
|
2
|
+
|
3
|
+
\IfFileExists{graphs/PluginExtractInserts_insert_size.png}{
|
4
|
+
\begin{figure}[H]
|
5
|
+
\begin{minipage}{\textwidth}
|
6
|
+
|
7
|
+
The next figure is equivalent to Figure~\ref{input_graph} but uses the output reads (useful sequences). The mode is expected to decrease, but the shape of the plot should be similar.
|
8
|
+
|
9
|
+
\begin{center}
|
10
|
+
\includegraphics[width=0.9\textwidth]{graphs/PluginExtractInserts_insert_size.png}
|
11
|
+
\caption{Size distribution of the output sequences. Short sequences ($<\texttt{min\_insert\_size\_trimmed}$) were removed. [PluginExtractInserts\_insert\_size.png]}
|
12
|
+
\label{output_graph}
|
13
|
+
\end{center}
|
14
|
+
\end{minipage}
|
15
|
+
\end{figure}
|
16
|
+
|
17
|
+
Summary statistics of the SeqTrimNext analysis. Please read all warnings carefully, as they indicate concerns about your data.
|
18
|
+
Full statistics of your data and of the SeqTrimNext pre-processing can be found in the files \texttt{initial\_stats.json} and \texttt{stats.json}.
|
19
|
+
}
|
20
|
+
{
|
21
|
+
|
22
|
+
}
|
Binary file
|
@@ -0,0 +1,21 @@
|
|
1
|
+
%!TEX root = FinalReport.tex
|
2
|
+
|
3
|
+
|
4
|
+
\IfFileExists{graphs/qualities.png}{
|
5
|
+
\begin{figure}[H]
|
6
|
+
\begin{minipage}{\textwidth}
|
7
|
+
|
8
|
+
The next figure illustrates the distribution of quality values (QV) for each position on the reads from the input data.
|
9
|
+
|
10
|
+
\begin{center}
|
11
|
+
\includegraphics[width=0.9\textwidth]{graphs/qualities.png}
|
12
|
+
\caption{Distribution of the QV by position in the read. The useful part of the sequences corresponds to a \color{red}mean QV\color{black} $>20$. You can also see the \color{blue}maximum QV\color{black} (which should be $\sim40$) and the \color{green}minimum QV\color{black}. When sequences are becoming very bad, \color{green}minimum QV\color{black} $>0$ [qualities.png]}
|
13
|
+
\label{qv_graph}
|
14
|
+
\end{center}
|
15
|
+
|
16
|
+
\end{minipage}
|
17
|
+
\end{figure}
|
18
|
+
}
|
19
|
+
{
|
20
|
+
|
21
|
+
}
|
Binary file
|
data/script/console
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# File: script/console
|
3
|
+
irb = RUBY_PLATFORM =~ /(:?mswin|mingw)/ ? 'irb.bat' : 'irb'
|
4
|
+
|
5
|
+
libs = " -r irb/completion"
|
6
|
+
# Perhaps use a console_lib to store any extra methods I may want available in the console
|
7
|
+
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
|
8
|
+
libs << " -r #{File.dirname(__FILE__) + '/../lib/seqtrimnext_report.rb'}"
|
9
|
+
puts "Loading seqtrimnext_report gem"
|
10
|
+
exec "#{irb} #{libs} --simple-prompt"
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'rubigen'
|
6
|
+
rescue LoadError
|
7
|
+
require 'rubygems'
|
8
|
+
require 'rubigen'
|
9
|
+
end
|
10
|
+
require 'rubigen/scripts/destroy'
|
11
|
+
|
12
|
+
ARGV.shift if ['--help', '-h'].include?(ARGV[0])
|
13
|
+
RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
|
14
|
+
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'rubigen'
|
6
|
+
rescue LoadError
|
7
|
+
require 'rubygems'
|
8
|
+
require 'rubigen'
|
9
|
+
end
|
10
|
+
require 'rubigen/scripts/generate'
|
11
|
+
|
12
|
+
ARGV.shift if ['--help', '-h'].include?(ARGV[0])
|
13
|
+
RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
|
14
|
+
RubiGen::Scripts::Generate.new.run(ARGV)
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: seqtrimnext_report
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.2
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Noe Fernandez & Dario Guerrero
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-06-15 00:00:00 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: seqtrimnext
|
17
|
+
prerelease: false
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 2.0.31
|
24
|
+
type: :runtime
|
25
|
+
version_requirements: *id001
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: hoe
|
28
|
+
prerelease: false
|
29
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: 2.8.0
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id002
|
37
|
+
description: A PDF report generator for SeqtrimNEXT preprocessing software from SCBI.
|
38
|
+
email:
|
39
|
+
- noefp@gmail.com, dariogf@gmail.com
|
40
|
+
executables:
|
41
|
+
- generate_report.rb
|
42
|
+
extensions: []
|
43
|
+
|
44
|
+
extra_rdoc_files:
|
45
|
+
- History.txt
|
46
|
+
- Manifest.txt
|
47
|
+
- PostInstall.txt
|
48
|
+
files:
|
49
|
+
- History.txt
|
50
|
+
- Manifest.txt
|
51
|
+
- PostInstall.txt
|
52
|
+
- README.rdoc
|
53
|
+
- Rakefile
|
54
|
+
- lib/seqtrimnext_report.rb
|
55
|
+
- script/console
|
56
|
+
- script/destroy
|
57
|
+
- script/generate
|
58
|
+
- test/test_helper.rb
|
59
|
+
- test/test_seqtrimnext_report.rb
|
60
|
+
- bin/generate_report.rb
|
61
|
+
- lib/seqtrimnext_report/classes/params_report.rb
|
62
|
+
- lib/seqtrimnext_report/classes/rejected_report.rb
|
63
|
+
- lib/seqtrimnext_report/classes/stats_report.rb
|
64
|
+
- lib/seqtrimnext_report/config/plugin_nts.json
|
65
|
+
- lib/seqtrimnext_report/config/plugin_seqs.json
|
66
|
+
- lib/seqtrimnext_report/latex_src/input_graph.tex
|
67
|
+
- lib/seqtrimnext_report/latex_src/main.tex
|
68
|
+
- lib/seqtrimnext_report/latex_src/output_files.tex
|
69
|
+
- lib/seqtrimnext_report/latex_src/output_graph.tex
|
70
|
+
- lib/seqtrimnext_report/latex_src/piescbi.jpg
|
71
|
+
- lib/seqtrimnext_report/latex_src/qv_graph.tex
|
72
|
+
- lib/seqtrimnext_report/latex_src/ref_seqs.png
|
73
|
+
homepage: http://www.scbi.uma.es/downloads
|
74
|
+
licenses: []
|
75
|
+
|
76
|
+
post_install_message: PostInstall.txt
|
77
|
+
rdoc_options:
|
78
|
+
- --main
|
79
|
+
- README.rdoc
|
80
|
+
require_paths:
|
81
|
+
- lib
|
82
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: "0"
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: "0"
|
94
|
+
requirements: []
|
95
|
+
|
96
|
+
rubyforge_project: seqtrimnext_report
|
97
|
+
rubygems_version: 1.7.2
|
98
|
+
signing_key:
|
99
|
+
specification_version: 3
|
100
|
+
summary: A PDF report generator for SeqtrimNEXT preprocessing software from SCBI.
|
101
|
+
test_files:
|
102
|
+
- test/test_helper.rb
|
103
|
+
- test/test_seqtrimnext_report.rb
|