bio-ngs 0.3.2.alpha.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +39 -0
- data/Gemfile.lock +81 -0
- data/LICENSE.txt +28 -0
- data/README.rdoc +240 -0
- data/Rakefile +60 -0
- data/VERSION +1 -0
- data/bin/biongs +35 -0
- data/bio-ngs.gemspec +215 -0
- data/ext/mkrf_conf.rb +87 -0
- data/lib/bio-ngs.rb +54 -0
- data/lib/bio/appl/ngs/bcl2qseq.rb +93 -0
- data/lib/bio/appl/ngs/blast.rb +36 -0
- data/lib/bio/appl/ngs/bowtie-inspect.rb +50 -0
- data/lib/bio/appl/ngs/cufflinks.rb +489 -0
- data/lib/bio/appl/ngs/fastx.rb +170 -0
- data/lib/bio/appl/ngs/samtools.rb +118 -0
- data/lib/bio/appl/ngs/sff_extract.rb +23 -0
- data/lib/bio/appl/ngs/tophat.rb +158 -0
- data/lib/bio/ngs/converter.rb +100 -0
- data/lib/bio/ngs/core_ext.rb +12 -0
- data/lib/bio/ngs/db.rb +66 -0
- data/lib/bio/ngs/db/migrate/homology/201105030707_create_blastout.rb +22 -0
- data/lib/bio/ngs/db/migrate/homology/201105030709_create_goannotation.rb +29 -0
- data/lib/bio/ngs/db/migrate/ontology/201105030708_create_go.rb +18 -0
- data/lib/bio/ngs/db/migrate/ontology/201105030710_create_gene_go.rb +17 -0
- data/lib/bio/ngs/db/migrate/ontology/201105030711_create_gene.rb +16 -0
- data/lib/bio/ngs/db/models.rb +1 -0
- data/lib/bio/ngs/db/models/homology.rb +8 -0
- data/lib/bio/ngs/db/models/ontology.rb +16 -0
- data/lib/bio/ngs/ext/bin/common/fastq_coverage_graph.sh +161 -0
- data/lib/bio/ngs/ext/bin/common/sff_extract +1505 -0
- data/lib/bio/ngs/ext/bin/linux/samtools +0 -0
- data/lib/bio/ngs/ext/bin/osx/samtools +0 -0
- data/lib/bio/ngs/ext/versions.yaml +73 -0
- data/lib/bio/ngs/graphics.rb +189 -0
- data/lib/bio/ngs/homology.rb +102 -0
- data/lib/bio/ngs/ontology.rb +103 -0
- data/lib/bio/ngs/quality.rb +64 -0
- data/lib/bio/ngs/record.rb +50 -0
- data/lib/bio/ngs/task.rb +46 -0
- data/lib/bio/ngs/utils.rb +176 -0
- data/lib/development_tasks.rb +34 -0
- data/lib/enumerable.rb +37 -0
- data/lib/tasks/bwa.thor +126 -0
- data/lib/tasks/convert.thor +454 -0
- data/lib/tasks/history.thor +51 -0
- data/lib/tasks/homology.thor +121 -0
- data/lib/tasks/ontology.thor +93 -0
- data/lib/tasks/project.thor +51 -0
- data/lib/tasks/quality.thor +142 -0
- data/lib/tasks/rna.thor +126 -0
- data/lib/tasks/sff_extract.thor +9 -0
- data/lib/templates/README.tt +43 -0
- data/lib/templates/db.tt +6 -0
- data/lib/wrapper.rb +225 -0
- data/spec/converter_qseq_spec.rb +56 -0
- data/spec/fixture/s_1_1_1108_qseq.txt +100 -0
- data/spec/quality_spec.rb +40 -0
- data/spec/sff_extract_spec.rb +98 -0
- data/spec/spec_helper.rb +55 -0
- data/spec/tophat_spec.rb +99 -0
- data/spec/utils_spec.rb +22 -0
- data/test/conf/test_db.yml +4 -0
- data/test/data/blastoutput.xml +69 -0
- data/test/data/gene-GO.json +1 -0
- data/test/data/goa_uniprot +27 -0
- data/test/data/goslim_goa.obo +1763 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-ngs.rb +17 -0
- data/test/test_db.rb +21 -0
- data/test/test_homology.rb +102 -0
- data/test/test_ngs.rb +21 -0
- data/test/test_ontology.rb +74 -0
- data/test/test_utils.rb +29 -0
- metadata +460 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
#
|
|
2
|
+
# converter.rb - convert qseq format to fastq
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2011
|
|
5
|
+
# Raoul Bonnal <r@bioruby.org>,
|
|
6
|
+
# Ranzani Valeria <ranzani@ingm.it>
|
|
7
|
+
# License:: The Ruby License
|
|
8
|
+
#
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
module Bio
  module Ngs
    module Converter
      # Converts qseq records (one whitespace-separated record per line) into
      # FASTQ records.
      #
      # Assign the data to +buffer+, choose a +type+ (:pe or :se) and iterate
      # with #to_fastq; afterwards +stats+ holds the counters of that run.
      class Qseq

        # Read layouts the converter knows how to format.
        VALID_TYPES = [:pe, :se]

        # Source buffer, any object responding to +each_line+:
        #  String with \n as line separator
        #  File (reading)
        attr_accessor :buffer
        # Current read layout: :pe, :se, or nil when unset or invalid.
        attr_reader :type
        # Statistics about total reads and bases, passed filter or not.
        # Empty until #to_fastq has completed once.
        attr_reader :stats

        # default_type:: :pe or :se; any other value leaves the type unset (nil).
        def initialize(default_type=nil)
          @type = VALID_TYPES.include?(default_type) ? default_type : nil
          @stats = {}
        end

        # Set the read layout. Anything other than :pe or :se resets it to nil.
        def type=(data)
          @type = VALID_TYPES.include?(data) ? data : nil
        end

        # Yield every record of the buffer converted to FASTQ. A record that
        # did not pass the filter is yielded as nil, so remember to remove the
        # nil values if you are building an array. Blank lines are skipped and
        # are not counted.
        #
        # stats:: currently unused; kept so existing callers keep working.
        #
        # Returns the statistics Hash, which is also stored in +stats+.
        # Raises RuntimeError when no valid type has been set.
        def to_fastq(stats=false)
          raise "Type of qseq not specifed." if type.nil?

          converter = "qseq2fastq_#{type}"
          total = 0
          passed = empty_tally
          rejected = empty_tally

          # each_line is available on String, File and StringIO alike, whereas
          # IO#lines no longer exists on current Rubies.
          @buffer.each_line do |line|
            fields = line.split
            next if fields.empty? # blank line, nothing to convert

            read = send(converter, fields)
            total += 1

            tally = read ? passed : rejected
            quality = fields[9]
            tally[:reads] += 1
            tally[:b_quality] += quality.count("B")
            # NOTE(review): "N" is counted in the quality field (index 9), not
            # in the sequence field (index 8) - confirm this is intended.
            tally[:n] += quality.count("N")
            tally[:bases] += quality.size

            yield read
          end

          @stats = {:reads_total => total,
                    :reads_passed => passed[:reads],
                    :reads_rejected => rejected[:reads],
                    :bases_passed_total => passed[:bases],
                    :bases_rejected_total => rejected[:bases],
                    :bases_passed_with_b_quality => passed[:b_quality],
                    :bases_rejected_with_b_quality => rejected[:b_quality],
                    :bases_passed_with_n => passed[:n],
                    :bases_rejected_with_n => rejected[:n]}
        end

        # Return the read in fastq from a paired-end read dataset.
        # qseq is an Array of strings generated from a raw line of a qseq file.
        # Returns nil unless the field at index 10 is "1".
        def qseq2fastq_pe(qseq)
          return unless qseq[10] == "1"
          "@#{qseq[0]}:#{qseq[2]}:#{qseq[3]}:#{qseq[4]}:#{qseq[5]}#0/#{qseq[7]}\n#{qseq[8].gsub(/\./,'N')}\n+\n#{qseq[9]}"
        end

        # Return the read in fastq from a single read dataset.
        # qseq is an Array of strings generated from a raw line of a qseq file.
        # Returns nil unless the field at index 10 is "1".
        def qseq2fastq_se(qseq)
          return unless qseq[10] == "1"
          "@#{qseq[0]}:#{qseq[2]}:#{qseq[3]}:#{qseq[4]}:#{qseq[5]}#0/\n#{qseq[8].gsub(/\./,'N')}\n+\n#{qseq[9]}"
        end

        private

        # Fresh set of counters for one group of reads (passed or rejected).
        def empty_tally
          {:reads => 0, :bases => 0, :b_quality => 0, :n => 0}
        end

      end #Qseq
    end #Converter
  end #Ngs
end #Bio
|
data/lib/bio/ngs/db.rb
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#
|
|
2
|
+
#
|
|
3
|
+
# Copyright:: Copyright (C) 2011
|
|
4
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
#
|
|
8
|
+
|
|
9
|
+
module Bio
  module Ngs
    # Wrapper around ActiveRecord for the databases listed in DB_TYPES:
    # opens the connection, runs the migrations, exports tables and performs
    # bulk inserts.
    class Db

      require 'active_support/inflector'
      DB_TYPES = [:ontology, :homology]

      # Open a connection to a database using ActiveRecord.
      #
      # args[0]:: database type, one of DB_TYPES
      # args[1]:: optional path to the YAML connection settings; defaults to
      #           conf/<type>_db.yml under the current working directory
      #
      # Raises ArgumentError when the database type is not in DB_TYPES.
      def initialize(*args)
        @db_type = args[0]
        unless DB_TYPES.include? @db_type
          raise ArgumentError, "Invalid database type: #{@db_type}"
        end

        yaml_file = args[1] || Dir.pwd+"/conf/#{@db_type}_db.yml"
        @db = ActiveRecord::Base
        @db.establish_connection YAML.load_file(yaml_file)
        # ONLY FOR DEBUG
        #require 'logger'
        #ActiveRecord::Base.logger = Logger.new 'log/db.log'
        # Load the model classes that belong to the selected database type.
        require File.expand_path(File.dirname(__FILE__)+"/db/models/#{@db_type}.rb")
      end

      # Runs AR migrations and create database tables.
      # The migrations are read from db/migrate/<type> next to this file.
      def create_tables(verbose=false)
        ActiveRecord::Migration.verbose = verbose
        ActiveRecord::Migrator.migrate(File.expand_path(File.dirname(__FILE__)+"/db/migrate/#{@db_type}"),nil)
      end

      # Export a database table into a tab-separated file.
      # The first line holds the column names, then one line per record.
      #
      # table::   table name (String or Symbol), resolved to its model class
      # fileout:: path of the file to write (overwritten if it exists)
      def export(table,fileout)
        klass = @db.const_get(table.to_s.singularize.camelize)
        columns = klass.column_names
        # Block form closes (and therefore flushes) the file even on errors.
        File.open(fileout,"w") do |out|
          out.write columns.join("\t")+"\n"
          klass.all.each do |record|
            attributes = record.attributes
            out.write columns.map {|c| attributes[c]}.join("\t")+"\n"
          end
        end
      end

      # Wrapper for DB transaction to execute many INSERT queries into a single transaction
      # This can speed up things particularly for SQLite databases.
      #
      # table::  table name (String or Symbol), resolved to its model class
      # query::  SQL statement with bind placeholders
      # values:: Array of Arrays, one set of bind values per statement
      def insert_many(table,query,values=[])
        klass = @db.const_get(table.to_s.singularize.camelize)
        klass.transaction do
          values.each do |v|
            # Bind values are escaped by ActiveRecord before execution.
            sql = @db.send(:sanitize_sql_array,[query]+v)
            @db.connection.execute(sql)
          end
        end
      end

    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Migration that creates (up) and drops (down) the blast_outputs table.
class CreateBlastout < ActiveRecord::Migration

  def self.up
    create_table :blast_outputs do |t|
      # Columns are declared in the same order as before.
      [:query_id, :target_id, :target_description].each { |column| t.string column }
      t.float :evalue, :precision => :double
      [:identity, :positive].each { |column| t.float column }
    end

    # Index on the query identifier column.
    add_index :blast_outputs, :query_id
  end

  def self.down
    drop_table :blast_outputs
  end

end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Migration that creates (up) and drops (down) the go_annotations table.
class CreateGoannotation < ActiveRecord::Migration

  def self.up
    # Every column is a string; the list keeps the original column order.
    string_columns = [
      :db, :entry_id, :symbol, :qualifier, :go_id,
      :db_ref, :evidence, :additional_identifier, :aspect, :name,
      :synonym, :molecule_type, :taxon_id, :date, :assigned_by
    ]

    create_table :go_annotations do |t|
      string_columns.each { |column| t.string column }
    end

    add_index :go_annotations, :entry_id
  end

  def self.down
    drop_table :go_annotations
  end

end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Migration that creates (up) and drops (down) the go table.
class CreateGo < ActiveRecord::Migration

  def self.up
    create_table :go do |t|
      # All four columns are strings, declared in the original order.
      [:go_id, :name, :namespace, :is_a].each { |column| t.string column }
    end

    add_index :go, :go_id
  end

  def self.down
    drop_table :go
  end

end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Migration that creates (up) and drops (down) the gene_gos table, which
# holds one gene_id / go_id pair per row.
class CreateGeneGo < ActiveRecord::Migration

  def self.up
    key_columns = [:gene_id, :go_id]

    create_table :gene_gos do |t|
      key_columns.each { |column| t.integer column }
    end

    # One separate index per column.
    key_columns.each { |column| add_index :gene_gos, column }
  end

  def self.down
    drop_table :gene_gos
  end

end
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Model stored in the "go" table (the name is set explicitly instead of
# being derived from the class name).
class Go < ActiveRecord::Base
  self.table_name = "go"

  # The association used by :through is declared first: newer ActiveRecord
  # versions reject a has_many :through that precedes it.
  has_many :gene_gos
  has_many :genes, :through => :gene_gos

end
|
|
7
|
+
|
|
8
|
+
# Gene model, linked to Go records through the gene_gos association.
class Gene < ActiveRecord::Base
  # The association used by :through is declared first: newer ActiveRecord
  # versions reject a has_many :through that precedes it.
  has_many :gene_gos
  has_many :go, :through => :gene_gos
end
|
|
12
|
+
|
|
13
|
+
# Join model: each record belongs to one Go record and one Gene record.
class GeneGo < ActiveRecord::Base
  belongs_to :go
  belongs_to :gene
end
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
|
|
3
|
+
# FASTX-toolkit - FASTA/FASTQ preprocessing tools.
|
|
4
|
+
# Copyright (C) 2009 A. Gordon (gordon@cshl.edu)
|
|
5
|
+
#
|
|
6
|
+
# This program is free software: you can redistribute it and/or modify
|
|
7
|
+
# it under the terms of the GNU Affero General Public License as
|
|
8
|
+
# published by the Free Software Foundation, either version 3 of the
|
|
9
|
+
# License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This program is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
# GNU Affero General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Affero General Public License
|
|
17
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18
|
+
|
|
19
|
+
# Print the command-line help on standard output, then terminate the script.
usage()
{
	# One argument per output line; empty arguments produce the blank lines.
	printf '%s\n' \
		"Solexa-Quality BoxPlot plotter" \
		"Generates a solexa quality score box-plot graph " \
		"" \
		"Usage: $0 [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]" \
		"" \
		" [-p] - Generate PostScript (.PS) file. Default is PNG image." \
		" [-i INPUT.TXT] - Input file. Should be the output of \"solexa_quality_statistics\" program." \
		" [-o OUTPUT] - Output file name. default is STDOUT." \
		" [-t TITLE] - Title (usually the solexa file name) - will be plotted on the graph." \
		""
	exit
}
|
|
33
|
+
|
|
34
|
+
#
|
|
35
|
+
# Input Data columns: #pos cnt min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count
|
|
36
|
+
# As produced by "solexa_quality_statistics" program
|
|
37
|
+
|
|
38
|
+
# Defaults; each one can be overridden by a command-line option below.
TITLE=""                                  # -t: default title is empty
FILENAME=""                               # -i: input file (mandatory)
OUTPUTTERM="set term png size 2048,768"   # -p switches to PostScript; default is PNG
OUTPUTFILE="/dev/stdout"                  # -o: default output file is simply stdout

# The leading ':' in the option string selects getopts' silent mode, so
# unknown options and missing arguments both end up in the '*' branch.
while getopts ":t:i:o:ph" Option
do
	case "$Option" in
		t ) TITLE="for $OPTARG" ;;
		i ) FILENAME="$OPTARG" ;;
		o ) OUTPUTFILE="$OPTARG" ;;
		p ) OUTPUTTERM="set term postscript enhanced color \"Helvetica\" 8" ;;
		h ) usage ;;
		# Diagnostics go to stderr: stdout is the default channel for the image.
		# 255 is the status the previous 'exit -1' produced.
		* ) echo "unrecognized argument. use '-h' for usage information." >&2; exit 255 ;;
	esac
done
# Drop the options that were parsed, leaving only positional arguments.
shift $((OPTIND - 1))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# An input file is mandatory: show the help text (which exits) when -i was not given.
if [ -z "$FILENAME" ]; then
	usage
fi

# The input file must be readable. Report the file name itself: "$1" is not
# the input file once the parsed options have been shifted away.
if [ ! -r "$FILENAME" ]; then
	echo "Error: can't open input file ($FILENAME)." >&2
	exit 1
fi
|
|
65
|
+
|
|
66
|
+
##
|
|
67
|
+
## Input validation
|
|
68
|
+
## Too many users (in galaxy) try to plot a FASTQ file
|
|
69
|
+
## (without using the 'fastq statistics' tool first).
|
|
70
|
+
##
|
|
71
|
+
## gnuplot's error in that case is cryptic, and support emails are annoying.
|
|
72
|
+
##
|
|
73
|
+
## try to detect FASTA/FASTQ input, and give a detailed, easy-to-understand warning.
|
|
74
|
+
##
|
|
75
|
+
##
|
|
76
|
+
# awk program: looks at the first two lines of the input and prints "FASTA"
# or "FASTQ" when line 1 starts with '>' or '@' and line 2 consists only of
# the letters A, C, G and T. It prints nothing otherwise.
AWK_FASTX_DETECTION='
NR==1 && $0 ~ /^>/ { fasta_id = 1 }
NR==1 && $0 ~ /^@/ { fastq_id = 1 }
NR==2 && $0 ~ /^[ACGT][ACGT]*$/ { nucleotides = 1 }
NR>3 { exit }
END { if ( fasta_id && nucleotides ) { print "FASTA" }
if ( fastq_id && nucleotides ) { print "FASTQ" }
}'

INPUT_TYPE=$(awk "$AWK_FASTX_DETECTION" "$FILENAME")

# Abort with an explanation when a raw sequence file was passed in.
case "$INPUT_TYPE" in
	FASTA)
		#this doesn't even make sense: FASTA files don't contain any quality scores
		cat >&2 <<EOF
Error: It looks like your input file is a FASTA file.

FASTA files do not contain quality scores, and can not be used with this tool.
EOF
		exit 1
		;;
	FASTQ)
		cat >&2 <<EOF
Error: It looks like your input file is a FASTQ file.

This tool (fastq-quality-plot) can't use FASTQ files directly - it requires a tabular text file conaining summary statistic about your FASTQ file.

In Galaxy,
Please use the "Compute Quality Statistics" tool (in the "NGS: QC and Manipulation" category) to compute the quality statistics report, and then use this tool with the new statistics report.

On the command line,
Please use the "fastx_quality_stats" program to create the statistics report.
EOF
		exit 1
		;;
esac
|
|
110
|
+
|
|
111
|
+
##
|
|
112
|
+
## Even if this is not a FASTA/FASTQ file,
|
|
113
|
+
## users can still use incompatible input files.
|
|
114
|
+
## Try to detect it and abort with a warning.
|
|
115
|
+
# awk program: exits with status 2 when the first three fields of the first
# line are "column", "count" and "min"; it stops reading after line 1.
AWK_VALID_STAT='NR==1 && $1=="column" && $2=="count" && $3=="min" { exit 2 } NR>1 { exit }'

awk "$AWK_VALID_STAT" "$FILENAME"
# Any status other than 2 means the expected header was not found.
[ $? -eq 2 ] || {
	cat >&2 <<EOF
Error: Input file is not a valid statistics report.

This tool (fastq-quality-plot) requires a tabular text file conaining summary statistic about your FASTQ file.

In Galaxy,
Please use the "Compute Quality Statistics" tool (in the "NGS: QC and Manipulation" category) to compute the quality statistics report, and then use this tool with the new statistics report.

On the command line,
Please use the "fastx_quality_stats" program to create the statistics report.
EOF
	exit 1
}
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
#Read number of cycles from the stats file (each line is a cycle, minus the header line)
|
|
135
|
+
#But for the graph, I want xrange to reach (num_cycles+1), so I don't subtract 1 now.
|
|
136
|
+
# Number of lines in the statistics file, used as the upper bound of the x axis.
NUM_CYCLES=$(wc -l < "$FILENAME")

# gnuplot script: the terminal is chosen by OUTPUTTERM, and column 2 of the
# input file is plotted against column 1 as a line graph.
GNUPLOTCMD="
$OUTPUTTERM
set boxwidth 0.8
set size 1,1
set key Left inside
set xlabel \"read position\"
set ylabel \"Coverage\"
set title \"Coverage $TITLE\"
#set auto y
#set bars 4.0
set xrange [ 0: $NUM_CYCLES ]
#set yrange [-15:45]
#set y2range [-15:45]
set xtics 1
set x2tics 1
#set ytics 2
#set y2tics 2
set tics out
set grid ytics
set style fill empty
plot '$FILENAME' using 1:2 with lines lt 1 lw 1 title 'Coverage'
"

# Feed the script to gnuplot and send the rendered graph to the output file.
printf '%s\n' "$GNUPLOTCMD" | gnuplot > "$OUTPUTFILE"
|