bio-ngs 0.4.2.alpha.01 → 0.4.4.alpha.01
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +5 -4
- data/Gemfile.lock +34 -32
- data/README.rdoc +3 -0
- data/VERSION +1 -1
- data/bin/biongs +0 -5
- data/bio-ngs.gemspec +17 -28
- data/ext/mkrf_conf.rb +5 -1
- data/lib/bio-ngs.rb +9 -2
- data/lib/bio/appl/ngs/bcftools.rb +100 -0
- data/lib/bio/appl/ngs/bwa.rb +210 -0
- data/lib/bio/appl/ngs/fastx.rb +22 -6
- data/lib/bio/appl/ngs/samtools.rb +85 -0
- data/lib/bio/appl/ngs/tophat.rb +1 -1
- data/lib/bio/ngs/ext/versions.yaml +16 -0
- data/lib/bio/ngs/record.rb +2 -2
- data/lib/bio/ngs/utils.rb +11 -8
- data/lib/enumerable.rb +13 -1
- data/lib/tasks/bwa.thor +32 -106
- data/lib/tasks/filter.thor +75 -0
- data/lib/tasks/pre.thor +17 -37
- data/lib/tasks/project.thor +2 -1
- data/lib/tasks/quality.thor +27 -5
- data/lib/wrapper.rb +32 -5
- metadata +100 -99
@@ -0,0 +1,210 @@
|
|
1
|
+
#
|
2
|
+
# bwa.rb - wrapper classes for the bwa command line program (index, aln, samse, sampe, bwasw, fastmap)
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2011
|
5
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
#
|
9
|
+
#
|
10
|
+
# Program: bwa (alignment via Burrows-Wheeler transformation)
|
11
|
+
# Version: 0.6.0-r85
|
12
|
+
#
|
13
|
+
|
14
|
+
#Usage: bwa <command> [options]
|
15
|
+
#
|
16
|
+
#Command: index index sequences in the FASTA format
|
17
|
+
# aln gapped/ungapped alignment
|
18
|
+
# samse generate alignment (single ended)
|
19
|
+
# sampe generate alignment (paired ended)
|
20
|
+
# bwasw BWA-SW for long queries
|
21
|
+
# fastmap identify super-maximal exact matches
|
22
|
+
#
|
23
|
+
# fa2pac convert FASTA to PAC format
|
24
|
+
# pac2bwt generate BWT from PAC
|
25
|
+
# pac2bwtgen alternative algorithm for generating BWT
|
26
|
+
# bwtupdate update .bwt to the new format
|
27
|
+
# bwt2sa generate SA from BWT and Occ
|
28
|
+
# pac2cspac convert PAC to color-space PAC
|
29
|
+
# stdsw standard SW/NW alignment
|
30
|
+
|
31
|
+
|
32
|
+
module Bio
|
33
|
+
module Ngs
|
34
|
+
module Bwa
|
35
|
+
|
36
|
+
#Usage: bwa index [-a bwtsw|div|is] [-c] <in.fasta>
|
37
|
+
#
|
38
|
+
#Options: -a STR BWT construction algorithm: bwtsw or is [is]
|
39
|
+
# -p STR prefix of the index [same as fasta name]
|
40
|
+
# -c build color-space index
|
41
|
+
|
42
|
+
# Wrapper around the `bwa index` sub-command.
#
# Declares the command line switches listed in the `bwa index` usage text
# (-a, -p and -c) through the Bio::Command::Wrapper DSL.
# NOTE(review): use_aliases presumably makes the wrapper emit the short
# aliases on the command line - confirm against Bio::Command::Wrapper.
class Index
  include Bio::Command::Wrapper
  set_program Bio::Ngs::Utils.binary("bwa")
  set_sub_program "index"
  use_aliases
  add_option :algorithm, :type => :string, :aliases => "-a", :desc => "BWT construction algorithm: bwtsw or is [is]"
  add_option :prefix, :type => :string, :aliases => "-p", :desc => "prefix of the index [same as fasta name]"
  # -c appears in the `bwa index` usage text but was not exposed before.
  add_option :colorspace, :type => :boolean, :aliases => "-c", :desc => "build color-space index"
end #Index
|
50
|
+
|
51
|
+
|
52
|
+
#Usage: bwa aln [options] <prefix> <in.fq>
|
53
|
+
#
|
54
|
+
#Options: -n NUM max #diff (int) or missing prob under 0.02 err rate (float) [0.04]
|
55
|
+
# -o INT maximum number or fraction of gap opens [1]
|
56
|
+
# -e INT maximum number of gap extensions, -1 for disabling long gaps [-1]
|
57
|
+
# -i INT do not put an indel within INT bp towards the ends [5]
|
58
|
+
# -d INT maximum occurrences for extending a long deletion [10]
|
59
|
+
# -l INT seed length [32]
|
60
|
+
# -k INT maximum differences in the seed [2]
|
61
|
+
# -m INT maximum entries in the queue [2000000]
|
62
|
+
# -t INT number of threads [1]
|
63
|
+
# -M INT mismatch penalty [3]
|
64
|
+
# -O INT gap open penalty [11]
|
65
|
+
# -E INT gap extension penalty [4]
|
66
|
+
# -R INT stop searching when there are >INT equally best hits [30]
|
67
|
+
# -q INT quality threshold for read trimming down to 35bp [0]
|
68
|
+
# -f FILE file to write output to instead of stdout
|
69
|
+
# -B INT length of barcode
|
70
|
+
# -L log-scaled gap penalty for long deletions
|
71
|
+
# -N non-iterative mode: search for all n-difference hits (slooow)
|
72
|
+
# -I the input is in the Illumina 1.3+ FASTQ-like format
|
73
|
+
# -b the input read file is in the BAM format
|
74
|
+
# -0 use single-end reads only (effective with -b)
|
75
|
+
# -1 use the 1st read in a pair (effective with -b)
|
76
|
+
# -2 use the 2nd read in a pair (effective with -b)
|
77
|
+
# -Y filter Casava-filtered sequences
|
78
|
+
|
79
|
+
# Wrapper around the `bwa aln` sub-command.
#
# Every switch is declared from one ordered table: option name, value type,
# short alias and help text. Declaration order follows the usage text.
class Aln
  include Bio::Command::Wrapper

  set_program(Bio::Ngs::Utils.binary("bwa"))
  set_sub_program("aln")
  use_aliases

  [
    [:num,                   :numeric, "-n", "max #diff (int) or missing prob under 0.02 err rate (float) [0.04]"],
    [:gap_open,              :numeric, "-o", "maximum number or fraction of gap opens [1]"],
    [:gap_ext,               :numeric, "-e", "maximum number of gap extensions, -1 for disabling long gaps [-1]"],
    [:indel,                 :numeric, "-i", "do not put an indel within INT bp towards the ends [5]"],
    [:extending_deletion,    :numeric, "-d", "maximum occurrences for extending a long deletion [10]"],
    [:seed_length,           :numeric, "-l", "seed length [32]"],
    [:seed_diff,             :numeric, "-k", "maximum differences in the seed [2]"],
    [:queue,                 :numeric, "-m", "maximum entries in the queue [2000000]"],
    [:threads,               :numeric, "-t", "number of threads [1]"],
    [:mismatch_penalty,      :numeric, "-M", "mismatch penalty [3]"],
    [:gap_open_penalty,      :numeric, "-O", "gap open penalty [11]"],
    [:gap_extension_penalty, :numeric, "-E", "gap extension penalty [4]"],
    [:best_hit,              :numeric, "-R", "stop searching when there are >INT equally best hits [30]"],
    [:quality_trimming,      :numeric, "-q", "quality threshold for read trimming down to 35bp [0]"],
    [:file_out,              :string,  "-f", "file to write output to instead of stdout"],
    [:barcode_length,        :numeric, "-B", "length of barcode"],
    [:log_scale_penalty,     :boolean, "-L", "log-scaled gap penalty for long deletions"],
    [:non_iterative,         :boolean, "-N", "non-iterative mode: search for all n-difference hits"],
    [:illumina_13,           :boolean, "-I", "the input is in the Illumina 1.3+ FASTQ-like format"],
    [:bam,                   :boolean, "-b", "the input read file is in the BAM format"],
    [:single,                :boolean, "-0", "use single-end reads only (effective with -b)"],
    [:first,                 :boolean, "-1", "use the 1st read in a pair (effective with -b)"],
    [:second,                :boolean, "-2", "use the 2nd read in a pair (effective with -b)"],
    [:filter,                :boolean, "-Y", "filter Casava-filtered sequences"]
  ].each do |opt_name, opt_type, flag, help|
    add_option opt_name, :type => opt_type, :aliases => flag, :desc => help
  end
end # Aln
|
109
|
+
|
110
|
+
|
111
|
+
# Usage: bwa samse [-n max_occ] [-f out.sam] [-r RG_line] <prefix> <in.sai> <in.fq>
|
112
|
+
|
113
|
+
# Wrapper around the `bwa samse` sub-command (single-end SAM generation).
#
# Exposes the three switches shown in the usage line: -n, -f and -r.
class Samse
  include Bio::Command::Wrapper

  set_program(Bio::Ngs::Utils.binary("bwa"))
  set_sub_program("samse")
  use_aliases

  [
    [:max_occ,  :numeric, "-n", "max_occ"],
    [:file_out, :string,  "-f", "file name to save data"],
    [:rg_line,  :string,  "-r", "RG line"]
  ].each do |opt_name, opt_type, flag, help|
    add_option opt_name, :type => opt_type, :aliases => flag, :desc => help
  end
end #Samse
|
122
|
+
|
123
|
+
|
124
|
+
|
125
|
+
#Usage: bwa sampe [options] <prefix> <in1.sai> <in2.sai> <in1.fq> <in2.fq>
|
126
|
+
#
|
127
|
+
#Options: -a INT maximum insert size [500]
|
128
|
+
# -o INT maximum occurrences for one end [100000]
|
129
|
+
# -n INT maximum hits to output for paired reads [3]
|
130
|
+
# -N INT maximum hits to output for discordant pairs [10]
|
131
|
+
# -c FLOAT prior of chimeric rate (lower bound) [1.0e-05]
|
132
|
+
# -f FILE sam file to output results to [stdout]
|
133
|
+
# -r STR read group header line such as `@RG\tID:foo\tSM:bar' [null]
|
134
|
+
# -P preload index into memory (for base-space reads only)
|
135
|
+
# -s disable Smith-Waterman for the unmapped mate
|
136
|
+
# -A disable insert size estimate (force -s)
|
137
|
+
|
138
|
+
# Wrapper around the `bwa sampe` sub-command (paired-end SAM generation).
#
# Declares the switches listed in the `bwa sampe` usage text through the
# Bio::Command::Wrapper DSL.
class Sampe
  include Bio::Command::Wrapper
  set_program Bio::Ngs::Utils.binary("bwa")
  set_sub_program "sampe"
  use_aliases
  add_option :max_insert, :type => :numeric, :aliases => "-a", :desc => "maximum insert size [500]"
  add_option :max_occurrences, :type => :numeric, :aliases => "-o", :desc => "maximum occurrences for one end [100000]"
  add_option :max_hits, :type => :numeric, :aliases => "-n", :desc => "maximum hits to output for paired reads [3]"
  add_option :max_hits_discordant, :type => :numeric, :aliases => "-N", :desc => "maximum hits to output for discordant pairs [10]"
  add_option :chimeric_rate, :type => :numeric, :aliases => "-c", :desc => "prior of chimeric rate (lower bound) [1.0e-05]"
  add_option :file_out, :type => :string, :aliases => "-f", :desc => "sam file to output results to [stdout]"
  # The backslashes are escaped so the help text shows a literal "\t", as in
  # bwa's own usage message, instead of embedding real TAB characters.
  add_option :read_group, :type => :string, :aliases => "-r", :desc => "read group header line such as `@RG\\tID:foo\\tSM:bar' [null]"
  add_option :preload_index, :type => :boolean, :aliases => "-P", :desc => "preload index into memory (for base-space reads only)"
  add_option :disable_sw, :type => :boolean, :aliases => "-s", :desc => "disable Smith-Waterman for the unmapped mate"
  add_option :disable_insert_estimate, :type => :boolean, :aliases => "-A", :desc => "disable insert size estimate (force -s)"
end #Sampe
|
154
|
+
|
155
|
+
|
156
|
+
#Usage: bwa bwasw [options] <target.prefix> <query.fa> [query2.fa]
|
157
|
+
#
|
158
|
+
#Options: -a INT score for a match [1]
|
159
|
+
# -b INT mismatch penalty [3]
|
160
|
+
# -q INT gap open penalty [5]
|
161
|
+
# -r INT gap extension penalty [2]
|
162
|
+
#
|
163
|
+
# -t INT number of threads [1]
|
164
|
+
#
|
165
|
+
# -w INT band width [50]
|
166
|
+
# -m FLOAT mask level [0.50]
|
167
|
+
#
|
168
|
+
# -T INT score threshold divided by a [30]
|
169
|
+
# -s INT maximum seeding interval size [3]
|
170
|
+
# -z INT Z-best [1]
|
171
|
+
# -N INT # seeds to trigger reverse alignment [5]
|
172
|
+
# -c FLOAT coefficient of length-threshold adjustment [5.5]
|
173
|
+
# -H in SAM output, use hard clipping rather than soft
|
174
|
+
# -f FILE file to output results to instead of stdout
|
175
|
+
|
176
|
+
# Wrapper around the `bwa bwasw` sub-command (BWA-SW for long queries).
#
# :paired is declared on its own because it carries no short alias; the
# remaining switches come from an ordered table of name, type, alias, help.
class Bwasw
  include Bio::Command::Wrapper

  set_program(Bio::Ngs::Utils.binary("bwa"))
  set_sub_program("bwasw")
  use_aliases

  add_option :paired, :type => :string, :desc => "paired reads"

  [
    [:match_score,      :numeric, "-a", "score for a match [1]"],
    [:mismatch_penalty, :numeric, "-b", "mismatch penalty [3]"],
    [:gap_open_penalty, :numeric, "-q", "gap open penalty [5]"],
    [:gap_ext_penalty,  :numeric, "-r", "gap extension penalty [2]"],
    [:threads,          :numeric, "-t", "number of threads [1]"],
    [:band_width,       :numeric, "-w", "band width [50]"],
    [:mask_level,       :numeric, "-m", "mask level [0.50]"],
    [:score_threshold,  :numeric, "-T", "score threshold divided by a [30]"],
    [:max_seeding,      :numeric, "-s", "maximum seeding interval size [3]"],
    [:z_best,           :numeric, "-z", "Z-best [1]"],
    [:seed_reverse,     :numeric, "-N", "seeds to trigger reverse alignment [5]"],
    [:length_threshold, :numeric, "-c", "coefficient of length-threshold adjustment [5.5]"],
    [:hard_clip,        :boolean, "-H", "in SAM output, use hard clipping rather than soft"],
    [:file_out,         :string,  "-f", "file to output results to instead of stdout"]
  ].each do |opt_name, opt_type, flag, help|
    add_option opt_name, :type => opt_type, :aliases => flag, :desc => help
  end
end # Bwasw
|
197
|
+
|
198
|
+
# Usage: bwa fastmap [-l minLen=17] [-w maxSaSize=20] <idxbase> <in.fq>
|
199
|
+
# Wrapper around the `bwa fastmap` sub-command.
#
# Exposes the two switches from the usage line: -l (minLen) and -w (maxSaSize).
class Fastmap
  include Bio::Command::Wrapper

  set_program(Bio::Ngs::Utils.binary("bwa"))
  set_sub_program("fastmap")
  use_aliases

  [
    [:min_length,  "-l", "minLen [17]"],
    [:max_sa_size, "-w", "maxSaSize [20]"]
  ].each do |opt_name, flag, help|
    add_option opt_name, :type => :numeric, :aliases => flag, :desc => help
  end
end # Fastmap
|
207
|
+
|
208
|
+
end #Bwa
|
209
|
+
end #Ngs
|
210
|
+
end #Bio
|
data/lib/bio/appl/ngs/fastx.rb
CHANGED
@@ -107,10 +107,9 @@ module Bio
|
|
107
107
|
set_program Bio::Ngs::Utils.binary("fastq_quality_boxplot_graph.sh")
|
108
108
|
use_aliases
|
109
109
|
add_option :ps, :type => :boolean, :aliases => "-p", :desc => "Generate PostScript (.PS) file. Default is PNG image."
|
110
|
-
add_option :output, :type=>:string, :aliases => "-o", :desc => "
|
111
|
-
add_option :input, :type=>:string, :aliases => "-i", :desc => "
|
110
|
+
add_option :output, :type=>:string, :aliases => "-o", :desc => "Output file name. default is STDOUT.", :collapse => true
|
111
|
+
add_option :input, :type=>:string, :aliases => "-i", :desc => "Input file. Should be the output of \"fastx_quality_statistics\" program.", :collapse => true
|
112
112
|
add_option :title, :type => :string, :aliases => "-t", :desc => "Title (usually the solexa file name) - will be plotted on the graph."
|
113
|
-
add_option :quality_type, :type=>:numeric, :default => 33, :aliases => "-Q", :desc=>"Quality of fastq file"
|
114
113
|
end #ReadsBoxPlot
|
115
114
|
|
116
115
|
# Solexa-Reads coverage plotter
|
@@ -127,10 +126,9 @@ module Bio
|
|
127
126
|
set_program Bio::Ngs::Utils.binary("fastq_coverage_graph.sh")
|
128
127
|
use_aliases
|
129
128
|
add_option :ps, :type => :boolean, :aliases => "-p", :desc => "Generate PostScript (.PS) file. Default is PNG image."
|
130
|
-
add_option :output, :type=>:string, :aliases => "-o", :desc => "
|
131
|
-
add_option :input, :type=>:string, :aliases => "-i", :desc => "
|
129
|
+
add_option :output, :type=>:string, :aliases => "-o", :desc => "Output file name. default is STDOUT.", :collapse => true
|
130
|
+
add_option :input, :type=>:string, :aliases => "-i", :desc => "Input file. Should be the output of \"fastx_quality_statistics\" program.", :collapse => true
|
132
131
|
add_option :title, :type => :string, :aliases => "-t", :desc => "Title (usually the solexa file name) - will be plotted on the graph."
|
133
|
-
add_option :quality_type, :type=>:numeric, :default => 33, :aliases => "-Q", :desc=>"Quality of fastq file"
|
134
132
|
end #ReadsCoverage
|
135
133
|
|
136
134
|
|
@@ -188,6 +186,24 @@ module Bio
|
|
188
186
|
add_option :quality_type, :type=>:numeric, :default => 33, :aliases => "-Q", :desc=>"Quality of fastq file"
|
189
187
|
end #ReadsCoverage
|
190
188
|
|
189
|
+
# FASTA/Q Nucleotide Distribution Plotter
|
190
|
+
|
191
|
+
# Usage: /usr/local/bin/fastx_nucleotide_distribution_graph.sh [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]
|
192
|
+
|
193
|
+
# [-p] - Generate PostScript (.PS) file. Default is PNG image.
|
194
|
+
# [-i INPUT.TXT] - Input file. Should be the output of "fastx_quality_statistics" program.
|
195
|
+
# [-o OUTPUT] - Output file name. default is STDOUT.
|
196
|
+
# [-t TITLE] - Title - will be plotted on the graph.
|
197
|
+
# Wrapper around the fastx_nucleotide_distribution_graph.sh script.
#
# Options are declared from an ordered table; the last column holds extra
# settings (only :collapse for the input/output file options) that are
# appended after :type, :aliases and :desc.
class NucleotideDistribution
  include Bio::Command::Wrapper

  set_program(Bio::Ngs::Utils.binary("fastx_nucleotide_distribution_graph.sh"))
  use_aliases

  [
    [:ps,     :boolean, "-p", "Generate PostScript (.PS) file. Default is PNG image.", {}],
    [:output, :string,  "-o", "Output file name. default is STDOUT.", {:collapse => true}],
    [:input,  :string,  "-i", "Input file. Should be the output of \"fastx_quality_statistics\" program.", {:collapse => true}],
    [:title,  :string,  "-t", "Title (usually the solexa file name) - will be plotted on the graph.", {}]
  ].each do |opt_name, opt_type, flag, help, extras|
    add_option opt_name, {:type => opt_type, :aliases => flag, :desc => help}.merge(extras)
  end
end # NucleotideDistribution
|
206
|
+
|
191
207
|
end #Fastx
|
192
208
|
end #Ngs
|
193
209
|
end #Bio
|
@@ -113,6 +113,91 @@ module Bio
|
|
113
113
|
add_option :copy_header, :type => :string, :aliases => "-h", :desc => "copy the header in FILE to <out.bam> [in1.bam]"
|
114
114
|
end #Merge
|
115
115
|
|
116
|
+
#Usage: samtools mpileup [options] in1.bam [in2.bam [...]]
|
117
|
+
|
118
|
+
#Input options:
|
119
|
+
|
120
|
+
# -6 assume the quality is in the Illumina-1.3+ encoding
|
121
|
+
# -A count anomalous read pairs
|
122
|
+
# -B disable BAQ computation
|
123
|
+
# -b FILE list of input BAM files [null]
|
124
|
+
# -C INT parameter for adjusting mapQ; 0 to disable [0]
|
125
|
+
# -d INT max per-BAM depth to avoid excessive memory usage [250]
|
126
|
+
# -E extended BAQ for higher sensitivity but lower specificity
|
127
|
+
# -f FILE faidx indexed reference sequence file [null]
|
128
|
+
# -G FILE exclude read groups listed in FILE [null]
|
129
|
+
# -l FILE list of positions (chr pos) or regions (BED) [null]
|
130
|
+
# -M INT cap mapping quality at INT [60]
|
131
|
+
# -r STR region in which pileup is generated [null]
|
132
|
+
# -R ignore RG tags
|
133
|
+
# -q INT skip alignments with mapQ smaller than INT [0]
|
134
|
+
# -Q INT skip bases with baseQ/BAQ smaller than INT [13]
|
135
|
+
|
136
|
+
#Output options:
|
137
|
+
|
138
|
+
# -D output per-sample DP in BCF (require -g/-u)
|
139
|
+
# -g generate BCF output (genotype likelihoods)
|
140
|
+
# -O output base positions on reads (disabled by -g/-u)
|
141
|
+
# -s output mapping quality (disabled by -g/-u)
|
142
|
+
# -S output per-sample strand bias P-value in BCF (require -g/-u)
|
143
|
+
# -u generate uncompress BCF output
|
144
|
+
|
145
|
+
#SNP/INDEL genotype likelihoods options (effective with `-g' or `-u'):
|
146
|
+
|
147
|
+
# -e INT Phred-scaled gap extension seq error probability [20]
|
148
|
+
# -F FLOAT minimum fraction of gapped reads for candidates [0.002]
|
149
|
+
# -h INT coefficient for homopolymer errors [100]
|
150
|
+
# -I do not perform indel calling
|
151
|
+
# -L INT max per-sample depth for INDEL calling [250]
|
152
|
+
# -m INT minimum gapped reads for indel candidates [1]
|
153
|
+
# -o INT Phred-scaled gap open sequencing error probability [40]
|
154
|
+
# -P STR comma separated list of platforms for indels [all]
|
155
|
+
|
156
|
+
#Notes: Assuming diploid individuals.
|
157
|
+
|
158
|
+
|
159
|
+
# Wrapper around the `samtools mpileup` sub-command.
#
# Options are grouped as in the samtools usage text: input options, output
# options, and SNP/INDEL genotype likelihood options.
# NOTE(review): unlike the Bwa wrappers in this library, this class does not
# call use_aliases - confirm that this is intended.
class Mpileup
  include Bio::Command::Wrapper
  set_program Bio::Ngs::Utils.binary("samtools")
  set_sub_program "mpileup"

  # Input options
  add_option :illumina13, :type => :boolean, :aliases => "-6", :desc => "assume the quality is in the Illumina-1.3+ encoding"
  add_option :anomalous, :type => :boolean, :aliases => "-A", :desc => "count anomalous read pairs"
  add_option :baq, :type => :boolean, :aliases => "-B", :desc => "disable BAQ computation"
  add_option :bam, :type => :string, :aliases => "-b", :desc => "list of input BAM files [null]"
  add_option :adjust, :type => :numeric, :aliases => "-C", :desc => "parameter for adjusting mapQ; 0 to disable [0]"
  add_option :depth, :type => :numeric, :aliases => "-d", :desc => "max per-BAM depth to avoid excessive memory usage [250]"
  add_option :extended, :type => :boolean, :aliases => "-E", :desc => "extended BAQ for higher sensitivity but lower specificity"
  add_option :file_in, :type => :string, :aliases => "-f", :desc => "faidx indexed reference sequence file [null]"
  add_option :readgroup, :type => :string, :aliases => "-G", :desc => "exclude read groups listed in FILE [null]"
  add_option :positions, :type => :string, :aliases => "-l", :desc => "list of positions (chr pos) or regions (BED) in FILE [null]"
  add_option :mapping_quality, :type => :numeric, :aliases => "-M", :desc => "cap mapping quality at INT [60]"
  # The alias was "r" (no leading dash); every other alias and the usage text use "-r".
  add_option :region, :type => :string, :aliases => "-r", :desc => "region in which pileup is generated [null]"
  add_option :ignoreRG, :type => :boolean, :aliases => "-R", :desc => "ignore RG tags"
  add_option :align_qual, :type => :numeric, :aliases => "-q", :desc => "skip alignments with mapQ smaller than INT [0]"
  add_option :base_qual, :type => :numeric, :aliases => "-Q", :desc => "skip bases with baseQ/BAQ smaller than INT [13]"

  # Output options
  add_option :dp, :type => :boolean, :aliases => "-D", :desc => "output per-sample DP in BCF (require -g/-u)"
  add_option :bcfout, :type => :boolean, :aliases => "-g", :desc => "generate BCF output (genotype likelihoods)"
  add_option :basepositions, :type => :boolean, :aliases => "-O", :desc => "output base positions on reads (disabled by -g/-u)"
  add_option :mapq_out, :type => :boolean, :aliases => "-s", :desc => "output mapping quality (disabled by -g/-u)"
  add_option :strand_bias, :type => :boolean, :aliases => "-S", :desc => "output per-sample strand bias P-value in BCF (require -g/-u)"
  add_option :uncompressed, :type => :boolean, :aliases => "-u", :desc => "generate uncompress BCF output"

  # SNP/INDEL genotype likelihoods options (effective with -g or -u)
  add_option :gap_error, :type => :numeric, :aliases => "-e", :desc => "Phred-scaled gap extension seq error probability [20]"
  add_option :reads_fraction, :type => :numeric, :aliases => "-F", :desc => "minimum fraction of gapped reads for candidates [0.002]"
  add_option :homopolymer_errors, :type => :numeric, :aliases => "-h", :desc => "coefficient for homopolymer errors [100]"
  add_option :noindel, :type => :boolean, :aliases => "-I", :desc => "do not perform indel calling"
  add_option :sample_depth, :type => :numeric, :aliases => "-L", :desc => "max per-sample depth for INDEL calling [250]"
  add_option :min_gap, :type => :numeric, :aliases => "-m", :desc => "minimum gapped reads for indel candidates [1]"
  add_option :gap_open, :type => :numeric, :aliases => "-o", :desc => "Phred-scaled gap open sequencing error probability [40]"
  add_option :indel_platforms, :type => :string, :aliases => "-P", :desc => "comma separated list of platforms for indels [all]"
end #mpileup
|
193
|
+
|
194
|
+
# Wrapper around the `samtools faidx` sub-command.
#
# No options are declared; the class only binds the samtools binary and the
# "faidx" sub-program name.
class Faidx
  include Bio::Command::Wrapper

  set_program(Bio::Ngs::Utils.binary("samtools"))
  set_sub_program("faidx")
end # Faidx
|
199
|
+
|
200
|
+
|
116
201
|
end #Samtools
|
117
202
|
end #Ngs
|
118
203
|
end #Bio
|
data/lib/bio/appl/ngs/tophat.rb
CHANGED
@@ -148,7 +148,7 @@ module Bio
|
|
148
148
|
add_option "tmp-dir", :type => :string
|
149
149
|
add_option "zpacker", :type => :string, :aliases => '-z'
|
150
150
|
add_option "unmapped-fifo", :type => :boolean, :aliases => '-X'
|
151
|
-
add_option "initial-read-mismatches", :type => :
|
151
|
+
add_option "initial-read-mismatches", :type => :numeric, :aliases => '-N'
|
152
152
|
add_option "segment-mismatches", :type => :numeric
|
153
153
|
add_option "segment-length", :type => :numeric
|
154
154
|
add_option "min-closure-exon", :type => :numeric
|
@@ -13,6 +13,22 @@ common:
|
|
13
13
|
suffix: tar.bz2
|
14
14
|
desc: "Fastx-toolkit version 0.0.13 requires libgtextutils-0.6 (available here for download). A recent g++ compiler (tested with GNU G++ 4.1.2 and later). The fasta_clipping_histogram tool requires two perl modules: PerlIO::gzip and GD::Graph::bars. The fastx_barcode_splitter tool requires GNU sed. The fastq_quality_boxplot tool requires gnuplot version 4.2 or newer."
|
15
15
|
type: source
|
16
|
+
bwa:
|
17
|
+
version: 0.6.1
|
18
|
+
url: http://sourceforge.net/projects/bio-bwa/files/bwa-0.6.1.tar.bz2/download
|
19
|
+
basename: bwa-0.6.1
|
20
|
+
suffix: tar.bz2
|
21
|
+
desc: "Burrows-Wheeler Aligner"
|
22
|
+
type: make
|
23
|
+
samtools:
|
24
|
+
version: 0.1.18
|
25
|
+
url: http://sourceforge.net/projects/samtools/files/samtools/0.1.18/samtools-0.1.18.tar.bz2/download
|
26
|
+
basename: samtools-0.1.18
|
27
|
+
suffix: tar.bz2
|
28
|
+
desc: "SAMtools"
|
29
|
+
type: make
|
30
|
+
|
31
|
+
|
16
32
|
linux:
|
17
33
|
cufflinks:
|
18
34
|
version: 1.3.0
|
data/lib/bio/ngs/record.rb
CHANGED
@@ -26,7 +26,7 @@ module Bio
|
|
26
26
|
|
27
27
|
def load
|
28
28
|
tasks = []
|
29
|
-
YAML.
|
29
|
+
YAML.load_stream(@file) do |ydoc|
|
30
30
|
ydoc[:args].flatten!
|
31
31
|
tasks << ydoc
|
32
32
|
end
|
@@ -41,7 +41,7 @@ module Bio
|
|
41
41
|
|
42
42
|
# Tells whether +params+ equals one of the YAML documents stored in @file.
#
# Every document of the YAML stream is collected, then membership is
# checked with Array#include?.
def is_saved?(params)
  recorded = []
  YAML.load_stream(@file) do |document|
    recorded.push(document)
  end
  recorded.include?(params)
end
|
47
47
|
|
data/lib/bio/ngs/utils.rb
CHANGED
@@ -23,13 +23,6 @@ module Bio
|
|
23
23
|
end
|
24
24
|
class << self
|
25
25
|
|
26
|
-
def parallel_exec(command_blocks)
|
27
|
-
command_blocks.each do |block|
|
28
|
-
fork(&block)
|
29
|
-
end
|
30
|
-
Process.waitall
|
31
|
-
end
|
32
|
-
|
33
26
|
def binary(name)
|
34
27
|
begin
|
35
28
|
if !(plugin_binaries_found = find_binary_files(name)).empty?
|
@@ -47,7 +40,7 @@ module Bio
|
|
47
40
|
|
48
41
|
def os_type
|
49
42
|
require 'rbconfig'
|
50
|
-
case
|
43
|
+
case RbConfig::CONFIG['host_os']
|
51
44
|
when /darwin/ then return "osx"
|
52
45
|
when /linux/ then return "linux"
|
53
46
|
when /mswin|mingw/ then raise NotImplementedError, "This plugin does not run on Windows"
|
@@ -154,6 +147,16 @@ module Bio
|
|
154
147
|
end #cd
|
155
148
|
end #uncompress_compile
|
156
149
|
|
150
|
+
# Uncompresses a tool's archive, builds it with a plain `make` and copies
# the file named after the tool from the build directory to path_binary.
#
# tool_name     - name of the tool; also the name of the file copied out
#                 of the build directory.
# tool_record   - tool description, handed on to uncompress_any.
# path_external - not used by this method; the parameter list is the same
#                 as install_binary's.
# path_binary   - destination given to FileUtils.cp.
def just_make(tool_name, tool_record, path_external, path_binary)
  puts "Uncompressing #{tool_name}..."
  tool_dir_name = uncompress_any(tool_name, tool_record)
  puts "Compiling #{tool_name}..."
  cd(tool_dir_name) do
    # Kernel#system returns false or nil when make fails or cannot be run.
    # Report it rather than ignoring it, but still attempt the copy, since
    # the main binary may have been built before a later target failed.
    unless system("make")
      warn "WARNING: 'make' did not complete successfully for #{tool_name}"
    end
    FileUtils.cp tool_name, path_binary
  end #cd
end
|
159
|
+
|
157
160
|
def install_binary(tool_name, tool_record, path_external, path_binary)
|
158
161
|
require 'fileutils'
|
159
162
|
include FileUtils::Verbose
|
data/lib/enumerable.rb
CHANGED
@@ -34,4 +34,16 @@ module Enumerable
|
|
34
34
|
return Math.sqrt(self.sample_variance)
|
35
35
|
end
|
36
36
|
|
37
|
-
end # module Enumerable
|
37
|
+
end # module Enumerable
|
38
|
+
|
39
|
+
# Core extension: split an Array into consecutive chunks with the `/` operator.
class Array
  # Splits the receiver into consecutive sub-arrays holding +len+ elements
  # each; the last sub-array may be shorter. An empty receiver gives [].
  #
  #   [1, 2, 3, 4, 5] / 2   #=> [[1, 2], [3, 4], [5]]
  #
  # A new chunk starts whenever the element index is a multiple of +len+,
  # so an Integer +len+ of 0 raises ZeroDivisionError on a non-empty array.
  def /(len)
    each_with_index.each_with_object([]) do |(item, position), chunks|
      chunks.push([]) if position % len == 0
      chunks.last.push(item)
    end
  end
end
|
49
|
+
|