bio-ngs 0.4.2.alpha.01 → 0.4.4.alpha.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +5 -4
- data/Gemfile.lock +34 -32
- data/README.rdoc +3 -0
- data/VERSION +1 -1
- data/bin/biongs +0 -5
- data/bio-ngs.gemspec +17 -28
- data/ext/mkrf_conf.rb +5 -1
- data/lib/bio-ngs.rb +9 -2
- data/lib/bio/appl/ngs/bcftools.rb +100 -0
- data/lib/bio/appl/ngs/bwa.rb +210 -0
- data/lib/bio/appl/ngs/fastx.rb +22 -6
- data/lib/bio/appl/ngs/samtools.rb +85 -0
- data/lib/bio/appl/ngs/tophat.rb +1 -1
- data/lib/bio/ngs/ext/versions.yaml +16 -0
- data/lib/bio/ngs/record.rb +2 -2
- data/lib/bio/ngs/utils.rb +11 -8
- data/lib/enumerable.rb +13 -1
- data/lib/tasks/bwa.thor +32 -106
- data/lib/tasks/filter.thor +75 -0
- data/lib/tasks/pre.thor +17 -37
- data/lib/tasks/project.thor +2 -1
- data/lib/tasks/quality.thor +27 -5
- data/lib/wrapper.rb +32 -5
- metadata +100 -99
@@ -0,0 +1,210 @@
|
|
1
|
+
#
|
2
|
+
# bwa.rb - wrapper classes for the bwa command line sub-programs
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2011
|
5
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
#
|
9
|
+
#
|
10
|
+
# Program: bwa (alignment via Burrows-Wheeler transformation)
|
11
|
+
# Version: 0.6.0-r85
|
12
|
+
#
|
13
|
+
|
14
|
+
#Usage: bwa <command> [options]
|
15
|
+
#
|
16
|
+
#Command: index index sequences in the FASTA format
|
17
|
+
# aln gapped/ungapped alignment
|
18
|
+
# samse generate alignment (single ended)
|
19
|
+
# sampe generate alignment (paired ended)
|
20
|
+
# bwasw BWA-SW for long queries
|
21
|
+
# fastmap identify super-maximal exact matches
|
22
|
+
#
|
23
|
+
# fa2pac convert FASTA to PAC format
|
24
|
+
# pac2bwt generate BWT from PAC
|
25
|
+
# pac2bwtgen alternative algorithm for generating BWT
|
26
|
+
# bwtupdate update .bwt to the new format
|
27
|
+
# bwt2sa generate SA from BWT and Occ
|
28
|
+
# pac2cspac convert PAC to color-space PAC
|
29
|
+
# stdsw standard SW/NW alignment
|
30
|
+
|
31
|
+
|
32
|
+
module Bio
|
33
|
+
module Ngs
|
34
|
+
module Bwa
|
35
|
+
|
36
|
+
#Usage: bwa index [-a bwtsw|div|is] [-c] <in.fasta>
|
37
|
+
#
|
38
|
+
#Options: -a STR BWT construction algorithm: bwtsw or is [is]
|
39
|
+
# -p STR prefix of the index [same as fasta name]
|
40
|
+
# -c build color-space index
|
41
|
+
|
42
|
+
# Wrapper for the `bwa index` sub-program.
#
# Declares the options listed in the `bwa index` usage text:
#   -a STR  BWT construction algorithm
#   -p STR  prefix of the index
#   -c      build color-space index
class Index
  include Bio::Command::Wrapper
  set_program Bio::Ngs::Utils.binary("bwa")
  set_sub_program "index"
  # NOTE(review): presumably makes the wrapper emit the short alias flags
  # (-a, -p, -c) on the command line - confirm in Bio::Command::Wrapper.
  use_aliases
  add_option :algorithm, :type => :string, :aliases => "-a", :desc => "BWT construction algorithm: bwtsw or is [is]"
  add_option :prefix, :type => :string, :aliases => "-p", :desc => "prefix of the index [same as fasta name]"
  # -c appears in the bwa index usage text but was not exposed by the wrapper.
  add_option :colorspace, :type => :boolean, :aliases => "-c", :desc => "build color-space index"
end #Index
|
50
|
+
|
51
|
+
|
52
|
+
#Usage: bwa aln [options] <prefix> <in.fq>
|
53
|
+
#
|
54
|
+
#Options: -n NUM max #diff (int) or missing prob under 0.02 err rate (float) [0.04]
|
55
|
+
# -o INT maximum number or fraction of gap opens [1]
|
56
|
+
# -e INT maximum number of gap extensions, -1 for disabling long gaps [-1]
|
57
|
+
# -i INT do not put an indel within INT bp towards the ends [5]
|
58
|
+
# -d INT maximum occurrences for extending a long deletion [10]
|
59
|
+
# -l INT seed length [32]
|
60
|
+
# -k INT maximum differences in the seed [2]
|
61
|
+
# -m INT maximum entries in the queue [2000000]
|
62
|
+
# -t INT number of threads [1]
|
63
|
+
# -M INT mismatch penalty [3]
|
64
|
+
# -O INT gap open penalty [11]
|
65
|
+
# -E INT gap extension penalty [4]
|
66
|
+
# -R INT stop searching when there are >INT equally best hits [30]
|
67
|
+
# -q INT quality threshold for read trimming down to 35bp [0]
|
68
|
+
# -f FILE file to write output to instead of stdout
|
69
|
+
# -B INT length of barcode
|
70
|
+
# -L log-scaled gap penalty for long deletions
|
71
|
+
# -N non-iterative mode: search for all n-difference hits (slooow)
|
72
|
+
# -I the input is in the Illumina 1.3+ FASTQ-like format
|
73
|
+
# -b the input read file is in the BAM format
|
74
|
+
# -0 use single-end reads only (effective with -b)
|
75
|
+
# -1 use the 1st read in a pair (effective with -b)
|
76
|
+
# -2 use the 2nd read in a pair (effective with -b)
|
77
|
+
# -Y filter Casava-filtered sequences
|
78
|
+
|
79
|
+
# Wrapper for the `bwa aln` sub-program (gapped/ungapped alignment).
#
# Each add_option call mirrors one flag of the `bwa aln` usage text; the
# bracketed value at the end of a description is the default printed by bwa.
class Aln
  include Bio::Command::Wrapper

  set_program(Bio::Ngs::Utils.binary("bwa"))
  set_sub_program("aln")
  # NOTE(review): presumably switches the wrapper to the short alias flags -
  # confirm in Bio::Command::Wrapper.
  use_aliases

  # Numeric tuning parameters
  add_option(:num, :type => :numeric, :aliases => "-n",
             :desc => "max #diff (int) or missing prob under 0.02 err rate (float) [0.04]")
  add_option(:gap_open, :type => :numeric, :aliases => "-o",
             :desc => "maximum number or fraction of gap opens [1]")
  add_option(:gap_ext, :type => :numeric, :aliases => "-e",
             :desc => "maximum number of gap extensions, -1 for disabling long gaps [-1]")
  add_option(:indel, :type => :numeric, :aliases => "-i",
             :desc => "do not put an indel within INT bp towards the ends [5]")
  add_option(:extending_deletion, :type => :numeric, :aliases => "-d",
             :desc => "maximum occurrences for extending a long deletion [10]")
  add_option(:seed_length, :type => :numeric, :aliases => "-l",
             :desc => "seed length [32]")
  add_option(:seed_diff, :type => :numeric, :aliases => "-k",
             :desc => "maximum differences in the seed [2]")
  add_option(:queue, :type => :numeric, :aliases => "-m",
             :desc => "maximum entries in the queue [2000000]")
  add_option(:threads, :type => :numeric, :aliases => "-t",
             :desc => "number of threads [1]")
  add_option(:mismatch_penalty, :type => :numeric, :aliases => "-M",
             :desc => "mismatch penalty [3]")
  add_option(:gap_open_penalty, :type => :numeric, :aliases => "-O",
             :desc => "gap open penalty [11]")
  add_option(:gap_extension_penalty, :type => :numeric, :aliases => "-E",
             :desc => "gap extension penalty [4]")
  add_option(:best_hit, :type => :numeric, :aliases => "-R",
             :desc => "stop searching when there are >INT equally best hits [30]")
  add_option(:quality_trimming, :type => :numeric, :aliases => "-q",
             :desc => "quality threshold for read trimming down to 35bp [0]")

  # Output file and barcode
  add_option(:file_out, :type => :string, :aliases => "-f",
             :desc => "file to write output to instead of stdout")
  add_option(:barcode_length, :type => :numeric, :aliases => "-B",
             :desc => "length of barcode")

  # Boolean switches
  add_option(:log_scale_penalty, :type => :boolean, :aliases => "-L",
             :desc => "log-scaled gap penalty for long deletions")
  add_option(:non_iterative, :type => :boolean, :aliases => "-N",
             :desc => "non-iterative mode: search for all n-difference hits")
  add_option(:illumina_13, :type => :boolean, :aliases => "-I",
             :desc => "the input is in the Illumina 1.3+ FASTQ-like format")
  add_option(:bam, :type => :boolean, :aliases => "-b",
             :desc => "the input read file is in the BAM format")
  add_option(:single, :type => :boolean, :aliases => "-0",
             :desc => "use single-end reads only (effective with -b)")
  add_option(:first, :type => :boolean, :aliases => "-1",
             :desc => "use the 1st read in a pair (effective with -b)")
  add_option(:second, :type => :boolean, :aliases => "-2",
             :desc => "use the 2nd read in a pair (effective with -b)")
  add_option(:filter, :type => :boolean, :aliases => "-Y",
             :desc => "filter Casava-filtered sequences")
end # Aln
|
109
|
+
|
110
|
+
|
111
|
+
# Usage: bwa samse [-n max_occ] [-f out.sam] [-r RG_line] <prefix> <in.sai> <in.fq>
|
112
|
+
|
113
|
+
# Wrapper for the `bwa samse` sub-program (single-ended alignment output).
#
# Exposes the three flags from the usage line:
#   bwa samse [-n max_occ] [-f out.sam] [-r RG_line] <prefix> <in.sai> <in.fq>
class Samse
  include Bio::Command::Wrapper

  set_program(Bio::Ngs::Utils.binary("bwa"))
  set_sub_program("samse")
  use_aliases

  add_option(:max_occ, :type => :numeric, :aliases => "-n",
             :desc => "max_occ")
  add_option(:file_out, :type => :string, :aliases => "-f",
             :desc => "file name to save data")
  add_option(:rg_line, :type => :string, :aliases => "-r",
             :desc => "RG line")
end #Samse
|
122
|
+
|
123
|
+
|
124
|
+
|
125
|
+
#Usage: bwa sampe [options] <prefix> <in1.sai> <in2.sai> <in1.fq> <in2.fq>
|
126
|
+
#
|
127
|
+
#Options: -a INT maximum insert size [500]
|
128
|
+
# -o INT maximum occurrences for one end [100000]
|
129
|
+
# -n INT maximum hits to output for paired reads [3]
|
130
|
+
# -N INT maximum hits to output for discordant pairs [10]
|
131
|
+
# -c FLOAT prior of chimeric rate (lower bound) [1.0e-05]
|
132
|
+
# -f FILE sam file to output results to [stdout]
|
133
|
+
# -r STR read group header line such as `@RG\tID:foo\tSM:bar' [null]
|
134
|
+
# -P preload index into memory (for base-space reads only)
|
135
|
+
# -s disable Smith-Waterman for the unmapped mate
|
136
|
+
# -A disable insert size estimate (force -s)
|
137
|
+
|
138
|
+
# Wrapper for the `bwa sampe` sub-program (paired-ended alignment output).
#
# Each add_option call mirrors one flag of the `bwa sampe` usage text; the
# bracketed value at the end of a description is the default printed by bwa.
class Sampe
  include Bio::Command::Wrapper
  set_program Bio::Ngs::Utils.binary("bwa")
  set_sub_program "sampe"
  use_aliases
  add_option :max_insert, :type => :numeric, :aliases => "-a", :desc => "maximum insert size [500]"
  add_option :max_occurrences, :type => :numeric, :aliases => "-o", :desc => "maximum occurrences for one end [100000]"
  add_option :max_hits, :type => :numeric, :aliases => "-n", :desc => "maximum hits to output for paired reads [3]"
  add_option :max_hits_discordant, :type => :numeric, :aliases => "-N", :desc => "maximum hits to output for discordant pairs [10]"
  add_option :chimeric_rate, :type => :numeric, :aliases => "-c", :desc => "prior of chimeric rate (lower bound) [1.0e-05]"
  add_option :file_out, :type => :string, :aliases => "-f", :desc => "sam file to output results to [stdout]"
  # The backslashes are escaped so the help text shows a literal "\t", as in
  # the bwa usage message, rather than embedding real TAB characters.
  add_option :read_group, :type => :string, :aliases => "-r", :desc => "read group header line such as `@RG\\tID:foo\\tSM:bar' [null]"
  add_option :preload_index, :type => :boolean, :aliases => "-P", :desc => "preload index into memory (for base-space reads only)"
  add_option :disable_sw, :type => :boolean, :aliases => "-s", :desc => "disable Smith-Waterman for the unmapped mate"
  add_option :disable_insert_estimate, :type => :boolean, :aliases => "-A", :desc => "disable insert size estimate (force -s)"
end #Sampe
|
154
|
+
|
155
|
+
|
156
|
+
#Usage: bwa bwasw [options] <target.prefix> <query.fa> [query2.fa]
|
157
|
+
#
|
158
|
+
#Options: -a INT score for a match [1]
|
159
|
+
# -b INT mismatch penalty [3]
|
160
|
+
# -q INT gap open penalty [5]
|
161
|
+
# -r INT gap extension penalty [2]
|
162
|
+
#
|
163
|
+
# -t INT number of threads [1]
|
164
|
+
#
|
165
|
+
# -w INT band width [50]
|
166
|
+
# -m FLOAT mask level [0.50]
|
167
|
+
#
|
168
|
+
# -T INT score threshold divided by a [30]
|
169
|
+
# -s INT maximum seeding interval size [3]
|
170
|
+
# -z INT Z-best [1]
|
171
|
+
# -N INT # seeds to trigger reverse alignment [5]
|
172
|
+
# -c FLOAT coefficient of length-threshold adjustment [5.5]
|
173
|
+
# -H in SAM output, use hard clipping rather than soft
|
174
|
+
# -f FILE file to output results to instead of stdout
|
175
|
+
|
176
|
+
# Wrapper for the `bwa bwasw` sub-program (BWA-SW for long queries).
#
# Each aliased add_option call mirrors one flag of the `bwa bwasw` usage
# text; the bracketed value in a description is the default printed by bwa.
class Bwasw
  include Bio::Command::Wrapper

  set_program(Bio::Ngs::Utils.binary("bwa"))
  set_sub_program("bwasw")
  use_aliases

  # NOTE(review): :paired has no alias and no matching flag in the usage
  # text - presumably carries the optional second query file; confirm how
  # Bio::Command::Wrapper renders it.
  add_option(:paired, :type => :string,
             :desc => "paired reads")

  # Scoring
  add_option(:match_score, :type => :numeric, :aliases => "-a",
             :desc => "score for a match [1]")
  add_option(:mismatch_penalty, :type => :numeric, :aliases => "-b",
             :desc => "mismatch penalty [3]")
  add_option(:gap_open_penalty, :type => :numeric, :aliases => "-q",
             :desc => "gap open penalty [5]")
  add_option(:gap_ext_penalty, :type => :numeric, :aliases => "-r",
             :desc => "gap extension penalty [2]")

  # Execution and search parameters
  add_option(:threads, :type => :numeric, :aliases => "-t",
             :desc => "number of threads [1]")
  add_option(:band_width, :type => :numeric, :aliases => "-w",
             :desc => "band width [50]")
  add_option(:mask_level, :type => :numeric, :aliases => "-m",
             :desc => "mask level [0.50]")
  add_option(:score_threshold, :type => :numeric, :aliases => "-T",
             :desc => "score threshold divided by a [30]")
  add_option(:max_seeding, :type => :numeric, :aliases => "-s",
             :desc => "maximum seeding interval size [3]")
  add_option(:z_best, :type => :numeric, :aliases => "-z",
             :desc => "Z-best [1]")
  add_option(:seed_reverse, :type => :numeric, :aliases => "-N",
             :desc => "seeds to trigger reverse alignment [5]")
  add_option(:length_threshold, :type => :numeric, :aliases => "-c",
             :desc => "coefficient of length-threshold adjustment [5.5]")

  # Output
  add_option(:hard_clip, :type => :boolean, :aliases => "-H",
             :desc => "in SAM output, use hard clipping rather than soft")
  add_option(:file_out, :type => :string, :aliases => "-f",
             :desc => "file to output results to instead of stdout")
end
|
197
|
+
|
198
|
+
# Usage: bwa fastmap [-l minLen=17] [-w maxSaSize=20] <idxbase> <in.fq>
|
199
|
+
# Wrapper for the `bwa fastmap` sub-program.
#
# Exposes the two flags from the usage line:
#   bwa fastmap [-l minLen=17] [-w maxSaSize=20] <idxbase> <in.fq>
class Fastmap
  include Bio::Command::Wrapper

  set_program(Bio::Ngs::Utils.binary("bwa"))
  set_sub_program("fastmap")
  use_aliases

  add_option(:min_length, :type => :numeric, :aliases => "-l",
             :desc => "minLen [17]")
  add_option(:max_sa_size, :type => :numeric, :aliases => "-w",
             :desc => "maxSaSize [20]")
end
|
207
|
+
|
208
|
+
end #Bwa
|
209
|
+
end #Ngs
|
210
|
+
end #Bio
|
data/lib/bio/appl/ngs/fastx.rb
CHANGED
@@ -107,10 +107,9 @@ module Bio
|
|
107
107
|
set_program Bio::Ngs::Utils.binary("fastq_quality_boxplot_graph.sh")
|
108
108
|
use_aliases
|
109
109
|
add_option :ps, :type => :boolean, :aliases => "-p", :desc => "Generate PostScript (.PS) file. Default is PNG image."
|
110
|
-
add_option :output, :type=>:string, :aliases => "-o", :desc => "
|
111
|
-
add_option :input, :type=>:string, :aliases => "-i", :desc => "
|
110
|
+
add_option :output, :type=>:string, :aliases => "-o", :desc => "Output file name. default is STDOUT.", :collapse => true
|
111
|
+
add_option :input, :type=>:string, :aliases => "-i", :desc => "Input file. Should be the output of \"fastx_quality_statistics\" program.", :collapse => true
|
112
112
|
add_option :title, :type => :string, :aliases => "-t", :desc => "Title (usually the solexa file name) - will be plotted on the graph."
|
113
|
-
add_option :quality_type, :type=>:numeric, :default => 33, :aliases => "-Q", :desc=>"Quality of fastq file"
|
114
113
|
end #ReadsBoxPlot
|
115
114
|
|
116
115
|
# Solexa-Reads coverage plotter
|
@@ -127,10 +126,9 @@ module Bio
|
|
127
126
|
set_program Bio::Ngs::Utils.binary("fastq_coverage_graph.sh")
|
128
127
|
use_aliases
|
129
128
|
add_option :ps, :type => :boolean, :aliases => "-p", :desc => "Generate PostScript (.PS) file. Default is PNG image."
|
130
|
-
add_option :output, :type=>:string, :aliases => "-o", :desc => "
|
131
|
-
add_option :input, :type=>:string, :aliases => "-i", :desc => "
|
129
|
+
add_option :output, :type=>:string, :aliases => "-o", :desc => "Output file name. default is STDOUT.", :collapse => true
|
130
|
+
add_option :input, :type=>:string, :aliases => "-i", :desc => "Input file. Should be the output of \"fastx_quality_statistics\" program.", :collapse => true
|
132
131
|
add_option :title, :type => :string, :aliases => "-t", :desc => "Title (usually the solexa file name) - will be plotted on the graph."
|
133
|
-
add_option :quality_type, :type=>:numeric, :default => 33, :aliases => "-Q", :desc=>"Quality of fastq file"
|
134
132
|
end #ReadsCoverage
|
135
133
|
|
136
134
|
|
@@ -188,6 +186,24 @@ module Bio
|
|
188
186
|
add_option :quality_type, :type=>:numeric, :default => 33, :aliases => "-Q", :desc=>"Quality of fastq file"
|
189
187
|
end #ReadsCoverage
|
190
188
|
|
189
|
+
# FASTA/Q Nucleotide Distribution Plotter
|
190
|
+
|
191
|
+
# Usage: /usr/local/bin/fastx_nucleotide_distribution_graph.sh [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]
|
192
|
+
|
193
|
+
# [-p] - Generate PostScript (.PS) file. Default is PNG image.
|
194
|
+
# [-i INPUT.TXT] - Input file. Should be the output of "fastx_quality_statistics" program.
|
195
|
+
# [-o OUTPUT] - Output file name. default is STDOUT.
|
196
|
+
# [-t TITLE] - Title - will be plotted on the graph.
|
197
|
+
# Wrapper for the fastx_nucleotide_distribution_graph.sh script.
#
# Declares the four flags of the script's usage text: -p, -o, -i and -t.
class NucleotideDistribution
  include Bio::Command::Wrapper

  set_program(Bio::Ngs::Utils.binary("fastx_nucleotide_distribution_graph.sh"))
  use_aliases

  add_option(:ps, :type => :boolean, :aliases => "-p",
             :desc => "Generate PostScript (.PS) file. Default is PNG image.")
  # NOTE(review): the meaning of :collapse is defined by
  # Bio::Command::Wrapper and is not visible here - confirm before changing.
  add_option(:output, :type => :string, :aliases => "-o",
             :desc => "Output file name. default is STDOUT.", :collapse => true)
  add_option(:input, :type => :string, :aliases => "-i",
             :desc => "Input file. Should be the output of \"fastx_quality_statistics\" program.", :collapse => true)
  add_option(:title, :type => :string, :aliases => "-t",
             :desc => "Title (usually the solexa file name) - will be plotted on the graph.")
end
|
206
|
+
|
191
207
|
end #Fastx
|
192
208
|
end #Ngs
|
193
209
|
end #Bio
|
@@ -113,6 +113,91 @@ module Bio
|
|
113
113
|
add_option :copy_header, :type => :string, :aliases => "-h", :desc => "copy the header in FILE to <out.bam> [in1.bam]"
|
114
114
|
end #Merge
|
115
115
|
|
116
|
+
#Usage: samtools mpileup [options] in1.bam [in2.bam [...]]
|
117
|
+
|
118
|
+
#Input options:
|
119
|
+
|
120
|
+
# -6 assume the quality is in the Illumina-1.3+ encoding
|
121
|
+
# -A count anomalous read pairs
|
122
|
+
# -B disable BAQ computation
|
123
|
+
# -b FILE list of input BAM files [null]
|
124
|
+
# -C INT parameter for adjusting mapQ; 0 to disable [0]
|
125
|
+
# -d INT max per-BAM depth to avoid excessive memory usage [250]
|
126
|
+
# -E extended BAQ for higher sensitivity but lower specificity
|
127
|
+
# -f FILE faidx indexed reference sequence file [null]
|
128
|
+
# -G FILE exclude read groups listed in FILE [null]
|
129
|
+
# -l FILE list of positions (chr pos) or regions (BED) [null]
|
130
|
+
# -M INT cap mapping quality at INT [60]
|
131
|
+
# -r STR region in which pileup is generated [null]
|
132
|
+
# -R ignore RG tags
|
133
|
+
# -q INT skip alignments with mapQ smaller than INT [0]
|
134
|
+
# -Q INT skip bases with baseQ/BAQ smaller than INT [13]
|
135
|
+
|
136
|
+
#Output options:
|
137
|
+
|
138
|
+
# -D output per-sample DP in BCF (require -g/-u)
|
139
|
+
# -g generate BCF output (genotype likelihoods)
|
140
|
+
# -O output base positions on reads (disabled by -g/-u)
|
141
|
+
# -s output mapping quality (disabled by -g/-u)
|
142
|
+
# -S output per-sample strand bias P-value in BCF (require -g/-u)
|
143
|
+
# -u generate uncompress BCF output
|
144
|
+
|
145
|
+
#SNP/INDEL genotype likelihoods options (effective with `-g' or `-u'):
|
146
|
+
|
147
|
+
# -e INT Phred-scaled gap extension seq error probability [20]
|
148
|
+
# -F FLOAT minimum fraction of gapped reads for candidates [0.002]
|
149
|
+
# -h INT coefficient for homopolymer errors [100]
|
150
|
+
# -I do not perform indel calling
|
151
|
+
# -L INT max per-sample depth for INDEL calling [250]
|
152
|
+
# -m INT minimum gapped reads for indel candidates [1]
|
153
|
+
# -o INT Phred-scaled gap open sequencing error probability [40]
|
154
|
+
# -P STR comma separated list of platforms for indels [all]
|
155
|
+
|
156
|
+
#Notes: Assuming diploid individuals.
|
157
|
+
|
158
|
+
|
159
|
+
# Wrapper for the `samtools mpileup` sub-program.
#
# Each add_option call mirrors one flag of the `samtools mpileup` usage
# text (input options, output options and SNP/INDEL genotype likelihood
# options); the bracketed value in a description is the default printed by
# samtools.
#
# NOTE(review): the Bio::Ngs::Bwa wrapper classes call use_aliases but this
# class does not - confirm that is intentional.
class Mpileup
  include Bio::Command::Wrapper
  set_program Bio::Ngs::Utils.binary("samtools")
  set_sub_program "mpileup"

  # Input options
  add_option :illumina13, :type => :boolean, :aliases => "-6", :desc => "assume the quality is in the Illumina-1.3+ encoding"
  add_option :anomalous, :type => :boolean, :aliases => "-A", :desc => "count anomalous read pairs"
  add_option :baq, :type => :boolean, :aliases => "-B", :desc => "disable BAQ computation"
  add_option :bam, :type => :string, :aliases => "-b", :desc => "list of input BAM files [null]"
  add_option :adjust, :type => :numeric, :aliases => "-C", :desc => "parameter for adjusting mapQ; 0 to disable [0]"
  add_option :depth, :type => :numeric, :aliases => "-d", :desc => "max per-BAM depth to avoid excessive memory usage [250]"
  add_option :extended, :type => :boolean, :aliases => "-E", :desc => "extended BAQ for higher sensitivity but lower specificity"
  add_option :file_in, :type => :string, :aliases => "-f", :desc => "faidx indexed reference sequence file [null]"
  add_option :readgroup, :type => :string, :aliases => "-G", :desc => "exclude read groups listed in FILE [null]"
  add_option :positions, :type => :string, :aliases => "-l", :desc => "list of positions (chr pos) or regions (BED) in FILE [null]"
  add_option :mapping_quality, :type => :numeric, :aliases => "-M", :desc => "cap mapping quality at INT [60]"
  # The alias was "r" (no leading dash), unlike every other alias here.
  add_option :region, :type => :string, :aliases => "-r", :desc => "region in which pileup is generated [null]"
  add_option :ignoreRG, :type => :boolean, :aliases => "-R", :desc => "ignore RG tags"
  add_option :align_qual, :type => :numeric, :aliases => "-q", :desc => "skip alignments with mapQ smaller than INT [0]"
  add_option :base_qual, :type => :numeric, :aliases => "-Q", :desc => "skip bases with baseQ/BAQ smaller than INT [13]"

  # Output options
  add_option :dp, :type => :boolean, :aliases => "-D", :desc => "output per-sample DP in BCF (require -g/-u)"
  add_option :bcfout, :type => :boolean, :aliases => "-g", :desc => "generate BCF output (genotype likelihoods)"
  add_option :basepositions, :type => :boolean, :aliases => "-O", :desc => "output base positions on reads (disabled by -g/-u)"
  add_option :mapq_out, :type => :boolean, :aliases => "-s", :desc => "output mapping quality (disabled by -g/-u)"
  add_option :strand_bias, :type => :boolean, :aliases => "-S", :desc => "output per-sample strand bias P-value in BCF (require -g/-u)"
  add_option :uncompressed, :type => :boolean, :aliases => "-u", :desc => "generate uncompress BCF output"

  # SNP/INDEL genotype likelihoods options (effective with -g or -u)
  add_option :gap_error, :type => :numeric, :aliases => "-e", :desc => "Phred-scaled gap extension seq error probability [20]"
  add_option :reads_fraction, :type => :numeric, :aliases => "-F", :desc => "minimum fraction of gapped reads for candidates [0.002]"
  add_option :homopolymer_errors, :type => :numeric, :aliases => "-h", :desc => "coefficient for homopolymer errors [100]"
  add_option :noindel, :type => :boolean, :aliases => "-I", :desc => "do not perform indel calling"
  add_option :sample_depth, :type => :numeric, :aliases => "-L", :desc => "max per-sample depth for INDEL calling [250]"
  add_option :min_gap, :type => :numeric, :aliases => "-m", :desc => "minimum gapped reads for indel candidates [1]"
  add_option :gap_open, :type => :numeric, :aliases => "-o", :desc => "Phred-scaled gap open sequencing error probability [40]"
  add_option :indel_platforms, :type => :string, :aliases => "-P", :desc => "comma separated list of platforms for indels [all]"
end #mpileup
|
193
|
+
|
194
|
+
# Wrapper for the `samtools faidx` sub-program.
# No command-line options are declared for it.
class Faidx
  include Bio::Command::Wrapper

  set_program(Bio::Ngs::Utils.binary("samtools"))
  set_sub_program("faidx")
end #faidx
|
199
|
+
|
200
|
+
|
116
201
|
end #Samtools
|
117
202
|
end #Ngs
|
118
203
|
end #Bio
|
data/lib/bio/appl/ngs/tophat.rb
CHANGED
@@ -148,7 +148,7 @@ module Bio
|
|
148
148
|
add_option "tmp-dir", :type => :string
|
149
149
|
add_option "zpacker", :type => :string, :aliases => '-z'
|
150
150
|
add_option "unmapped-fifo", :type => :boolean, :aliases => '-X'
|
151
|
-
add_option "initial-read-mismatches", :type => :
|
151
|
+
add_option "initial-read-mismatches", :type => :numeric, :aliases => '-N'
|
152
152
|
add_option "segment-mismatches", :type => :numeric
|
153
153
|
add_option "segment-length", :type => :numeric
|
154
154
|
add_option "min-closure-exon", :type => :numeric
|
@@ -13,6 +13,22 @@ common:
|
|
13
13
|
suffix: tar.bz2
|
14
14
|
desc: "Fastx-toolkit version 0.0.13 requires libgtextutils-0.6 (available here for download). A recent g++ compiler (tested with GNU G++ 4.1.2 and later). The fasta_clipping_histogram tool requires two perl modules: PerlIO::gzip and GD::Graph::bars. The fastx_barcode_splitter tool requires GNU sed. The fastq_quality_boxplot tool requires gnuplot version 4.2 or newer."
|
15
15
|
type: source
|
16
|
+
bwa:
|
17
|
+
version: 0.6.1
|
18
|
+
url: http://sourceforge.net/projects/bio-bwa/files/bwa-0.6.1.tar.bz2/download
|
19
|
+
basename: bwa-0.6.1
|
20
|
+
suffix: tar.bz2
|
21
|
+
desc: "Burrows-Wheeler Aligner"
|
22
|
+
type: make
|
23
|
+
samtools:
|
24
|
+
version: 0.1.18
|
25
|
+
url: http://sourceforge.net/projects/samtools/files/samtools/0.1.18/samtools-0.1.18.tar.bz2/download
|
26
|
+
basename: samtools-0.1.18
|
27
|
+
suffix: tar.bz2
|
28
|
+
desc: "SAMtools"
|
29
|
+
type: make
|
30
|
+
|
31
|
+
|
16
32
|
linux:
|
17
33
|
cufflinks:
|
18
34
|
version: 1.3.0
|
data/lib/bio/ngs/record.rb
CHANGED
@@ -26,7 +26,7 @@ module Bio
|
|
26
26
|
|
27
27
|
def load
|
28
28
|
tasks = []
|
29
|
-
YAML.
|
29
|
+
YAML.load_stream(@file) do |ydoc|
|
30
30
|
ydoc[:args].flatten!
|
31
31
|
tasks << ydoc
|
32
32
|
end
|
@@ -41,7 +41,7 @@ module Bio
|
|
41
41
|
|
42
42
|
def is_saved?(params)
|
43
43
|
tasks = []
|
44
|
-
YAML.
|
44
|
+
YAML.load_stream(@file) {|ydoc| tasks << ydoc}
|
45
45
|
return tasks.include?(params)
|
46
46
|
end
|
47
47
|
|
data/lib/bio/ngs/utils.rb
CHANGED
@@ -23,13 +23,6 @@ module Bio
|
|
23
23
|
end
|
24
24
|
class << self
|
25
25
|
|
26
|
-
def parallel_exec(command_blocks)
|
27
|
-
command_blocks.each do |block|
|
28
|
-
fork(&block)
|
29
|
-
end
|
30
|
-
Process.waitall
|
31
|
-
end
|
32
|
-
|
33
26
|
def binary(name)
|
34
27
|
begin
|
35
28
|
if !(plugin_binaries_found = find_binary_files(name)).empty?
|
@@ -47,7 +40,7 @@ module Bio
|
|
47
40
|
|
48
41
|
def os_type
|
49
42
|
require 'rbconfig'
|
50
|
-
case
|
43
|
+
case RbConfig::CONFIG['host_os']
|
51
44
|
when /darwin/ then return "osx"
|
52
45
|
when /linux/ then return "linux"
|
53
46
|
when /mswin|mingw/ then raise NotImplementedError, "This plugin does not run on Windows"
|
@@ -154,6 +147,16 @@ module Bio
|
|
154
147
|
end #cd
|
155
148
|
end #uncompress_compile
|
156
149
|
|
150
|
+
# Uncompress a tool's archive, build it with a plain `make` and copy the
# resulting binary into path_binary.
#
# tool_name     - name of the tool; also the file name of the built binary
#                 that is copied from the source directory
# tool_record   - record describing the tool, handed to uncompress_any
# path_external - not used by this method (same parameter list as
#                 install_binary)
# path_binary   - destination passed to FileUtils.cp
#
# Raises RuntimeError when `make` does not complete successfully, instead
# of going on to copy a missing or stale binary.
def just_make(tool_name, tool_record, path_external, path_binary)
  puts "Uncompressing #{tool_name}..."
  tool_dir_name = uncompress_any(tool_name, tool_record)
  puts "Compiling #{tool_name}..."
  cd(tool_dir_name) do
    # Kernel#system returns false on a non-zero exit status and nil when the
    # command could not be run at all; both mean there is nothing to install.
    raise "Compilation of #{tool_name} failed: make did not complete successfully" unless system("make")
    FileUtils.cp tool_name, path_binary
  end #cd
end
|
159
|
+
|
157
160
|
def install_binary(tool_name, tool_record, path_external, path_binary)
|
158
161
|
require 'fileutils'
|
159
162
|
include FileUtils::Verbose
|
data/lib/enumerable.rb
CHANGED
@@ -34,4 +34,16 @@ module Enumerable
|
|
34
34
|
return Math.sqrt(self.sample_variance)
|
35
35
|
end
|
36
36
|
|
37
|
-
end # module Enumerable
|
37
|
+
end # module Enumerable
|
38
|
+
|
39
|
+
class Array
  # Split the array into consecutive sub-arrays of at most +len+ elements.
  #
  #   [1, 2, 3, 4, 5] / 2  # => [[1, 2], [3, 4], [5]]
  #   [] / 3               # => []
  #
  # len - positive chunk size.
  #
  # Returns a new Array of Arrays; the receiver is not modified.
  # Raises ArgumentError (from each_slice) when len is zero or negative.
  # The hand-rolled version silently returned a single chunk for a negative
  # len and raised ZeroDivisionError for zero on non-empty arrays only.
  def /(len)
    each_slice(len).to_a
  end
end
|
49
|
+
|