bio-gadget 0.4.1 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.org +0 -4
- data/lib/bio-gadget/demlt.rb +17 -8
- data/lib/bio-gadget/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 27884e24ae8337cdb9a1f93c1baf1e1b8c19c5af
|
4
|
+
data.tar.gz: 7b0ee4c467ba98d3564014f8786c197d9e81756f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: badbe0d6f1f09e5005b5bc29b1d94c35f115d1286050d99de3caf18bd3222b3ca497ac0a632448dea5eb931fd8951bfc08f0e77cf9d4b275d0cf04701bd3900c
|
7
|
+
data.tar.gz: 86c0c9b27fa747f27091a7d1ca994e36d9f5393e690912b51cbcb8612daf9c26bc76a70de069d02c6403dc2152701b861287350fa19e21fdd2d019356521307b
|
data/README.org
CHANGED
@@ -22,10 +22,6 @@ Currently available commands are
|
|
22
22
|
- wig5p :: Convert bam-format alignments into wig-format table
|
23
23
|
- wigchr :: Extract wiggle track on specified chromosome
|
24
24
|
|
25
|
-
** Memo :noexport:
|
26
|
-
- gtfensembl :: Create gtf of ENSEMBL Genes (a.k.a. ensGene.gtf) from XX files of UCSC annotation database; .txt.gz of
|
27
|
-
- gtfucsc :: Create gtf of UCSC Genes from four (knownIsoforms.txt.gz, kgXref.txt.gz, kgTxInfo.txt.gz and knownGene.txt.gz) files of UCSC annotation database. The output gtf contains gene_id, transcript_id and gene_name attributes, so it is convenient to use as a gene/transcript annotation file for tophat/cufflinks etc.
|
28
|
-
|
29
25
|
* Contributing
|
30
26
|
|
31
27
|
1. Fork it
|
data/lib/bio-gadget/demlt.rb
CHANGED
@@ -8,16 +8,20 @@ module Bio
|
|
8
8
|
|
9
9
|
namespace :bio
|
10
10
|
|
11
|
-
desc 'demlt BARCODE [FASTQ]',
|
11
|
+
desc 'demlt BARCODE [FASTQ]', "Demultiplex fastq from STDIN by barcodes.\n\n"
|
12
12
|
option 'output-dir', :aliases => '-o', :type => :string, :default => '.'
|
13
|
-
option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4, :desc => '0 is no umi, means no PCR-amplicon
|
14
|
-
option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37, :desc => '-1 is no trimming by length.'
|
15
|
-
option 'g-trimming', :aliases => '-g', :type => :boolean, :default => false, :desc => "Trimming of 5'-end poly-G. Length of the trimmed Gs attached after the read name.
|
13
|
+
option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4, :desc => '0 is no umi, means no PCR-amplicon reduction.'
|
14
|
+
option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37, :desc => 'Trimming length before PCA-amplicon reduction. -1 is no trimming by length.'
|
15
|
+
option 'g-trimming', :aliases => '-g', :type => :boolean, :default => false, :desc => "Trimming of 5'-end poly-G. Length of the trimmed Gs attached after the read name."
|
16
|
+
option 'q-trimming', :aliases => '-q', :type => :string, :default => '~', :desc => "Quality threshold - nucleotides with lower quality will be trimmed, from the end of the sequence. '~' is no trimming by quality, because this is the maximum quality base character."
|
17
|
+
option 'min-length', :aliases => '-l', :type => :numeric, :default => 0, :desc => 'Length threshold - sequences shorter than this after trimming will be filtered out. 0 is no filtering.'
|
16
18
|
def demlt(bcfile, fastq=:stdin)
|
17
19
|
|
18
20
|
ofs = options['umi-length']
|
19
|
-
|
20
|
-
|
21
|
+
clen = options['cdna-length']
|
22
|
+
gtrim = options['g-trimming']
|
23
|
+
qtrim = options['q-trimming']
|
24
|
+
mlen = options['min-length']
|
21
25
|
|
22
26
|
wells = Array.new
|
23
27
|
bcs = Array.new
|
@@ -117,7 +121,7 @@ module Bio
|
|
117
121
|
outpath = "#{options['output-dir']}/#{well}.fq.xz"
|
118
122
|
pid = Kernel.fork {
|
119
123
|
left = ofs+bclen
|
120
|
-
right =
|
124
|
+
right = clen > -1 ? -1 : ofs+bclen+clen-1
|
121
125
|
preprocess = ofs > 0 ? <<"DEDUPandFORMAT"
|
122
126
|
ruby -F'\\t' -anle 'f1=$F[1][0..#{right}];f2=$F[2][0..#{right}];puts([f1+f2, $F[0], f2, f1].join("\\t"))' #{fifo3paths[i]} \\
|
123
127
|
| sort -k 1 -r | cut -f 2- | uniq -f 2 \\
|
@@ -127,8 +131,13 @@ DEDUPandFORMAT
|
|
127
131
|
ruby -F'\\t' -anle 'puts(["@"+$F[0], $F[1][#{left}..#{right}], "+", $F[2][#{left}..#{right}].rstrip].join("\\n"))' #{fifo3paths[i]} \\
|
128
132
|
FORMAT
|
129
133
|
|
130
|
-
preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|s=v[1].gsub(/^G+/,"");l=v[1].length-s.length;puts("@#{v[0]}|-G#{l}\\n#{s}\\n+\\n#{v[2][l,s.length]}")}\'' if
|
134
|
+
preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|s=v[1].gsub(/^G+/,"");l=v[1].length-s.length;puts("@#{v[0]}|-G#{l}\\n#{s}\\n+\\n#{v[2][l,s.length]}") if s.length>0}\'' if gtrim
|
131
135
|
|
136
|
+
if qtrim != '~' || mlen > 0
|
137
|
+
preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|m=v[2].length-1;0.upto(m){|i|if v[2][i]<"'+qtrim+'" then m=i-1;break;end};puts("@#{v[0]}\n#{v[1][0..m]}\n+\n#{v[2][0..m]}") if m>'+mlen.to_s+'}\''
|
138
|
+
end
|
139
|
+
|
140
|
+
puts preprocess
|
132
141
|
exec preprocess+"| xz -z -c -e > #{outpath}"
|
133
142
|
}
|
134
143
|
}
|
data/lib/bio-gadget/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-gadget
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shintaro Katayama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-04-
|
11
|
+
date: 2013-04-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|