bio-gadget 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.org +0 -4
- data/lib/bio-gadget/demlt.rb +17 -8
- data/lib/bio-gadget/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 27884e24ae8337cdb9a1f93c1baf1e1b8c19c5af
|
4
|
+
data.tar.gz: 7b0ee4c467ba98d3564014f8786c197d9e81756f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: badbe0d6f1f09e5005b5bc29b1d94c35f115d1286050d99de3caf18bd3222b3ca497ac0a632448dea5eb931fd8951bfc08f0e77cf9d4b275d0cf04701bd3900c
|
7
|
+
data.tar.gz: 86c0c9b27fa747f27091a7d1ca994e36d9f5393e690912b51cbcb8612daf9c26bc76a70de069d02c6403dc2152701b861287350fa19e21fdd2d019356521307b
|
data/README.org
CHANGED
@@ -22,10 +22,6 @@ Currently available commands are
|
|
22
22
|
- wig5p :: Convert bam-format alignments into wig-format table
|
23
23
|
- wigchr :: Extract wiggle track on specified chromosome
|
24
24
|
|
25
|
-
** Memo :noexport:
|
26
|
-
- gtfensembl :: Create gtf of ENSEMBL Genes (a.k.a. ensGene.gtf) from XX files of UCSC annotation database; .txt.gz of
|
27
|
-
- gtfucsc :: Create gtf of UCSC Genes from four files (knownIsoforms.txt.gz, kgXref.txt.gz, kgTxInfo.txt.gz and knownGene.txt.gz) of the UCSC annotation database. The output gtf contains gene_id, transcript_id and gene_name attributes, so it is convenient to use as a gene/transcript annotation file for tophat/cufflinks etc.
|
28
|
-
|
29
25
|
* Contributing
|
30
26
|
|
31
27
|
1. Fork it
|
data/lib/bio-gadget/demlt.rb
CHANGED
@@ -8,16 +8,20 @@ module Bio
|
|
8
8
|
|
9
9
|
namespace :bio
|
10
10
|
|
11
|
-
desc 'demlt BARCODE [FASTQ]',
|
11
|
+
desc 'demlt BARCODE [FASTQ]', "Demultiplex fastq from STDIN by barcodes.\n\n"
|
12
12
|
option 'output-dir', :aliases => '-o', :type => :string, :default => '.'
|
13
|
-
option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4, :desc => '0 is no umi, means no PCR-amplicon
|
14
|
-
option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37, :desc => '-1 is no trimming by length.'
|
15
|
-
option 'g-trimming', :aliases => '-g', :type => :boolean, :default => false, :desc => "Trimming of 5'-end poly-G. Length of the trimmed Gs attached after the read name.
|
13
|
+
option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4, :desc => '0 is no umi, means no PCR-amplicon reduction.'
|
14
|
+
option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37, :desc => 'Trimming length before PCA-amplicon reduction. -1 is no trimming by length.'
|
15
|
+
option 'g-trimming', :aliases => '-g', :type => :boolean, :default => false, :desc => "Trimming of 5'-end poly-G. Length of the trimmed Gs attached after the read name."
|
16
|
+
option 'q-trimming', :aliases => '-q', :type => :string, :default => '~', :desc => "Quality threshold - nucleotides with lower quality will be trimmed, from the end of the sequence. '~' is no trimming by quality, because this is the maximum quality base character."
|
17
|
+
option 'min-length', :aliases => '-l', :type => :numeric, :default => 0, :desc => 'Length threshold - sequences shorter than this after trimming will be filtered out. 0 is no filtering.'
|
16
18
|
def demlt(bcfile, fastq=:stdin)
|
17
19
|
|
18
20
|
ofs = options['umi-length']
|
19
|
-
|
20
|
-
|
21
|
+
clen = options['cdna-length']
|
22
|
+
gtrim = options['g-trimming']
|
23
|
+
qtrim = options['q-trimming']
|
24
|
+
mlen = options['min-length']
|
21
25
|
|
22
26
|
wells = Array.new
|
23
27
|
bcs = Array.new
|
@@ -117,7 +121,7 @@ module Bio
|
|
117
121
|
outpath = "#{options['output-dir']}/#{well}.fq.xz"
|
118
122
|
pid = Kernel.fork {
|
119
123
|
left = ofs+bclen
|
120
|
-
right =
|
124
|
+
right = clen > -1 ? -1 : ofs+bclen+clen-1
|
121
125
|
preprocess = ofs > 0 ? <<"DEDUPandFORMAT"
|
122
126
|
ruby -F'\\t' -anle 'f1=$F[1][0..#{right}];f2=$F[2][0..#{right}];puts([f1+f2, $F[0], f2, f1].join("\\t"))' #{fifo3paths[i]} \\
|
123
127
|
| sort -k 1 -r | cut -f 2- | uniq -f 2 \\
|
@@ -127,8 +131,13 @@ DEDUPandFORMAT
|
|
127
131
|
ruby -F'\\t' -anle 'puts(["@"+$F[0], $F[1][#{left}..#{right}], "+", $F[2][#{left}..#{right}].rstrip].join("\\n"))' #{fifo3paths[i]} \\
|
128
132
|
FORMAT
|
129
133
|
|
130
|
-
preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|s=v[1].gsub(/^G+/,"");l=v[1].length-s.length;puts("@#{v[0]}|-G#{l}\\n#{s}\\n+\\n#{v[2][l,s.length]}")}\'' if
|
134
|
+
preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|s=v[1].gsub(/^G+/,"");l=v[1].length-s.length;puts("@#{v[0]}|-G#{l}\\n#{s}\\n+\\n#{v[2][l,s.length]}") if s.length>0}\'' if gtrim
|
131
135
|
|
136
|
+
if qtrim != '~' || mlen > 0
|
137
|
+
preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|m=v[2].length-1;0.upto(m){|i|if v[2][i]<"'+qtrim+'" then m=i-1;break;end};puts("@#{v[0]}\n#{v[1][0..m]}\n+\n#{v[2][0..m]}") if m>'+mlen.to_s+'}\''
|
138
|
+
end
|
139
|
+
|
140
|
+
puts preprocess
|
132
141
|
exec preprocess+"| xz -z -c -e > #{outpath}"
|
133
142
|
}
|
134
143
|
}
|
data/lib/bio-gadget/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-gadget
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shintaro Katayama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-04-
|
11
|
+
date: 2013-04-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|