bio-gadget 0.4.1 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 83130daa98cf1f04006aba7a9602b52a036d8d9c
4
- data.tar.gz: 3c99911a518cacaa4ee0661bbff1d007b506500b
3
+ metadata.gz: 27884e24ae8337cdb9a1f93c1baf1e1b8c19c5af
4
+ data.tar.gz: 7b0ee4c467ba98d3564014f8786c197d9e81756f
5
5
  SHA512:
6
- metadata.gz: 2869693a0cb2357c9d9bab623a6686475c96ef9b9e28e6498a3b8cbe66404f7d31cccbe3b09957436eab9c9c87442df37177b2ca3704244bd03f97d2bb09b9b8
7
- data.tar.gz: 644f86a29aed0eebbf30031f54fe70b28ce607d8248b594d393396bee192e83aa30ea0faffebcc35ffd8bd294e36e3f1f6f99b5ba266b11cdc3f266db76a01c9
6
+ metadata.gz: badbe0d6f1f09e5005b5bc29b1d94c35f115d1286050d99de3caf18bd3222b3ca497ac0a632448dea5eb931fd8951bfc08f0e77cf9d4b275d0cf04701bd3900c
7
+ data.tar.gz: 86c0c9b27fa747f27091a7d1ca994e36d9f5393e690912b51cbcb8612daf9c26bc76a70de069d02c6403dc2152701b861287350fa19e21fdd2d019356521307b
data/README.org CHANGED
@@ -22,10 +22,6 @@ Currently available commands are
22
22
  - wig5p :: Convert bam-format alignments into wig-format table
23
23
  - wigchr :: Extract wiggle track on specified chromosome
24
24
 
25
- ** Memo :noexport:
26
- - gtfensembl :: Create gtf of ENSEMBL Genes (a.k.a. ensGene.gtf) from XX files of UCSC annotation database; .txt.gz of
27
- - gtfucsc :: Create gtf of UCSC Genes from four files (knownIsoforms.txt.gz, kgXref.txt.gz, kgTxInfo.txt.gz and knownGene.txt.gz) of the UCSC annotation database. The output gtf contains gene_id, transcript_id and gene_name attributes, so it is convenient to use as a gene/transcript annotation file for tophat, cufflinks, etc.
28
-
29
25
  * Contributing
30
26
 
31
27
  1. Fork it
@@ -8,16 +8,20 @@ module Bio
8
8
 
9
9
  namespace :bio
10
10
 
11
- desc 'demlt BARCODE [FASTQ]', 'Demultiplex fastq from STDIN by barcodes. '
11
+ desc 'demlt BARCODE [FASTQ]', "Demultiplex fastq from STDIN by barcodes.\n\n"
12
12
  option 'output-dir', :aliases => '-o', :type => :string, :default => '.'
13
- option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4, :desc => '0 is no umi, means no PCR-amplicon removal.'
14
- option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37, :desc => '-1 is no trimming by length.'
15
- option 'g-trimming', :aliases => '-g', :type => :boolean, :default => false, :desc => "Trimming of 5'-end poly-G. Length of the trimmed Gs attached after the read name. This sets cdna-length option to -1."
13
+ option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4, :desc => '0 is no umi, means no PCR-amplicon reduction.'
14
+ option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37, :desc => 'Trimming length before PCR-amplicon reduction. -1 is no trimming by length.'
15
+ option 'g-trimming', :aliases => '-g', :type => :boolean, :default => false, :desc => "Trimming of 5'-end poly-G. Length of the trimmed Gs attached after the read name."
16
+ option 'q-trimming', :aliases => '-q', :type => :string, :default => '~', :desc => "Quality threshold - nucleotides with lower quality will be trimmed, from the end of the sequence. '~' is no trimming by quality, because this is the maximum quality base character."
17
+ option 'min-length', :aliases => '-l', :type => :numeric, :default => 0, :desc => 'Length threshold - sequences shorter than this after trimming will be filtered out. 0 is no filtering.'
16
18
  def demlt(bcfile, fastq=:stdin)
17
19
 
18
20
  ofs = options['umi-length']
19
- trim = options['g-trimming']
20
- len = trim ? -1 : options['cdna-length']
21
+ clen = options['cdna-length']
22
+ gtrim = options['g-trimming']
23
+ qtrim = options['q-trimming']
24
+ mlen = options['min-length']
21
25
 
22
26
  wells = Array.new
23
27
  bcs = Array.new
@@ -117,7 +121,7 @@ module Bio
117
121
  outpath = "#{options['output-dir']}/#{well}.fq.xz"
118
122
  pid = Kernel.fork {
119
123
  left = ofs+bclen
120
- right = trim ? -1 : ofs+bclen+len-1
124
+ right = clen > -1 ? -1 : ofs+bclen+clen-1
121
125
  preprocess = ofs > 0 ? <<"DEDUPandFORMAT"
122
126
  ruby -F'\\t' -anle 'f1=$F[1][0..#{right}];f2=$F[2][0..#{right}];puts([f1+f2, $F[0], f2, f1].join("\\t"))' #{fifo3paths[i]} \\
123
127
  | sort -k 1 -r | cut -f 2- | uniq -f 2 \\
@@ -127,8 +131,13 @@ DEDUPandFORMAT
127
131
  ruby -F'\\t' -anle 'puts(["@"+$F[0], $F[1][#{left}..#{right}], "+", $F[2][#{left}..#{right}].rstrip].join("\\n"))' #{fifo3paths[i]} \\
128
132
  FORMAT
129
133
 
130
- preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|s=v[1].gsub(/^G+/,"");l=v[1].length-s.length;puts("@#{v[0]}|-G#{l}\\n#{s}\\n+\\n#{v[2][l,s.length]}")}\'' if trim
134
+ preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|s=v[1].gsub(/^G+/,"");l=v[1].length-s.length;puts("@#{v[0]}|-G#{l}\\n#{s}\\n+\\n#{v[2][l,s.length]}") if s.length>0}\'' if gtrim
131
135
 
136
+ if qtrim != '~' || mlen > 0
137
+ preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|m=v[2].length-1;0.upto(m){|i|if v[2][i]<"'+qtrim+'" then m=i-1;break;end};puts("@#{v[0]}\n#{v[1][0..m]}\n+\n#{v[2][0..m]}") if m>'+mlen.to_s+'}\''
138
+ end
139
+
140
+ puts preprocess
132
141
  exec preprocess+"| xz -z -c -e > #{outpath}"
133
142
  }
134
143
  }
@@ -3,7 +3,7 @@ require 'thor'
3
3
  module Bio
4
4
  class Gadget < Thor
5
5
 
6
- VERSION = "0.4.1"
6
+ VERSION = "0.4.2"
7
7
 
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-gadget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shintaro Katayama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-10 00:00:00.000000000 Z
11
+ date: 2013-04-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor