bio-gadget 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 83130daa98cf1f04006aba7a9602b52a036d8d9c
4
- data.tar.gz: 3c99911a518cacaa4ee0661bbff1d007b506500b
3
+ metadata.gz: 27884e24ae8337cdb9a1f93c1baf1e1b8c19c5af
4
+ data.tar.gz: 7b0ee4c467ba98d3564014f8786c197d9e81756f
5
5
  SHA512:
6
- metadata.gz: 2869693a0cb2357c9d9bab623a6686475c96ef9b9e28e6498a3b8cbe66404f7d31cccbe3b09957436eab9c9c87442df37177b2ca3704244bd03f97d2bb09b9b8
7
- data.tar.gz: 644f86a29aed0eebbf30031f54fe70b28ce607d8248b594d393396bee192e83aa30ea0faffebcc35ffd8bd294e36e3f1f6f99b5ba266b11cdc3f266db76a01c9
6
+ metadata.gz: badbe0d6f1f09e5005b5bc29b1d94c35f115d1286050d99de3caf18bd3222b3ca497ac0a632448dea5eb931fd8951bfc08f0e77cf9d4b275d0cf04701bd3900c
7
+ data.tar.gz: 86c0c9b27fa747f27091a7d1ca994e36d9f5393e690912b51cbcb8612daf9c26bc76a70de069d02c6403dc2152701b861287350fa19e21fdd2d019356521307b
data/README.org CHANGED
@@ -22,10 +22,6 @@ Currently available commands are
22
22
  - wig5p :: Convert bam-format alignments into wig-format table
23
23
  - wigchr :: Extract wiggle track on specified chromosome
24
24
 
25
- ** Memo :noexport:
26
- - gtfensembl :: Create gtf of ENSEMBL Genes (a.k.a. ensGene.gtf) from XX files of UCSC annotation database; .txt.gz of
27
- - gtfucsc :: Create gtf of UCSC Genes from four (knownIsoforms.txt.gz, kgXref.txt.gz, kgTxInfo.txt.gz and knownGene.txt.gz) files of UCSC annotation database. The output gtf contains gene_id, transcript_id and gene_name attributes, so it is convenient to use as a gene/transcript annotation file for tophat/cufflinks etc.
28
-
29
25
  * Contributing
30
26
 
31
27
  1. Fork it
@@ -8,16 +8,20 @@ module Bio
8
8
 
9
9
  namespace :bio
10
10
 
11
- desc 'demlt BARCODE [FASTQ]', 'Demultiplex fastq from STDIN by barcodes. '
11
+ desc 'demlt BARCODE [FASTQ]', "Demultiplex fastq from STDIN by barcodes.\n\n"
12
12
  option 'output-dir', :aliases => '-o', :type => :string, :default => '.'
13
- option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4, :desc => '0 is no umi, means no PCR-amplicon removal.'
14
- option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37, :desc => '-1 is no trimming by length.'
15
- option 'g-trimming', :aliases => '-g', :type => :boolean, :default => false, :desc => "Trimming of 5'-end poly-G. Length of the trimmed Gs attached after the read name. This sets cdna-length option to -1."
13
+ option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4, :desc => '0 is no umi, means no PCR-amplicon reduction.'
14
+ option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37, :desc => 'Trimming length before PCA-amplicon reduction. -1 is no trimming by length.'
15
+ option 'g-trimming', :aliases => '-g', :type => :boolean, :default => false, :desc => "Trimming of 5'-end poly-G. Length of the trimmed Gs attached after the read name."
16
+ option 'q-trimming', :aliases => '-q', :type => :string, :default => '~', :desc => "Quality threshold - nucleotides with lower quality will be trimmed, from the end of the sequence. '~' is no trimming by quality, because this is the maximum quality base character."
17
+ option 'min-length', :aliases => '-l', :type => :numeric, :default => 0, :desc => 'Length threshold - sequences shorter than this after trimming will be filtered out. 0 is no filtering.'
16
18
  def demlt(bcfile, fastq=:stdin)
17
19
 
18
20
  ofs = options['umi-length']
19
- trim = options['g-trimming']
20
- len = trim ? -1 : options['cdna-length']
21
+ clen = options['cdna-length']
22
+ gtrim = options['g-trimming']
23
+ qtrim = options['q-trimming']
24
+ mlen = options['min-length']
21
25
 
22
26
  wells = Array.new
23
27
  bcs = Array.new
@@ -117,7 +121,7 @@ module Bio
117
121
  outpath = "#{options['output-dir']}/#{well}.fq.xz"
118
122
  pid = Kernel.fork {
119
123
  left = ofs+bclen
120
- right = trim ? -1 : ofs+bclen+len-1
124
+ right = clen > -1 ? -1 : ofs+bclen+clen-1
121
125
  preprocess = ofs > 0 ? <<"DEDUPandFORMAT"
122
126
  ruby -F'\\t' -anle 'f1=$F[1][0..#{right}];f2=$F[2][0..#{right}];puts([f1+f2, $F[0], f2, f1].join("\\t"))' #{fifo3paths[i]} \\
123
127
  | sort -k 1 -r | cut -f 2- | uniq -f 2 \\
@@ -127,8 +131,13 @@ DEDUPandFORMAT
127
131
  ruby -F'\\t' -anle 'puts(["@"+$F[0], $F[1][#{left}..#{right}], "+", $F[2][#{left}..#{right}].rstrip].join("\\n"))' #{fifo3paths[i]} \\
128
132
  FORMAT
129
133
 
130
- preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|s=v[1].gsub(/^G+/,"");l=v[1].length-s.length;puts("@#{v[0]}|-G#{l}\\n#{s}\\n+\\n#{v[2][l,s.length]}")}\'' if trim
134
+ preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|s=v[1].gsub(/^G+/,"");l=v[1].length-s.length;puts("@#{v[0]}|-G#{l}\\n#{s}\\n+\\n#{v[2][l,s.length]}") if s.length>0}\'' if gtrim
131
135
 
136
+ if qtrim != '~' || mlen > 0
137
+ preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|m=v[2].length-1;0.upto(m){|i|if v[2][i]<"'+qtrim+'" then m=i-1;break;end};puts("@#{v[0]}\n#{v[1][0..m]}\n+\n#{v[2][0..m]}") if m>'+mlen.to_s+'}\''
138
+ end
139
+
140
+ puts preprocess
132
141
  exec preprocess+"| xz -z -c -e > #{outpath}"
133
142
  }
134
143
  }
@@ -3,7 +3,7 @@ require 'thor'
3
3
  module Bio
4
4
  class Gadget < Thor
5
5
 
6
- VERSION = "0.4.1"
6
+ VERSION = "0.4.2"
7
7
 
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-gadget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shintaro Katayama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-10 00:00:00.000000000 Z
11
+ date: 2013-04-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor