bio-gadget 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9bd8ea9d5f88557147da247ea9f817d4f069aae1
4
- data.tar.gz: 60235e4a8b6b82c01366d27f38bd4c40e0ef0e71
3
+ metadata.gz: aaeb43dbf9adba74b5741250dab6e1764421a4c4
4
+ data.tar.gz: f45ff460eeee4aee51b7888f39592dac24075898
5
5
  SHA512:
6
- metadata.gz: bdb115e0f1e9df39a39e108e7f8d00e9a1f2d69ed51b098217659050291034e5a052eaa2aed976cf6b9d18bea5a5929149367fe6b0f33e9a0ee7bc245668d2a5
7
- data.tar.gz: d959b569cec30289b3bdf16676b4b26a5e0ed7adceb3e90407646ba074b5621d506eae98f362d57e4b838525d370fc91112e88141dea24a6ae4da3b2dc8faa80
6
+ metadata.gz: 0f7dbebd237bb629a0e1d758b6e024427586d54a7cf4ccac59270b34e9c2dec374ee0d5bf81c88465eaabf531a520c712876349d8e72db3793fd9560c45cceee
7
+ data.tar.gz: 4d39fb3ee8a65513bbea18750e435b5f5d54166c1e3f92920f6905e83248c7e0a39361382660ca80253572b466b0458d60b6cf4e3891971f02e34ef83292bed8
data/README.org CHANGED
@@ -15,13 +15,13 @@ To check all commands in this package,
15
15
 
16
16
  Currently available commands are
17
17
 
18
- : bio
19
- : ---
20
- : bio-gadget dedup # deduplicate fastq (via STDIN)
21
- : bio-gadget demlt -b,--barcode-file=BARCODE-FILE # demultiplex fastq by barcodes
22
- : bio-gadget fqxz # automatic (re)compression of *.fq(.gz|.bz2) files
23
- : bio-gadget qvstat QUAL # statistics of quality values in *.qual file
24
- : bio-gadget wigchr WIG CHR # extract wiggle track on specified chromosome
18
+ : bio-gadget dedup # deduplicate fastq (via STDIN)
19
+ : bio-gadget demlt BARCODE [FASTQ] # demultiplex fastq by barcodes
20
+ : bio-gadget ensann GENENAMEGZ [ENSGENE] # create ensGene.gtf
21
+ : bio-gadget fqxz # automatic (re)compression of *.fq(.gz|.bz2) files
22
+ : bio-gadget qvstat QUAL # statistics of quality values in *.qual file
23
+ : bio-gadget ucscann ISOFORMSGZ XREFGZ [KNOWNGENE] # create knownGene.gtf
24
+ : bio-gadget wigchr WIG CHR # extract wiggle track on specified chromosome
25
25
 
26
26
  * Contributing
27
27
 
@@ -8,14 +8,12 @@ module Bio
8
8
 
9
9
  namespace :bio
10
10
 
11
- desc 'demlt', 'demultiplex fastq from STDIN by barcodes'
12
- option 'barcode-file', :aliases => '-b', :type => :string, :required => true
11
+ desc 'demlt BARCODE [FASTQ]', 'demultiplex fastq from STDIN by barcodes'
13
12
  option 'output-dir', :aliases => '-o', :type => :string, :default => '.'
14
13
  option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4
15
14
  option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37
16
- def demlt
15
+ def demlt(bcfile, fastq=:stdin)
17
16
 
18
- bcfile = options['barcode-file']
19
17
  ofs = options['umi-length']
20
18
  len = options['cdna-length']
21
19
 
@@ -47,7 +45,7 @@ module Bio
47
45
  fifo1s = Array.new
48
46
  fifo1paths.each { |fifo1path| fifo1s.push(open(fifo1path, 'w')) }
49
47
  total = 0
50
- Bio::Faster.new(:stdin).each_record(:quality => :raw) do |vals|
48
+ Bio::Faster.new(fastq).each_record(:quality => :raw) do |vals|
51
49
  fifo1 = fifo1s[total % procs]
52
50
  fifo1.puts(vals.join("\t"))
53
51
  total += 1
@@ -0,0 +1,93 @@
1
+ module Bio
2
+ class Gadget < Thor
3
+
4
+ desc 'ucscann ISOFORMSGZ XREFGZ [KNOWNGENE]', 'create knownGene.gtf'
5
+ def ucscann(isoforms, xref, knowngene='/dev/stdin')
6
+
7
+ acc2id = Hash.new
8
+ open("| gunzip -c #{isoforms}").each { |line|
9
+ id, acc = line.rstrip.split(/\t/)
10
+ acc2id[acc] = id
11
+ }
12
+
13
+ acc2sym = Hash.new
14
+ open("| gunzip -c #{xref} | cut -f 1,5").each { |line|
15
+ acc, sym = line.rstrip.split(/\t/)
16
+ acc2sym[acc] = sym
17
+ }
18
+
19
+ open("| sort -k1,1 -k4,4n", 'w') { |fp|
20
+ open(knowngene).each { |line|
21
+ cols = line.rstrip.split(/\t/)
22
+ acc, chr, str = cols.values_at(0, 1, 2)
23
+ cs = cols[5].to_i
24
+ ce = cols[6].to_i
25
+ lefts = cols[8].split(/,/)
26
+ rights = cols[9].split(/,/)
27
+ prop = "gene_id \"#{acc2id[acc]}\"; transcript_id \"#{acc}\""
28
+ prop += "; gene_name \"#{acc2sym[acc]}\"" if acc2sym.has_key?(acc)
29
+ lefts.each_index { |i|
30
+ next if lefts[i].nil?
31
+ l = lefts[i].to_i
32
+ r = rights[i].to_i
33
+ fp.puts [chr, 'knownGene', 'exon', l+1, r, 0, str, '.', prop].join("\t")
34
+ }
35
+ if cs != ce
36
+ fp.puts [chr, 'knownGene', 'start_codon', (str == '+' ? cs+1 : ce-2), (str == '+' ? cs+3 : ce), 0, str, '.', prop].join("\t")
37
+ lefts.each_index { |i|
38
+ next if lefts[i].nil?
39
+ l = lefts[i].to_i
40
+ next if ce-1 < l
41
+ r = rights[i].to_i
42
+ next if r-1 < cs
43
+ fp.puts [chr, 'knownGene', 'CDS', (cs < l ? l : cs), (r < ce ? r : ce), 0, str, '.', prop].join("\t")
44
+ }
45
+ end
46
+ }
47
+ }
48
+
49
+ end
50
+
51
+ desc 'ensann GENENAMEGZ [ENSGENE]', 'create ensGene.gtf'
52
+ def ensann(genename, ensgene='/dev/stdin')
53
+
54
+ acc2sym = Hash.new
55
+ open("| gunzip -c #{genename}").each { |line|
56
+ acc, sym = line.rstrip.split(/\t/)
57
+ acc2sym[acc] = sym
58
+ }
59
+
60
+ open("| sort -k1,1 -k4,4n", 'w') { |fp|
61
+ open(ensgene).each { |line|
62
+ cols = line.rstrip.split(/\t/)
63
+ acc, chr, str, id = cols.values_at(1, 2, 3, 12)
64
+ cs = cols[6].to_i
65
+ ce = cols[7].to_i
66
+ lefts = cols[9].split(/,/)
67
+ rights = cols[10].split(/,/)
68
+ prop = "gene_id \"#{id}\"; transcript_id \"#{acc}\""
69
+ prop += "; gene_name \"#{acc2sym[acc]}\"" if acc2sym.has_key?(acc)
70
+ lefts.each_index { |i|
71
+ next if lefts[i].nil?
72
+ l = lefts[i].to_i
73
+ r = rights[i].to_i
74
+ fp.puts [chr, 'ensGene', 'exon', l+1, r, 0, str, '.', prop].join("\t")
75
+ }
76
+ if cs != ce
77
+ fp.puts [chr, 'ensGene', 'start_codon', (str == '+' ? cs+1 : ce-2), (str == '+' ? cs+3 : ce), 0, str, '.', prop].join("\t")
78
+ lefts.each_index { |i|
79
+ next if lefts[i].nil?
80
+ l = lefts[i].to_i
81
+ next if ce-1 < l
82
+ r = rights[i].to_i
83
+ next if r-1 < cs
84
+ fp.puts [chr, 'ensGene', 'CDS', (cs < l ? l : cs), (r < ce ? r : ce), 0, str, '.', prop].join("\t")
85
+ }
86
+ end
87
+ }
88
+ }
89
+
90
+ end
91
+
92
+ end
93
+ end
@@ -3,7 +3,7 @@ require 'thor'
3
3
  module Bio
4
4
  class Gadget < Thor
5
5
 
6
- VERSION = "0.3.0"
6
+ VERSION = "0.3.1"
7
7
 
8
8
  end
9
9
  end
data/lib/bio-gadget.rb CHANGED
@@ -2,6 +2,7 @@ require 'bio-gadget/version'
2
2
  require 'bio-gadget/dedup'
3
3
  require 'bio-gadget/demlt'
4
4
  require 'bio-gadget/fqxz'
5
+ require 'bio-gadget/gtfann'
5
6
  require 'bio-gadget/qvstat'
6
7
  require 'bio-gadget/wigchr'
7
8
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-gadget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shintaro Katayama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-16 00:00:00.000000000 Z
11
+ date: 2013-03-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -100,6 +100,7 @@ files:
100
100
  - lib/bio-gadget/dedup.rb
101
101
  - lib/bio-gadget/demlt.rb
102
102
  - lib/bio-gadget/fqxz.rb
103
+ - lib/bio-gadget/gtfann.rb
103
104
  - lib/bio-gadget/qvstat.rb
104
105
  - lib/bio-gadget/version.rb
105
106
  - lib/bio-gadget/wigchr.rb