bio-gadget 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9bd8ea9d5f88557147da247ea9f817d4f069aae1
4
- data.tar.gz: 60235e4a8b6b82c01366d27f38bd4c40e0ef0e71
3
+ metadata.gz: aaeb43dbf9adba74b5741250dab6e1764421a4c4
4
+ data.tar.gz: f45ff460eeee4aee51b7888f39592dac24075898
5
5
  SHA512:
6
- metadata.gz: bdb115e0f1e9df39a39e108e7f8d00e9a1f2d69ed51b098217659050291034e5a052eaa2aed976cf6b9d18bea5a5929149367fe6b0f33e9a0ee7bc245668d2a5
7
- data.tar.gz: d959b569cec30289b3bdf16676b4b26a5e0ed7adceb3e90407646ba074b5621d506eae98f362d57e4b838525d370fc91112e88141dea24a6ae4da3b2dc8faa80
6
+ metadata.gz: 0f7dbebd237bb629a0e1d758b6e024427586d54a7cf4ccac59270b34e9c2dec374ee0d5bf81c88465eaabf531a520c712876349d8e72db3793fd9560c45cceee
7
+ data.tar.gz: 4d39fb3ee8a65513bbea18750e435b5f5d54166c1e3f92920f6905e83248c7e0a39361382660ca80253572b466b0458d60b6cf4e3891971f02e34ef83292bed8
data/README.org CHANGED
@@ -15,13 +15,13 @@ To check all commands in this package,
15
15
 
16
16
  Currently available commands are
17
17
 
18
- : bio
19
- : ---
20
- : bio-gadget dedup # deduplicate fastq (via STDIN)
21
- : bio-gadget demlt -b,--barcode-file=BARCODE-FILE # demultiplex fastq by barcodes
22
- : bio-gadget fqxz # automatic (re)compression of *.fq(.gz|.bz2) files
23
- : bio-gadget qvstat QUAL # statistics of quality values in *.qual file
24
- : bio-gadget wigchr WIG CHR # extract wiggle track on specified chromosome
18
+ : bio-gadget dedup # deduplicate fastq (via STDIN)
19
+ : bio-gadget demlt BARCODE [FASTQ] # demultiplex fastq by barcodes
20
+ : bio-gadget ensann GENENAMEGZ [ENSGENE] # create ensGene.gtf
21
+ : bio-gadget fqxz # automatic (re)compression of *.fq(.gz|.bz2) files
22
+ : bio-gadget qvstat QUAL # statistics of quality values in *.qual file
23
+ : bio-gadget ucscann ISOFORMSGZ XREFGZ [KNOWNGENE] # create knownGene.gtf
24
+ : bio-gadget wigchr WIG CHR # extract wiggle track on specified chromosome
25
25
 
26
26
  * Contributing
27
27
 
data/lib/bio-gadget/demlt.rb CHANGED
@@ -8,14 +8,12 @@ module Bio
8
8
 
9
9
  namespace :bio
10
10
 
11
- desc 'demlt', 'demultiplex fastq from STDIN by barcodes'
12
- option 'barcode-file', :aliases => '-b', :type => :string, :required => true
11
+ desc 'demlt BARCODE [FASTQ]', 'demultiplex fastq from STDIN by barcodes'
13
12
  option 'output-dir', :aliases => '-o', :type => :string, :default => '.'
14
13
  option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4
15
14
  option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37
16
- def demlt
15
+ def demlt(bcfile, fastq=:stdin)
17
16
 
18
- bcfile = options['barcode-file']
19
17
  ofs = options['umi-length']
20
18
  len = options['cdna-length']
21
19
 
@@ -47,7 +45,7 @@ module Bio
47
45
  fifo1s = Array.new
48
46
  fifo1paths.each { |fifo1path| fifo1s.push(open(fifo1path, 'w')) }
49
47
  total = 0
50
- Bio::Faster.new(:stdin).each_record(:quality => :raw) do |vals|
48
+ Bio::Faster.new(fastq).each_record(:quality => :raw) do |vals|
51
49
  fifo1 = fifo1s[total % procs]
52
50
  fifo1.puts(vals.join("\t"))
53
51
  total += 1
data/lib/bio-gadget/gtfann.rb ADDED
@@ -0,0 +1,93 @@
1
+ module Bio
2
+ class Gadget < Thor
3
+
4
+ desc 'ucscann ISOFORMSGZ XREFGZ [KNOWNGENE]', 'create knownGene.gtf'
5
+ def ucscann(isoforms, xref, knowngene='/dev/stdin')
6
+
7
+ acc2id = Hash.new
8
+ open("| gunzip -c #{isoforms}").each { |line|
9
+ id, acc = line.rstrip.split(/\t/)
10
+ acc2id[acc] = id
11
+ }
12
+
13
+ acc2sym = Hash.new
14
+ open("| gunzip -c #{xref} | cut -f 1,5").each { |line|
15
+ acc, sym = line.rstrip.split(/\t/)
16
+ acc2sym[acc] = sym
17
+ }
18
+
19
+ open("| sort -k1,1 -k4,4n", 'w') { |fp|
20
+ open(knowngene).each { |line|
21
+ cols = line.rstrip.split(/\t/)
22
+ acc, chr, str = cols.values_at(0, 1, 2)
23
+ cs = cols[5].to_i
24
+ ce = cols[6].to_i
25
+ lefts = cols[8].split(/,/)
26
+ rights = cols[9].split(/,/)
27
+ prop = "gene_id \"#{acc2id[acc]}\"; transcript_id \"#{acc}\""
28
+ prop += "; gene_name \"#{acc2sym[acc]}\"" if acc2sym.has_key?(acc)
29
+ lefts.each_index { |i|
30
+ next if lefts[i].nil?
31
+ l = lefts[i].to_i
32
+ r = rights[i].to_i
33
+ fp.puts [chr, 'knownGene', 'exon', l+1, r, 0, str, '.', prop].join("\t")
34
+ }
35
+ if cs != ce
36
+ fp.puts [chr, 'knownGene', 'start_codon', (str == '+' ? cs+1 : ce-2), (str == '+' ? cs+3 : ce), 0, str, '.', prop].join("\t")
37
+ lefts.each_index { |i|
38
+ next if lefts[i].nil?
39
+ l = lefts[i].to_i
40
+ next if ce-1 < l
41
+ r = rights[i].to_i
42
+ next if r-1 < cs
43
+ fp.puts [chr, 'knownGene', 'CDS', (cs < l ? l : cs), (r < ce ? r : ce), 0, str, '.', prop].join("\t")
44
+ }
45
+ end
46
+ }
47
+ }
48
+
49
+ end
50
+
51
+ desc 'ensann GENENAMEGZ [ENSGENE]', 'create ensGene.gtf'
52
+ def ensann(genename, ensgene='/dev/stdin')
53
+
54
+ acc2sym = Hash.new
55
+ open("| gunzip -c #{genename}").each { |line|
56
+ acc, sym = line.rstrip.split(/\t/)
57
+ acc2sym[acc] = sym
58
+ }
59
+
60
+ open("| sort -k1,1 -k4,4n", 'w') { |fp|
61
+ open(ensgene).each { |line|
62
+ cols = line.rstrip.split(/\t/)
63
+ acc, chr, str, id = cols.values_at(1, 2, 3, 12)
64
+ cs = cols[6].to_i
65
+ ce = cols[7].to_i
66
+ lefts = cols[9].split(/,/)
67
+ rights = cols[10].split(/,/)
68
+ prop = "gene_id \"#{id}\"; transcript_id \"#{acc}\""
69
+ prop += "; gene_name \"#{acc2sym[acc]}\"" if acc2sym.has_key?(acc)
70
+ lefts.each_index { |i|
71
+ next if lefts[i].nil?
72
+ l = lefts[i].to_i
73
+ r = rights[i].to_i
74
+ fp.puts [chr, 'ensGene', 'exon', l+1, r, 0, str, '.', prop].join("\t")
75
+ }
76
+ if cs != ce
77
+ fp.puts [chr, 'ensGene', 'start_codon', (str == '+' ? cs+1 : ce-2), (str == '+' ? cs+3 : ce), 0, str, '.', prop].join("\t")
78
+ lefts.each_index { |i|
79
+ next if lefts[i].nil?
80
+ l = lefts[i].to_i
81
+ next if ce-1 < l
82
+ r = rights[i].to_i
83
+ next if r-1 < cs
84
+ fp.puts [chr, 'ensGene', 'CDS', (cs < l ? l : cs), (r < ce ? r : ce), 0, str, '.', prop].join("\t")
85
+ }
86
+ end
87
+ }
88
+ }
89
+
90
+ end
91
+
92
+ end
93
+ end
data/lib/bio-gadget/version.rb CHANGED
@@ -3,7 +3,7 @@ require 'thor'
3
3
  module Bio
4
4
  class Gadget < Thor
5
5
 
6
- VERSION = "0.3.0"
6
+ VERSION = "0.3.1"
7
7
 
8
8
  end
9
9
  end
data/lib/bio-gadget.rb CHANGED
@@ -2,6 +2,7 @@ require 'bio-gadget/version'
2
2
  require 'bio-gadget/dedup'
3
3
  require 'bio-gadget/demlt'
4
4
  require 'bio-gadget/fqxz'
5
+ require 'bio-gadget/gtfann'
5
6
  require 'bio-gadget/qvstat'
6
7
  require 'bio-gadget/wigchr'
7
8
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-gadget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shintaro Katayama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-16 00:00:00.000000000 Z
11
+ date: 2013-03-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -100,6 +100,7 @@ files:
100
100
  - lib/bio-gadget/dedup.rb
101
101
  - lib/bio-gadget/demlt.rb
102
102
  - lib/bio-gadget/fqxz.rb
103
+ - lib/bio-gadget/gtfann.rb
103
104
  - lib/bio-gadget/qvstat.rb
104
105
  - lib/bio-gadget/version.rb
105
106
  - lib/bio-gadget/wigchr.rb