bio-gadget 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.org +7 -7
- data/lib/bio-gadget/demlt.rb +3 -5
- data/lib/bio-gadget/gtfann.rb +93 -0
- data/lib/bio-gadget/version.rb +1 -1
- data/lib/bio-gadget.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aaeb43dbf9adba74b5741250dab6e1764421a4c4
|
4
|
+
data.tar.gz: f45ff460eeee4aee51b7888f39592dac24075898
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f7dbebd237bb629a0e1d758b6e024427586d54a7cf4ccac59270b34e9c2dec374ee0d5bf81c88465eaabf531a520c712876349d8e72db3793fd9560c45cceee
|
7
|
+
data.tar.gz: 4d39fb3ee8a65513bbea18750e435b5f5d54166c1e3f92920f6905e83248c7e0a39361382660ca80253572b466b0458d60b6cf4e3891971f02e34ef83292bed8
|
data/README.org
CHANGED
@@ -15,13 +15,13 @@ To check all commands in this package,
|
|
15
15
|
|
16
16
|
Currently available commands are
|
17
17
|
|
18
|
-
: bio
|
19
|
-
:
|
20
|
-
: bio-gadget
|
21
|
-
: bio-gadget
|
22
|
-
: bio-gadget
|
23
|
-
: bio-gadget
|
24
|
-
: bio-gadget wigchr WIG CHR
|
18
|
+
: bio-gadget dedup # deduplicate fastq (via STDIN)
|
19
|
+
: bio-gadget demlt BARCODE [FASTQ] # demultiplex fastq by barcodes
|
20
|
+
: bio-gadget ensann GENENAMEGZ [ENSGENE] # create ensGene.gtf
|
21
|
+
: bio-gadget fqxz # automatic (re)compression of *.fq(.gz|.bz2) files
|
22
|
+
: bio-gadget qvstat QUAL # statistics of quality values in *.qual file
|
23
|
+
: bio-gadget ucscann ISOFORMSGZ XREFGZ [KNOWNGENE] # create knownGene.gtf
|
24
|
+
: bio-gadget wigchr WIG CHR # extract wiggle track on specified chromosome
|
25
25
|
|
26
26
|
* Contributing
|
27
27
|
|
data/lib/bio-gadget/demlt.rb
CHANGED
@@ -8,14 +8,12 @@ module Bio
|
|
8
8
|
|
9
9
|
namespace :bio
|
10
10
|
|
11
|
-
desc 'demlt', 'demultiplex fastq from STDIN by barcodes'
|
12
|
-
option 'barcode-file', :aliases => '-b', :type => :string, :required => true
|
11
|
+
desc 'demlt BARCODE [FASTQ]', 'demultiplex fastq from STDIN by barcodes'
|
13
12
|
option 'output-dir', :aliases => '-o', :type => :string, :default => '.'
|
14
13
|
option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4
|
15
14
|
option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37
|
16
|
-
def demlt
|
15
|
+
def demlt(bcfile, fastq=:stdin)
|
17
16
|
|
18
|
-
bcfile = options['barcode-file']
|
19
17
|
ofs = options['umi-length']
|
20
18
|
len = options['cdna-length']
|
21
19
|
|
@@ -47,7 +45,7 @@ module Bio
|
|
47
45
|
fifo1s = Array.new
|
48
46
|
fifo1paths.each { |fifo1path| fifo1s.push(open(fifo1path, 'w')) }
|
49
47
|
total = 0
|
50
|
-
Bio::Faster.new(
|
48
|
+
Bio::Faster.new(fastq).each_record(:quality => :raw) do |vals|
|
51
49
|
fifo1 = fifo1s[total % procs]
|
52
50
|
fifo1.puts(vals.join("\t"))
|
53
51
|
total += 1
|
data/lib/bio-gadget/gtfann.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
module Bio
|
2
|
+
class Gadget < Thor
|
3
|
+
|
4
|
+
desc 'ucscann ISOFORMSGZ XREFGZ [KNOWNGENE]', 'create knownGene.gtf'
|
5
|
+
def ucscann(isoforms, xref, knowngene='/dev/stdin')
|
6
|
+
|
7
|
+
acc2id = Hash.new
|
8
|
+
open("| gunzip -c #{isoforms}").each { |line|
|
9
|
+
id, acc = line.rstrip.split(/\t/)
|
10
|
+
acc2id[acc] = id
|
11
|
+
}
|
12
|
+
|
13
|
+
acc2sym = Hash.new
|
14
|
+
open("| gunzip -c #{xref} | cut -f 1,5").each { |line|
|
15
|
+
acc, sym = line.rstrip.split(/\t/)
|
16
|
+
acc2sym[acc] = sym
|
17
|
+
}
|
18
|
+
|
19
|
+
open("| sort -k1,1 -k4,4n", 'w') { |fp|
|
20
|
+
open(knowngene).each { |line|
|
21
|
+
cols = line.rstrip.split(/\t/)
|
22
|
+
acc, chr, str = cols.values_at(0, 1, 2)
|
23
|
+
cs = cols[5].to_i
|
24
|
+
ce = cols[6].to_i
|
25
|
+
lefts = cols[8].split(/,/)
|
26
|
+
rights = cols[9].split(/,/)
|
27
|
+
prop = "gene_id \"#{acc2id[acc]}\"; transcript_id \"#{acc}\""
|
28
|
+
prop += "; gene_name \"#{acc2sym[acc]}\"" if acc2sym.has_key?(acc)
|
29
|
+
lefts.each_index { |i|
|
30
|
+
next if lefts[i].nil?
|
31
|
+
l = lefts[i].to_i
|
32
|
+
r = rights[i].to_i
|
33
|
+
fp.puts [chr, 'knownGene', 'exon', l+1, r, 0, str, '.', prop].join("\t")
|
34
|
+
}
|
35
|
+
if cs != ce
|
36
|
+
fp.puts [chr, 'knownGene', 'start_codon', (str == '+' ? cs+1 : ce-2), (str == '+' ? cs+3 : ce), 0, str, '.', prop].join("\t")
|
37
|
+
lefts.each_index { |i|
|
38
|
+
next if lefts[i].nil?
|
39
|
+
l = lefts[i].to_i
|
40
|
+
next if ce-1 < l
|
41
|
+
r = rights[i].to_i
|
42
|
+
next if r-1 < cs
|
43
|
+
fp.puts [chr, 'knownGene', 'CDS', (cs < l ? l : cs), (r < ce ? r : ce), 0, str, '.', prop].join("\t")
|
44
|
+
}
|
45
|
+
end
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
desc 'ensann GENENAMEGZ [ENSGENE]', 'create ensGene.gtf'
|
52
|
+
def ensann(genename, ensgene='/dev/stdin')
|
53
|
+
|
54
|
+
acc2sym = Hash.new
|
55
|
+
open("| gunzip -c #{genename}").each { |line|
|
56
|
+
acc, sym = line.rstrip.split(/\t/)
|
57
|
+
acc2sym[acc] = sym
|
58
|
+
}
|
59
|
+
|
60
|
+
open("| sort -k1,1 -k4,4n", 'w') { |fp|
|
61
|
+
open(ensgene).each { |line|
|
62
|
+
cols = line.rstrip.split(/\t/)
|
63
|
+
acc, chr, str, id = cols.values_at(1, 2, 3, 12)
|
64
|
+
cs = cols[6].to_i
|
65
|
+
ce = cols[7].to_i
|
66
|
+
lefts = cols[9].split(/,/)
|
67
|
+
rights = cols[10].split(/,/)
|
68
|
+
prop = "gene_id \"#{id}\"; transcript_id \"#{acc}\""
|
69
|
+
prop += "; gene_name \"#{acc2sym[acc]}\"" if acc2sym.has_key?(acc)
|
70
|
+
lefts.each_index { |i|
|
71
|
+
next if lefts[i].nil?
|
72
|
+
l = lefts[i].to_i
|
73
|
+
r = rights[i].to_i
|
74
|
+
fp.puts [chr, 'ensGene', 'exon', l+1, r, 0, str, '.', prop].join("\t")
|
75
|
+
}
|
76
|
+
if cs != ce
|
77
|
+
fp.puts [chr, 'ensGene', 'start_codon', (str == '+' ? cs+1 : ce-2), (str == '+' ? cs+3 : ce), 0, str, '.', prop].join("\t")
|
78
|
+
lefts.each_index { |i|
|
79
|
+
next if lefts[i].nil?
|
80
|
+
l = lefts[i].to_i
|
81
|
+
next if ce-1 < l
|
82
|
+
r = rights[i].to_i
|
83
|
+
next if r-1 < cs
|
84
|
+
fp.puts [chr, 'ensGene', 'CDS', (cs < l ? l : cs), (r < ce ? r : ce), 0, str, '.', prop].join("\t")
|
85
|
+
}
|
86
|
+
end
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
93
|
+
end
|
data/lib/bio-gadget/version.rb
CHANGED
data/lib/bio-gadget.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-gadget
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shintaro Katayama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-03-
|
11
|
+
date: 2013-03-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -100,6 +100,7 @@ files:
|
|
100
100
|
- lib/bio-gadget/dedup.rb
|
101
101
|
- lib/bio-gadget/demlt.rb
|
102
102
|
- lib/bio-gadget/fqxz.rb
|
103
|
+
- lib/bio-gadget/gtfann.rb
|
103
104
|
- lib/bio-gadget/qvstat.rb
|
104
105
|
- lib/bio-gadget/version.rb
|
105
106
|
- lib/bio-gadget/wigchr.rb
|