bio-gadget 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.org +7 -7
- data/lib/bio-gadget/demlt.rb +3 -5
- data/lib/bio-gadget/gtfann.rb +93 -0
- data/lib/bio-gadget/version.rb +1 -1
- data/lib/bio-gadget.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aaeb43dbf9adba74b5741250dab6e1764421a4c4
|
4
|
+
data.tar.gz: f45ff460eeee4aee51b7888f39592dac24075898
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f7dbebd237bb629a0e1d758b6e024427586d54a7cf4ccac59270b34e9c2dec374ee0d5bf81c88465eaabf531a520c712876349d8e72db3793fd9560c45cceee
|
7
|
+
data.tar.gz: 4d39fb3ee8a65513bbea18750e435b5f5d54166c1e3f92920f6905e83248c7e0a39361382660ca80253572b466b0458d60b6cf4e3891971f02e34ef83292bed8
|
data/README.org
CHANGED
@@ -15,13 +15,13 @@ To check all commands in this package,
|
|
15
15
|
|
16
16
|
Currently available commands are
|
17
17
|
|
18
|
-
: bio
|
19
|
-
:
|
20
|
-
: bio-gadget
|
21
|
-
: bio-gadget
|
22
|
-
: bio-gadget
|
23
|
-
: bio-gadget
|
24
|
-
: bio-gadget wigchr WIG CHR
|
18
|
+
: bio-gadget dedup # deduplicate fastq (via STDIN)
|
19
|
+
: bio-gadget demlt BARCODE [FASTQ] # demultiplex fastq by barcodes
|
20
|
+
: bio-gadget ensann GENENAMEGZ [ENSGENE] # create ensGene.gtf
|
21
|
+
: bio-gadget fqxz # automatic (re)compression of *.fq(.gz|.bz2) files
|
22
|
+
: bio-gadget qvstat QUAL # statistics of quality values in *.qual file
|
23
|
+
: bio-gadget ucscann ISOFORMSGZ XREFGZ [KNOWNGENE] # create knownGene.gtf
|
24
|
+
: bio-gadget wigchr WIG CHR # extract wiggle track on specified chromosome
|
25
25
|
|
26
26
|
* Contributing
|
27
27
|
|
data/lib/bio-gadget/demlt.rb
CHANGED
@@ -8,14 +8,12 @@ module Bio
|
|
8
8
|
|
9
9
|
namespace :bio
|
10
10
|
|
11
|
-
desc 'demlt', 'demultiplex fastq from STDIN by barcodes'
|
12
|
-
option 'barcode-file', :aliases => '-b', :type => :string, :required => true
|
11
|
+
desc 'demlt BARCODE [FASTQ]', 'demultiplex fastq from STDIN by barcodes'
|
13
12
|
option 'output-dir', :aliases => '-o', :type => :string, :default => '.'
|
14
13
|
option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4
|
15
14
|
option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37
|
16
|
-
def demlt
|
15
|
+
def demlt(bcfile, fastq=:stdin)
|
17
16
|
|
18
|
-
bcfile = options['barcode-file']
|
19
17
|
ofs = options['umi-length']
|
20
18
|
len = options['cdna-length']
|
21
19
|
|
@@ -47,7 +45,7 @@ module Bio
|
|
47
45
|
fifo1s = Array.new
|
48
46
|
fifo1paths.each { |fifo1path| fifo1s.push(open(fifo1path, 'w')) }
|
49
47
|
total = 0
|
50
|
-
Bio::Faster.new(
|
48
|
+
Bio::Faster.new(fastq).each_record(:quality => :raw) do |vals|
|
51
49
|
fifo1 = fifo1s[total % procs]
|
52
50
|
fifo1.puts(vals.join("\t"))
|
53
51
|
total += 1
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module Bio
  class Gadget < Thor

    # Builds a GTF annotation from a knownGene-style table and writes it,
    # sorted by sequence name and start position, to standard output.
    #
    # isoforms  - path to a gzip-compressed, tab-separated file whose first
    #             two columns are (cluster id, transcript accession).
    # xref      - path to a gzip-compressed, tab-separated file; column 1 is
    #             the transcript accession and column 5 the gene symbol.
    # knowngene - path to the tab-separated gene table (default: standard
    #             input via /dev/stdin). Columns used: 0 accession,
    #             1 chromosome, 2 strand, 5/6 CDS start/end, 8/9 comma
    #             separated exon starts/ends.
    desc 'ucscann ISOFORMSGZ XREFGZ [KNOWNGENE]', 'create knownGene.gtf'
    def ucscann(isoforms, xref, knowngene='/dev/stdin')

      acc2id = Hash.new
      gtfann_each_gunzipped_line(isoforms) do |line|
        id, acc = line.rstrip.split(/\t/)
        acc2id[acc] = id
      end

      acc2sym = Hash.new
      gtfann_each_gunzipped_line(xref) do |line|
        # Columns 1 and 5 (1-based) are picked here instead of piping
        # through `cut -f 1,5`, so no shell pipeline is needed.
        acc, sym = line.rstrip.split(/\t/).values_at(0, 4)
        acc2sym[acc] = sym
      end

      gtfann_write_gtf(knowngene, 'knownGene', 0, acc2sym) { |acc, _cols| acc2id[acc] }

    end

    # Builds a GTF annotation from an ensGene-style table and writes it,
    # sorted by sequence name and start position, to standard output.
    #
    # genename - path to a gzip-compressed, tab-separated file whose first
    #            two columns are (transcript accession, gene symbol).
    # ensgene  - path to the tab-separated gene table (default: standard
    #            input via /dev/stdin). Same layout as the table read by
    #            ucscann but shifted right by one leading column; column 12
    #            supplies the gene id.
    desc 'ensann GENENAMEGZ [ENSGENE]', 'create ensGene.gtf'
    def ensann(genename, ensgene='/dev/stdin')

      acc2sym = Hash.new
      gtfann_each_gunzipped_line(genename) do |line|
        acc, sym = line.rstrip.split(/\t/)
        acc2sym[acc] = sym
      end

      gtfann_write_gtf(ensgene, 'ensGene', 1, acc2sym) { |_acc, cols| cols[12] }

    end

    private

    # Yields every line of the gzip-compressed file at +path+.
    #
    # The command is passed to IO.popen as an argument vector, so the path
    # is never interpreted by a shell (spaces and metacharacters are safe),
    # and the block form closes the pipe and reaps the child on exit.
    def gtfann_each_gunzipped_line(path, &block)
      IO.popen(['gunzip', '-c', path]) { |io| io.each_line(&block) }
    end

    # Converts the gene table at +path+ into GTF rows (exon, start_codon and
    # CDS features) and pipes them through `sort -k1,1 -k4,4n` to stdout.
    #
    # path    - table to read; opened with File.foreach so a leading "|" is
    #           never treated as a command.
    # source  - value written to the GTF source column.
    # offset  - number of leading columns to skip before the accession
    #           column (0 for ucscann, 1 for ensann).
    # acc2sym - Hash mapping transcript accession to gene symbol; a
    #           gene_name attribute is emitted only for known accessions.
    #
    # The block receives (accession, columns) and returns the gene_id.
    def gtfann_write_gtf(path, source, offset, acc2sym)
      IO.popen(['sort', '-k1,1', '-k4,4n'], 'w') do |fp|
        File.foreach(path) do |line|
          # Blank lines and '#' header lines carry no record; without this
          # guard they would fail on nil.split below.
          next if line.strip.empty? || line.start_with?('#')

          cols = line.rstrip.split(/\t/)
          acc, chr, str = cols.values_at(offset, offset + 1, offset + 2)
          cs = cols[offset + 5].to_i
          ce = cols[offset + 6].to_i
          # Pair exon starts with exon ends; a missing end becomes 0
          # (nil.to_i), exactly as indexing past the array did before.
          exons = cols[offset + 8].split(/,/).zip(cols[offset + 9].split(/,/)).map { |l, r|
            [l.to_i, r.to_i]
          }

          prop = "gene_id \"#{yield(acc, cols)}\"; transcript_id \"#{acc}\""
          prop += "; gene_name \"#{acc2sym[acc]}\"" if acc2sym.has_key?(acc)

          exons.each do |l, r|
            # Table starts are shifted by one to give the GTF start.
            fp.puts [chr, source, 'exon', l + 1, r, 0, str, '.', prop].join("\t")
          end

          # Equal CDS start and end means there is no coding region.
          next if cs == ce

          codon_start = (str == '+' ? cs + 1 : ce - 2)
          codon_end = (str == '+' ? cs + 3 : ce)
          fp.puts [chr, source, 'start_codon', codon_start, codon_end, 0, str, '.', prop].join("\t")

          exons.each do |l, r|
            # Keep only exons that overlap the coding interval.
            next if ce - 1 < l || r - 1 < cs
            # The CDS start gets the same +1 shift as the exon and
            # start_codon starts above; it was previously written unshifted.
            cds_start = (cs < l ? l : cs) + 1
            cds_end = (r < ce ? r : ce)
            fp.puts [chr, source, 'CDS', cds_start, cds_end, 0, str, '.', prop].join("\t")
          end
        end
      end
    end

  end
end
|
data/lib/bio-gadget/version.rb
CHANGED
data/lib/bio-gadget.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-gadget
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shintaro Katayama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-03-
|
11
|
+
date: 2013-03-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -100,6 +100,7 @@ files:
|
|
100
100
|
- lib/bio-gadget/dedup.rb
|
101
101
|
- lib/bio-gadget/demlt.rb
|
102
102
|
- lib/bio-gadget/fqxz.rb
|
103
|
+
- lib/bio-gadget/gtfann.rb
|
103
104
|
- lib/bio-gadget/qvstat.rb
|
104
105
|
- lib/bio-gadget/version.rb
|
105
106
|
- lib/bio-gadget/wigchr.rb
|