bio-gadget 0.4.8 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,61 +0,0 @@
1
module Bio
  class Gadget < Thor

    desc 'femrg [GTF...]', 'Extract and merge overlapping first exons.'

    # Reads exon records from the given GTF files (standard input when no
    # file is given, or for a file named "-"), keeps one "first exon" per
    # transcript, merges overlapping first exons on the same chromosome and
    # strand, and prints each merged cluster to standard output as a
    # 'transcript' line followed by an 'exon' line in GTF format.
    #
    # The first exon of a transcript is the one with the smallest
    # exon_number on the "+" strand and the largest exon_number on the "-"
    # strand; on any other strand the first record seen is kept.
    #
    # The input is read directly in Ruby instead of through a
    # "grep | cut" shell pipeline, so file names containing spaces or shell
    # metacharacters are handled safely.
    #
    # @param gtf_files [Array<String>] paths of GTF files
    def femrg(*gtf_files)

      # chromosome => strand => transcript_id => [start, stop, exon_number]
      exon1s = Hash.new { |bychr, chr|
        bychr[chr] = Hash.new { |bystr, str| bystr[str] = {} }
      }

      scan = lambda { |io|
        io.each_line { |line|
          next unless line.include?("\texon\t")
          chr, sstart, sstop, str, attr =
            line.rstrip.split(/\t/).values_at(0, 3, 4, 6, 8)
          next if attr.nil? # truncated record without an attribute column
          id = attr[/transcript_id "([^"]+)"/, 1]
          en = attr[/exon_number "(\d+)"/, 1].to_i
          known = exon1s[chr][str][id]
          if known.nil? ||
             (str == '+' && en < known[2]) ||
             (str == '-' && known[2] < en)
            exon1s[chr][str][id] = [sstart.to_i, sstop.to_i, en]
          end
        }
      }

      if gtf_files.empty?
        scan.call($stdin)
      else
        gtf_files.each { |path|
          if path == '-'
            scan.call($stdin)
          else
            File.open(path) { |io| scan.call(io) }
          end
        }
      end

      idx = 0
      exon1s.each { |chr, bystrand|
        bystrand.each { |str, exons|
          # Sweep the first exons from left to right; the id is a secondary
          # sort key so that the member order is deterministic on ties.
          ordered = exons.sort_by { |tid, (estart, _estop, _en)| [estart, tid.to_s] }

          clusters = []
          members = nil
          start = stop = nil
          ordered.each { |tid, (estart, estop, _en)|
            if members.nil?
              members, start, stop = [tid], estart, estop
            elsif stop < estart
              # gap to the current cluster: close it and start a new one
              clusters.push([members, start, stop])
              members, start, stop = [tid], estart, estop
            else
              members.push(tid)
              start, stop = [start, stop, estart, estop].minmax
            end
          }
          clusters.push([members, start, stop]) unless members.nil?

          clusters.each { |ids, cstart, cstop|
            attr = "gene_id \"FE#{idx}\"; transcript_id \"FE#{idx}\"; "
            puts [chr, 'bio-gadget:femrg', 'transcript', cstart, cstop, 1000, str, '.', attr].join("\t")
            puts [chr, 'bio-gadget:femrg', 'exon', cstart, cstop, 1000, str, '.', "#{attr}exon_number \"1\"; member_ids \"#{ids.join('|')}\""].join("\t")
            idx += 1
          }
        }
      }

    end

  end
end
@@ -1,30 +0,0 @@
1
- require 'parallel'
2
- require 'pathname'
3
-
4
require 'open3'

module Bio
  class Gadget < Thor

    namespace :bio

    desc 'fqxz', 'automatic (re)compression of *.fq(.gz|.bz2) files'
    # Compresses every *.fq, *.fq.gz and *.fq.bz2 file in the current
    # directory into a matching *.fq.xz file, in parallel, skipping inputs
    # whose *.fq.xz already exists. Messages from xz and from the following
    # integrity test ("xz -t") are collected in "<output>.log".
    #
    # The commands are started without a shell, so file names containing
    # spaces or shell metacharacters are safe. When the decompressor, xz or
    # the integrity test fails, the incomplete output is removed; otherwise
    # its mere existence would make later runs skip the file.
    def fqxz
      Parallel.map(Pathname.glob('*.fq{.gz,.bz2,}')) { |fqfilename|
        xzfilename = fqfilename.sub(/\.fq(\.(gz|bz2))*$/, '.fq.xz')
        next if xzfilename.exist?

        decompressor =
          case fqfilename.extname
          when '.gz' then %w[gunzip -c]
          when '.bz2' then %w[bunzip2 -c]
          else %w[cat]
          end
        logfilename = "#{xzfilename}.log"

        puts "compressing #{xzfilename}..."
        # "--" keeps a file name starting with "-" from being read as an option.
        statuses = Open3.pipeline(
          decompressor + ['--', fqfilename.to_s],
          ['xz', '-z', '-e', '-c', { :err => logfilename }],
          :out => xzfilename.to_s
        )
        ok = statuses.all? { |status| status.success? } &&
             system('xz', '-t', '--', xzfilename.to_s,
                    [:out, :err] => [logfilename, 'a'])
        unless ok
          warn "failed to compress #{fqfilename}; see #{logfilename}"
          xzfilename.delete if xzfilename.exist?
        end
      }
    end

  end
end
@@ -1,94 +0,0 @@
1
- require 'mkfifo'
2
- require 'parallel'
3
-
4
module Bio
  class Gadget < Thor

    desc 'peak WIG1,WIG2,... [GTF]', <<DESC
Find peak within each exon from (gzipped) variableStep wigs by a majority vote. It will read from a standard input if no GTF option.
DESC
    # For every exon in the GTF, each wig nominates the position with the
    # highest value inside the exon (ties go to the smallest position on the
    # "+" strand and to the largest position otherwise). The position
    # nominated by most wigs wins, with the same tie rule. One line
    # "oId <TAB> exon_number <TAB> peak" is written per exon; peak is empty
    # when no wig has a value inside the exon.
    #
    # Chromosomes are processed in parallel worker processes. All workers
    # write into a named pipe that a forked "cat" copies to standard output.
    #
    # @param wigs [String] comma-separated paths of gzipped variableStep wigs
    # @param gtf [String] path of the GTF file
    def peak(wigs, gtf="/dev/stdin")

      # wig index => chromosome => position => value
      nchrpos2val = {}
      wigs.split(/,/).each_with_index { |wig, n|
        chrpos2val = nchrpos2val[n] = {}
        pos2val = nil # table of the chromosome declared last
        IO.popen(['zcat', wig]) { |fp|
          fp.each_line { |line|
            next if line =~ /^track/
            cols = line.rstrip.split(/\s+/)
            next if cols.empty?
            if cols[0] == 'variableStep'
              chr = cols[1].to_s.match(/chrom=(\S+)$/).to_a[1]
              pos2val = (chrpos2val[chr] ||= {})
            elsif pos2val
              # data lines seen before any declaration are ignored
              pos2val[cols[0].to_i] = cols[1].to_f
            end
          }
        }
      }

      # chromosome => [[strand, oId, exon_number, start, stop], ...]
      chr2exon = Hash.new { |table, chr| table[chr] = [] }
      File.foreach(gtf) { |line|
        cols = line.rstrip.split(/\t/)
        # only exon features; a line merely mentioning "exon" does not count
        next unless cols[2] == 'exon' && cols[8]
        oid = cols[8].match(/oId \"([^\"]+)/).to_a[1]
        oid = "#{cols[0]}.#{oid}" if cols[0] =~ /^RNA_SPIKE_/
        exn = cols[8].match(/exon_number \"(\d+)\"/).to_a[1]
        chr2exon[cols[0]].push([cols[6], oid, exn, cols[3].to_i, cols[4].to_i])
      }

      # NOTE(review): the FIFO is not removed here; whether mytemppath
      # arranges for cleanup is not visible from this file -- confirm.
      fifopath = mytemppath('fifo-')
      File.mkfifo(fifopath)

      Kernel.fork {
        exec 'cat', fifopath
      }

      File.open(fifopath, 'w') { |fp|
        Parallel.each(chr2exon.keys,
                      :in_processes => Parallel.processor_count) { |chr|
          chr2exon[chr].each { |str, oid, exn, start, stop|
            forward = (str == '+')
            votes = Hash.new(0) # nominated position => number of wigs
            nchrpos2val.each_value { |chrpos2val|
              pos2val = chrpos2val[chr]
              next if pos2val.nil?
              best_pos = best_val = nil
              pos2val.each { |pos, val|
                next unless start <= pos && pos <= stop
                if best_pos.nil? || val > best_val ||
                   (val == best_val && (forward ? pos < best_pos : pos > best_pos))
                  best_pos, best_val = pos, val
                end
              }
              votes[best_pos] += 1 unless best_pos.nil?
            }
            winner = ''
            unless votes.empty?
              # most votes first, then the strand-dependent position order
              winner = votes.min_by { |pos, cnt| [-cnt, forward ? pos : -pos] }[0]
            end
            fp.syswrite([oid, exn, winner].join("\t") + "\n")
          }
        }
      }

      Process.waitall

    end

  end
end
@@ -1,34 +0,0 @@
1
module Bio
  class Gadget < Thor

    namespace :bio

    desc 'qvstat QUAL', 'statistics of quality values in *.qual file'
    # Counts, for every quality value, how often it occurs at each position
    # of the whitespace-separated records in QUAL (lines starting with "#"
    # or ">" are skipped) and writes one line per quality value,
    # "<value> <count at position 0> <count at position 1> ...", to the
    # statistics file. Positions at which a value never occurred are written
    # as empty fields.
    #
    # The statistics file is QUAL with a trailing ".qual" or ".qual.gz"
    # replaced by ".qvstat". For any other name ".qvstat" is appended, so
    # the input file is never overwritten.
    #
    # @param qualfile [String] path of the quality file
    def qvstat(qualfile)
      stat = {} # quality value => per-position occurrence counts
      myopen(qualfile) { |fp|
        fp.each { |line|
          next if /^[\#\>]/ =~ line
          line.rstrip.split.each_with_index { |qv, i|
            counts = (stat[qv] ||= [])
            counts[i] = counts[i].to_i + 1
          }
        }
      }

      statfile = qualfile.sub(/\.qual(\.gz)?\z/, '.qvstat')
      statfile = "#{qualfile}.qvstat" if statfile == qualfile
      File.open(statfile, 'w') { |out|
        stat.keys.sort_by { |qv| qv.to_i }.each { |qv|
          out.puts "#{qv} #{stat[qv].join(' ')}"
        }
      }
    end

  end
end
@@ -1,60 +0,0 @@
1
require 'open3'

module Bio
  class Gadget < Thor

    desc 'rgt2mtx [RGT]', <<DESC
Convert cuffdiff read group tracking file (*.read_group_tracking) into tab-separated matrix. If no given name of tracking file, it reads from standard input.
DESC
    option 'gtf', :aliases => '-g', :type => :string, :desc => 'GTF to revert old transcript_id (oId) renamed by cuffcompare. Moreover, "RNA_SPIKE_*" chromosome names will be inserted for the transcripts aligned on the forward strand of spike-in sequences.'
    option 'sample', :aliases => '-s', :type => :string, :desc => 'Mapping from condition/replicate to sample ID for the column names of output matrix. Tab-separated text with three columns of condition, replicate and the sample ID.'
    # Prints a matrix with one row per tracking id (first column of RGT) and
    # one column per "condition|replicate" pair (second and third column);
    # the cells hold the fourth column of RGT. The first line of RGT is
    # skipped as a header.
    #
    # The columns are the sorted pairs of the first tracking id; a pair a
    # later id lacks gives an empty cell. The header row is always printed
    # before the first data row, and nothing is printed for an input
    # without data rows. Rows are grouped with "sort" on the first column
    # only and in the C locale, so all rows of one id are adjacent and the
    # output is ordered byte-wise by tracking id.
    #
    # @param rgt [String] path of the read group tracking file
    def rgt2mtx(rgt="/dev/stdin")

      # transcript_id => original id (oId)
      tid2oid = {}
      unless options['gtf'].nil?
        File.foreach(options['gtf']) { |line|
          cols = line.rstrip.split(/\t/)
          next if cols[8].nil?
          tid = cols[8].match(/transcript_id \"([^\"]+)/).to_a[1]
          oid = cols[8].match(/oId \"([^\"]+)/).to_a[1]
          # a record without oId must not erase an oId already known
          next if tid.nil? || oid.nil?
          oid = "#{cols[0]}.#{oid}" if cols[0] =~ /^RNA_SPIKE_/
          tid2oid[tid] = oid
        }
      end

      # "condition|replicate" => sample ID
      cr2sid = {}
      unless options['sample'].nil?
        File.foreach(options['sample']) { |line|
          c, r, sid = line.rstrip.split(/\t/)
          cr2sid["#{c}|#{r}"] = sid
        }
      end

      columns = nil
      emit = lambda { |id, raws|
        if columns.nil?
          columns = raws.keys.sort
          names = options['sample'].nil? ? columns : columns.map { |cr| cr2sid[cr] || cr }
          puts(([''] + names).join("\t"))
        end
        puts(([tid2oid.fetch(id, id)] + columns.map { |cr| raws[cr] }).join("\t"))
      }

      id = nil
      raws = {}
      Open3.pipeline_r(['tail', '-n', '+2', rgt],
                       [{ 'LC_ALL' => 'C' }, 'sort', '-t', "\t", '-k', '1,1']) { |sorted, _threads|
        sorted.each_line { |line|
          cols = line.rstrip.split(/\t/)
          next if cols.empty?
          if id != cols[0]
            emit.call(id, raws) unless id.nil?
            id = cols[0]
            raws = {} # values of the previous id must not leak into this row
          end
          raws["#{cols[1]}|#{cols[2]}"] = cols[3]
        }
      }
      emit.call(id, raws) unless id.nil?
    end

  end
end
@@ -1,9 +0,0 @@
1
- require 'thor'
2
-
3
module Bio
  # Thor-based command class of bio-gadget.
  class Gadget < Thor
    # Version string of this release.
    VERSION = '0.4.8'
  end
end
@@ -1,51 +0,0 @@
1
require 'shellwords'

module Bio
  class Gadget < Thor

    desc 'wig5p BAM', <<DESC
Convert bam-format alignments into wig-format table. Count is at 5'-end of alignments. Moreover the counts are normalized when there are alignments to references named 'RNA_SPIKE_*'. This procedure requires samtools and bamToBed in BEDTools.
DESC
    option 'reverse', :aliases => '-r', :type => :boolean, :default => false
    option 'name', :aliases => '-n', :type => :string, :default => ' '
    # Prints a variableStep wiggle track of 5'-end counts for the alignments
    # tagged "XS:A:+" (or "XS:A:-" with --reverse). An alignment of a read
    # that has n alignment records in the BAM counts as 1/n. The counts are
    # multiplied by 2234.8 divided by the number of distinct reads having an
    # "XS:A:+" alignment on an RNA_SPIKE_* reference, and negated with
    # --reverse. Aborts when there is no such spike-in read.
    #
    # NOTE(review): 2234.8 is presumably the expected amount of spike-in
    # molecules -- confirm.
    #
    # @param bam [String] path of the BAM file
    def wig5p(bam)

      reverse = options['reverse']
      str = reverse ? '-' : '+'

      # One pass over the alignments collects both the number of records per
      # read name and the set of spike-in read names.
      acc2dups = Hash.new(0)
      spikes = {}
      IO.popen(['samtools', 'view', bam]) { |fp|
        fp.each_line { |line|
          acc = line.chomp.split("\t", 2)[0]
          next if acc.nil?
          acc2dups[acc] += 1
          spikes[acc] = true if line.include?('RNA_SPIKE_') && line.include?('XS:A:+')
        }
      }
      abort "No spike-in reads." if spikes.empty?
      spike = 2234.8 / spikes.size

      # NOTE(review): the temporary BAM is not removed here; whether
      # mytemppath arranges for cleanup is not visible from this file.
      tmpbam = mytemppath('.bam').to_s
      abort unless system("samtools view -h #{Shellwords.escape(bam)} | grep -E 'XS:A:\\#{str}|^@SQ' | samtools view -S -b - > #{Shellwords.escape(tmpbam)}")

      # chromosome => position => weighted count
      cnts = Hash.new { |table, chr| table[chr] = Hash.new(0.0) }
      poscol = reverse ? 2 : 1 # BED end for the reverse strand, BED start otherwise
      IO.popen(['bamToBed', '-i', tmpbam]) { |fp|
        fp.each_line { |line|
          cols = line.rstrip.split(/\t/)
          acc = cols[3]
          dups = acc2dups.fetch(acc) {
            abort "Read name #{acc.inspect} from bamToBed is not among the names reported by samtools."
          }
          cnts[cols[0]][cols[poscol].to_i] += 1.0 / dups
        }
      }

      puts "track type=wiggle_0 name=\"#{options['name']}\" description=\" \" alwaysZero=on visibility=full maxHeightPixels=128:64:16 color=#{reverse ? '0,128,255' : '255,128,0'}"

      offset = reverse ? 0 : 1 # BED starts are 0-based, wiggle positions 1-based
      signal = reverse ? -spike : spike
      cnts.each { |chr, posvals|
        puts "variableStep chrom=#{chr}"
        posvals.keys.sort.each { |pos| puts [pos+offset, signal*posvals[pos]].join("\t") }
      }

    end

  end
end
@@ -1,28 +0,0 @@
1
module Bio
  class Gadget < Thor

    namespace :bio

    desc 'wigchr WIG CHR', 'extract wiggle track on specified chromosome'
    # Copies WIG to standard output, keeping only the fixedStep/variableStep
    # sections declared for chromosome CHR. Lines that are neither a section
    # declaration nor a data line (for example track lines) are always
    # copied.
    #
    # CHR is matched literally against the "chrom=" attribute. A data line
    # is any line starting with a digit, a sign or a decimal point, so
    # negative fixedStep values are filtered like any other value.
    #
    # @param wigfile [String] path of the wiggle file
    # @param chr [String] chromosome name to extract
    def wigchr(wigfile, chr)
      declaration = /^(fixed|variable)Step/
      wanted = /chrom=#{Regexp.escape(chr)}(\s|\z)/
      data = /^[-+.\d]/

      target = false
      myopen(wigfile) { |fp|
        fp.each { |line|
          if declaration =~ line
            target = !(wanted =~ line).nil?
            puts line if target
          elsif data =~ line
            puts line if target
          else
            puts line
          end
        }
      }
    end

  end
end