bio-gadget 0.4.8 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,61 +0,0 @@
1
require 'shellwords'

module Bio
  class Gadget < Thor

    desc 'femrg [GTF...]', 'Extract and merge overlapping first exons.'

    # Prints, as GTF on standard output, one merged locus per cluster of
    # overlapping first exons found in the given GTF files.
    #
    # For every transcript_id the "first" exon is the one with the lowest
    # exon_number on the "+" strand and the highest exon_number on the "-"
    # strand. First exons on the same chromosome and strand whose intervals
    # overlap are merged, and each cluster is written as a transcript line plus
    # an exon line named FE<n>, with the member transcript ids joined by "|".
    #
    # @param gtf_files [Array<String>] GTF paths; standard input is read when
    #   none is given
    def femrg(*gtf_files)
      # A splat parameter is an empty array, never nil, so test for emptiness.
      # "-" makes grep read standard input. Paths are shell-escaped so that
      # names containing spaces or metacharacters survive the pipeline.
      inputs = gtf_files.empty? ? '-' : gtf_files.shelljoin

      # chromosome => strand => transcript_id => [start, stop, exon_number]
      exon1s = {}
      IO.popen("grep -h '\texon\t' #{inputs} | cut -f 1,4,5,7,9") do |fp|
        fp.each_line do |line|
          chr, sstart, sstop, str, attr = line.rstrip.split(/\t/)
          id = attr[/transcript_id "([^"]+)"/, 1]
          en = attr[/exon_number "(\d+)"/, 1].to_i
          firsts = ((exon1s[chr] ||= {})[str] ||= {})
          known = firsts[id]
          if known.nil? ||
             (str == '+' && en < known[2]) ||
             (str == '-' && known[2] < en)
            firsts[id] = [sstart.to_i, sstop.to_i, en]
          end
        end
      end

      idx = 0
      exon1s.each do |chr, by_strand|
        by_strand.each do |str, firsts|
          # Walk the first exons in order of start coordinate and grow the
          # current cluster while the next exon starts inside it.
          ids = firsts.keys.sort { |a, b| firsts[a][0] <=> firsts[b][0] }
          clusters = []
          seed = ids.shift
          members = [seed]
          start, stop, _en = firsts[seed]
          ids.each do |nid|
            nstart, nstop, _nen = firsts[nid]
            if stop < nstart
              clusters.push([members, start, stop])
              members = [nid]
              start = nstart
              stop = nstop
            else
              members.push(nid)
              start, stop = [start, stop, nstart, nstop].minmax
            end
          end
          clusters.push([members, start, stop])

          clusters.each do |cluster_ids, cstart, cstop|
            attr = "gene_id \"FE#{idx}\"; transcript_id \"FE#{idx}\"; "
            puts [chr, 'bio-gadget:femrg', 'transcript', cstart, cstop, 1000, str, '.', attr].join("\t")
            puts [chr, 'bio-gadget:femrg', 'exon', cstart, cstop, 1000, str, '.', "#{attr}exon_number \"1\"; member_ids \"#{cluster_ids.join('|')}\""].join("\t")
            idx += 1
          end
        end
      end
    end

  end
end
@@ -1,30 +0,0 @@
1
- require 'parallel'
2
- require 'pathname'
3
-
4
require 'shellwords'

module Bio
  class Gadget < Thor

    namespace :bio

    desc 'fqxz', 'automatic (re)compression of *.fq(.gz|.bz2) files'
    # Recompresses every *.fq, *.fq.gz and *.fq.bz2 file in the current
    # directory into <name>.fq.xz, one Parallel worker per file. Files whose
    # .fq.xz already exists are skipped. xz diagnostics and the result of the
    # subsequent integrity test (xz -t) go to <name>.fq.xz.log.
    def fqxz
      Parallel.map(Pathname.glob('*.fq{.gz,.bz2,}')) do |fqfilename|
        xzfilename = fqfilename.sub(/\.fq(\.(gz|bz2))*$/, '.fq.xz')
        next if xzfilename.exist?

        decompressor =
          case fqfilename.extname
          when '.gz' then 'gunzip -c'
          when '.bz2' then 'bunzip2 -c'
          else 'cat'
          end

        # Escape the paths: they go through a shell, and file names may contain
        # spaces or shell metacharacters.
        source = fqfilename.to_s.shellescape
        target = xzfilename.to_s.shellescape

        puts "compressing #{xzfilename}..."
        system "#{decompressor} #{source} | xz -z -e -c > #{target} 2> #{target}.log"
        system "xz -t #{target} >> #{target}.log 2>&1"
      end
    end

  end
end
@@ -1,94 +0,0 @@
1
- require 'mkfifo'
2
- require 'parallel'
3
-
4
require 'shellwords'

module Bio
  class Gadget < Thor

    desc 'peak WIG1,WIG2,... [GTF]', <<DESC
Find peak within each exon from (gzipped) variableStep wigs by a majority vote. It will read from a standard input if no GTF option.
DESC
    # For every exon in the GTF, each wig nominates the position inside the
    # exon that has the highest value; the position nominated by most wigs is
    # reported. Ties (in value or in votes) go to the smallest position on the
    # "+" strand and to the largest position otherwise. One line
    # "oId<TAB>exon_number<TAB>peak" is written per exon; peak is empty when no
    # wig has a value inside the exon.
    #
    # Chromosomes are processed in parallel worker processes which all write to
    # one FIFO; a forked `cat` copies the FIFO to standard output.
    #
    # @param wigs [String] comma-separated paths of gzipped variableStep wigs
    # @param gtf [String] GTF path (exon records with oId / exon_number)
    def peak(wigs, gtf="/dev/stdin")
      # wig index => chromosome => position => value.
      # each_with_index gives every listed wig its own slot (and vote), even
      # when the same path is listed twice, without rescanning the list.
      nchrpos2val = {}
      wigs.split(/,/).each_with_index do |wig, n|
        chrpos2val = {}
        nchrpos2val[n] = chrpos2val
        chr = ''
        IO.popen("zcat #{wig.shellescape} | grep -v ^track") do |fp|
          fp.each_line do |line|
            cols = line.rstrip.split(/\s+/)
            if cols[0] == 'variableStep'
              chr = cols[1].match(/chrom=(\S+)$/).to_a[1]
              chrpos2val[chr] ||= {}
            else
              chrpos2val[chr][cols[0].to_i] = cols[1].to_f
            end
          end
        end
      end

      # chromosome => [[strand, oId, exon_number, start, stop], ...]
      chr2exon = {}
      IO.popen("grep exon #{gtf.shellescape}") do |fp|
        fp.each_line do |line|
          cols = line.rstrip.split(/\t/)
          oid = cols[8].match(/oId \"([^\"]+)/).to_a[1]
          oid = "#{cols[0]}.#{oid}" if cols[0] =~ /^RNA_SPIKE_/
          exn = cols[8].match(/exon_number \"(\d+)\"/).to_a[1]
          (chr2exon[cols[0]] ||= []).push([cols[6], oid, exn, cols[3].to_i, cols[4].to_i])
        end
      end

      # Key with the highest score; equal scores resolve towards the smallest
      # key on "+" and the largest key on any other strand.
      best_key = lambda do |scores, strand|
        scores.keys.min do |a, b|
          if scores[a] == scores[b]
            strand == '+' ? (a <=> b) : (b <=> a)
          else
            scores[b] <=> scores[a]
          end
        end
      end

      fifopath = mytemppath('fifo-')
      File.mkfifo(fifopath)

      Kernel.fork {
        exec "cat #{fifopath}"
      }

      fp = File.open(fifopath, 'w')
      Parallel.each(chr2exon.keys,
                    :in_processes => Parallel.processor_count) do |chr|
        chr2exon[chr].each do |str, oid, exn, start, stop|
          votes = Hash.new(0)
          nchrpos2val.each_value do |chrpos2val|
            next unless chrpos2val.key?(chr)

            within = chrpos2val[chr].select { |pos, _val| start <= pos && pos <= stop }
            votes[best_key.call(within, str)] += 1 unless within.empty?
          end
          peak = votes.empty? ? '' : best_key.call(votes, str)
          fp.syswrite([oid, exn, peak].join("\t") + "\n")
        end
      end
      fp.close

      Process.waitall
    end

  end
end
@@ -1,34 +0,0 @@
1
module Bio
  class Gadget < Thor

    namespace :bio

    desc 'qvstat QUAL', 'statistics of quality values in *.qual file'
    # Counts, for every quality value, how often it occurs at each position
    # (column index) of the lines in a *.qual file, and writes one line per
    # quality value ("<qv> <count at column 0> <count at column 1> ...") to a
    # *.qvstat file. Lines starting with "#" or ">" are skipped.
    #
    # @param qualfile [String] path of the *.qual or *.qual.gz file, read
    #   through the project helper myopen (defined elsewhere)
    def qvstat(qualfile)
      stat = Hash.new { |counts, qv| counts[qv] = [] }
      myopen(qualfile) do |fp|
        fp.each do |line|
          next if /^[\#\>]/ =~ line

          line.rstrip.split.each_with_index do |qv, i|
            stat[qv][i] = stat[qv][i].to_i + 1
          end
        end
      end

      # Dots are escaped so that only a real ".qual"/".qual.gz" suffix is
      # replaced. When the name has no such suffix, append the extension
      # instead of reusing the input path, which would truncate the input.
      statfile = qualfile.sub(/\.qual(\.gz)?\z/, '.qvstat')
      statfile = "#{qualfile}.qvstat" if statfile == qualfile

      File.open(statfile, 'w') do |out|
        stat.keys.sort { |a, b| a.to_i <=> b.to_i }.each do |qv|
          out.puts "#{qv} #{stat[qv].join(' ')}"
        end
      end
    end

  end
end
@@ -1,60 +0,0 @@
1
require 'shellwords'

module Bio
  class Gadget < Thor

    desc 'rgt2mtx [RGT]', <<DESC
Convert cuffdiff read group tracking file (*.read_group_tracking) into tab-separated matrix. If no given name of tracking file, it reads from standard input.
DESC
    option 'gtf', :aliases => '-g', :type => :string, :desc => 'GTF to revert old transcript_id (oId) renamed by cuffcompare. Moreover, "RNA_SPIKE_*" chromosome names will be inserted for the transcripts aligned on the forward strand of spike-in sequences.'
    option 'sample', :aliases => '-s', :type => :string, :desc => 'Mapping from condition/replicate to sample ID for the column names of output matrix. Tab-separated text with three columns of condition, replicate and the sample ID.'
    # Writes a tab-separated matrix to standard output: a header row followed
    # by one row per tracking id (column 1 of the input), with one column per
    # "condition|replicate" pair (columns 2 and 3) holding column 4 of the
    # input. The first input line is dropped and the rest is sorted, so rows
    # of one tracking id are adjacent.
    #
    # Values are not reset between tracking ids: a pair that is missing for
    # one id keeps the value seen for the previous id.
    #
    # @param rgt [String] path of the read group tracking file
    def rgt2mtx(rgt="/dev/stdin")
      # transcript_id => oId (prefixed with the chromosome for RNA_SPIKE_*)
      tid2oid = {}
      unless options['gtf'].nil?
        File.foreach(options['gtf']) do |line|
          cols = line.rstrip.split(/\t/)
          next if cols[8].nil? # comment or truncated line without attributes

          tid = cols[8][/transcript_id "([^"]+)/, 1]
          oid = cols[8][/oId "([^"]+)/, 1]
          oid = "#{cols[0]}.#{oid}" if cols[0] =~ /^RNA_SPIKE_/
          tid2oid[tid] = oid
        end
      end

      # "condition|replicate" => sample ID
      cr2sid = {}
      unless options['sample'].nil?
        File.foreach(options['sample']) do |line|
          c, r, sid = line.rstrip.split(/\t/)
          cr2sid["#{c}|#{r}"] = sid
        end
      end

      raws = {}
      header_pending = true
      # Prints the row of one tracking id. The header is printed right before
      # the first row, so it also appears when the input has only one id.
      emit_row = lambda do |tracking_id|
        keys = raws.keys.sort
        if header_pending
          names = options['sample'].nil? ? keys : keys.map { |cr| cr2sid[cr] }
          puts(([''] + names).join("\t"))
          header_pending = false
        end
        puts(([tid2oid.fetch(tracking_id, tracking_id)] + raws.values_at(*keys)).join("\t"))
      end

      id = nil
      IO.popen("tail -n +2 #{rgt.shellescape} | sort -k 1") do |fp|
        fp.each_line do |line|
          cols = line.rstrip.split(/\t/)
          id ||= cols[0]
          if id != cols[0]
            emit_row.call(id)
            id = cols[0]
          end
          raws["#{cols[1]}|#{cols[2]}"] = cols[3]
        end
      end
      # Nothing to print for an input without data lines.
      emit_row.call(id) unless id.nil?
    end

  end
end
@@ -1,9 +0,0 @@
1
- require 'thor'
2
-
3
module Bio
  class Gadget < Thor
    # Version string of this release.
    VERSION = '0.4.8'
  end
end
@@ -1,51 +0,0 @@
1
require 'shellwords'

module Bio
  class Gadget < Thor

    desc 'wig5p BAM', <<DESC
Convert bam-format alignments into wig-format table. Count is at 5'-end of alignments. Moreover the counts are normalized when there are alignments to references named 'RNA_SPIKE_*'. This procedure requires samtools and bamToBed in BEDTools.
DESC
    option 'reverse', :aliases => '-r', :type => :boolean, :default => false
    option 'name', :aliases => '-n', :type => :string, :default => ' '
    # Writes a variableStep wiggle track to standard output with one value per
    # position: the number of alignments of the selected strand (XS:A tag)
    # ending at that position, each alignment weighted by 1 / (number of
    # alignment records of its read name), and scaled by 2234.8 divided by the
    # number of distinct read names aligned to RNA_SPIKE_* with XS:A:+.
    # Values are negated with --reverse. Aborts when there is no spike-in read.
    #
    # NOTE(review): 2234.8 is presumably the expected amount of spike-in
    # molecules - confirm against the protocol.
    #
    # @param bam [String] path of the BAM file
    def wig5p(bam)
      reverse = options['reverse']
      str = reverse ? '-' : '+'
      bam_arg = bam.shellescape

      spike_reads = IO.popen("samtools view #{bam_arg} | grep RNA_SPIKE_ | grep 'XS:A:+' | cut -f 1 | sort -u") do |io|
        io.each_line.count
      end
      abort "No spike-in reads." if spike_reads.zero?
      spike = 2234.8 / spike_reads

      # read name => number of alignment records carrying that name
      acc2dups = {}
      IO.popen("samtools view #{bam_arg} | cut -f 1 | sort | uniq -c") do |io|
        io.each_line do |line|
          dups, acc = line.strip.split(/\s/)
          acc2dups[acc] = dups.to_f
        end
      end

      # Keep the @SQ header lines and the alignments of the requested strand.
      tmpbam = mytemppath('.bam').to_s.shellescape
      abort unless system("samtools view -h #{bam_arg} | grep -E 'XS:A:\\#{str}|^@SQ' | samtools view -S -b - > #{tmpbam}")

      # chromosome => position => weighted count.
      # BED column 3 (end) is used for reverse-strand alignments and
      # column 2 (start) for forward-strand ones.
      cnts = {}
      IO.popen("bamToBed -i #{tmpbam} | cut -f 1,#{reverse ? 3 : 2},4") do |io|
        io.each_line do |line|
          chr, poss, acc = line.rstrip.split(/\t/)
          by_pos = (cnts[chr] ||= Hash.new(0.0))
          by_pos[poss.to_i] += 1.0 / acc2dups[acc]
        end
      end

      # "visibility" is the track-line attribute name; the former spelling
      # "visivility" was a typo.
      puts "track type=wiggle_0 name=\"#{options['name']}\" description=\" \" alwaysZero=on visibility=full maxHeightPixels=128:64:16 color=#{reverse ? '0,128,255' : '255,128,0'}"

      # BED starts are 0-based, so forward positions are shifted by one.
      offset = reverse ? 0 : 1
      signal = reverse ? -spike : spike
      cnts.each do |chr, posvals|
        puts "variableStep chrom=#{chr}"
        posvals.keys.sort.each { |pos| puts [pos + offset, signal * posvals[pos]].join("\t") }
      end
    end

  end
end
@@ -1,28 +0,0 @@
1
module Bio
  class Gadget < Thor

    namespace :bio

    desc 'wigchr WIG CHR', 'extract wiggle track on specified chromosome'
    # Copies a wiggle file to standard output, keeping only the fixedStep /
    # variableStep sections declared for the given chromosome. Every line that
    # is neither a step declaration nor a data line (e.g. a track line) is
    # passed through unchanged.
    #
    # @param wigfile [String] path of the wiggle file, read through the
    #   project helper myopen (defined elsewhere)
    # @param chr [String] chromosome name, matched literally
    def wigchr(wigfile, chr)
      # Built once; the name is escaped so that regexp metacharacters in it
      # (such as ".") only match themselves.
      declaration = /chrom=#{Regexp.escape(chr)}\s/
      target = false
      myopen(wigfile) do |fp|
        fp.each do |line|
          if line =~ /^(fixed|variable)Step/
            target = !(line =~ declaration).nil?
            puts line if target
          elsif line =~ /^[-+]?\.?\d/
            # Data line. fixedStep values may start with a sign or a dot, so
            # those are filtered with their section as well.
            puts line if target
          else
            puts line
          end
        end
      end
    end

  end
end