bio-mummer 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8188cd9bbf60cd74e360a423ad45f805161ee649
4
- data.tar.gz: 8476839ded624652d52e9bf7555fe948623351d1
3
+ metadata.gz: 37345c73287e9b80c15dc7426ee1045caad772f7
4
+ data.tar.gz: 8b57b1755f5da0ba52af5ccbb92c3d55f06964e5
5
5
  SHA512:
6
- metadata.gz: 499ca02b01cf755dc9092bbe5f64eda8deb8eeb17e87eb6c666630b95b75cae5cfafb5c9bfe58dc673d59fdca7299d7ea1982ea3ac8899d1a4593aee480c87f0
7
- data.tar.gz: 56922d92c400e907469a2ef73f7ea6e5b5615a1bd29413ff7011b55507d47ca15dee650ee68090246c2e7da53ee0069f7241971d4ddf8ca20d52e985680a7a8c
6
+ metadata.gz: c86e8bb5d957da2331d817101577aa9990e4685a6445d5b194747c069fc8d5708b93465e20ff95b4e1a37de9ab02402552c1d77a7ba1acd98ddbeb61bb98d574
7
+ data.tar.gz: ffb5c859d7add8bb10c9d0a8dc5dc98a51dcb9a06370633d7fe6d69a3bd8d295836bafe4e32796ba6b62a855cde9a224731aa1c1cf45e077e8cc21eb4c31fb84
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
@@ -0,0 +1,181 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'stringio'
4
+ require 'optparse'
5
+ require 'ostruct'
6
+ require 'pathname'
7
+ require 'tmpdir'
8
+ require 'bio-mummer'
9
+ require 'bio'
10
+
11
# Command-line option handling for the delta-to-gVCF converter.
class OptParser
  # The first line of the delta file is expected to hold two space-separated
  # paths, each starting with "/": the reference file, then the query file.
  HEADER_PATTERN = %r{(?<ref>/.*) (?<qry>/.*)}

  # Parses +args+ (consumed in place by OptionParser#parse!) and returns an
  # OpenStruct with:
  #   delta - Pathname of the delta file given with -d/--delta
  #   ref   - Hash of entry_id => sequence read from the reference file
  #   qry   - Hash of entry_id => sequence read from the query file
  # +ref+ / +qry+ are left unset when the delta header does not match
  # HEADER_PATTERN or the named file is missing or empty.
  #
  # Prints an error and exits with status 1 when the delta file itself is
  # missing or empty.
  def self.parse(args)
    options = OpenStruct.new

    opt_parser = OptionParser.new do |opts|
      opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
      opts.separator ""
      opts.separator "Specific options:"

      opts.on("-d", "--delta FILENAME", "Delta file generated by nucmer") do |fn|
        pn = Pathname.new(fn)
        # Pathname#size? is nil for both a missing and a zero-length file.
        unless pn.size?
          $stderr.puts "Error: cannot read file #{fn}"
          exit(1)
        end

        options.delta = pn
        header = pn.each_line.first.match(HEADER_PATTERN)
        next unless header

        ref = load_sequences(header[:ref])
        options.ref = ref if ref
        qry = load_sequences(header[:qry])
        options.qry = qry if qry
      end
    end
    opt_parser.parse!(args)
    options
  end

  # Reads every entry of the sequence file at +path+ into a Hash of
  # entry_id => nucleic-acid sequence. Returns nil when the file is missing
  # or empty. The block form of Bio::FlatFile.open is used so the underlying
  # file handle is closed once the entries have been read.
  def self.load_sequences(path)
    pn = Pathname.new(path)
    return nil unless pn.size?

    Bio::FlatFile.open(pn) do |flatfile|
      Hash[flatfile.map { |entry| [entry.entry_id, entry.naseq] }]
    end
  end
  private_class_method :load_sequences
end
46
options = OptParser.parse(ARGV)

# Everything below needs the delta file plus both sequence sets; stop with a
# clear message instead of failing later on a nil value.
unless options.delta && options.ref && options.qry
  abort "Error: a delta file (-d) whose first line names readable reference and query sequence files is required"
end

# Open the Delta File
# delta-filter is started with an argument vector (no shell), so a path with
# spaces or shell metacharacters is passed through unchanged.
filtered_delta = IO.popen(['delta-filter', '-r', '-q', options.delta.to_s], &:read)
d = BioMummer::DeltaFile.new(StringIO.new(filtered_delta))

puts <<'VCF_HEADER'
##fileformat=VCFv4.1
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location">
##FILTER=<ID=LowQual,Description="Low quality">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
VCF_HEADER

options.ref.each do |name, seq|
  puts "##contig=<ID=#{name},length=#{seq.length}>"
end
puts "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tDUMMYSAMPLENAME"

# Every reference sequence that has alignments is processed, in name order,
# matching the ##contig lines written above.
d.alignments
  .group_by { |a| a.refname }
  .sort_by { |refname, _alignments| refname }
  .each do |refname, alignments|
  cursor = 1   # next 1-based reference position still to be reported
  overlap = 0  # reference bases of the current alignment already reported
  alignments.sort_by { |a| [a.refstart, a.refstop].min }.each do |a|
    ref = options.ref[a.refname].subseq(a.refstart, a.refstop)
    qry = options.qry[a.queryname].subseq(a.querystart, a.querystop)
    qry = qry.complement unless a.strand

    if a.refstart > cursor
      # Gap with no alignment: one zero-depth reference block. POS is 1-based
      # while string indexing is 0-based, hence cursor - 1 for the REF base.
      puts [refname, cursor, '.', options.ref[refname][cursor - 1].upcase, "<NON_REF>", '.', '.', "END=#{a.refstart - 1}", "GT:DP:GQ:MIN_DP:PL", "0:0:0:0:0,0"].join("\t")
      cursor = a.refstart
    elsif a.refstart < cursor
      overlap = cursor - a.refstart
    end

    # Pad both sequences with "." gap markers so they line up column by
    # column: a positive distance puts a marker in the query, a negative one
    # in the reference. The accumulator holds the running offsets.
    a.distances.inject([0, 0]) do |mem, distance|
      if distance > 0
        qry.insert(mem.first + distance - 1, ".")
        [mem.first + distance, mem.last + distance]
      else
        ref.insert(mem.last - distance - 1, ".")
        [mem.first - distance, mem.last - distance]
      end
    end
    overlap2 = 0

    # Each column is [reference base, column index, query base]; consecutive
    # columns of the same class are grouped into one run.
    runs = ref.chars.zip(0..(ref.length - 1), qry.chars).chunk do |ref_base, _i, qry_base|
      if ref_base == qry_base
        "NOVAR"
      elsif ref_base == "."
        "INS"
      elsif qry_base == "."
        "DEL"
      else
        "SNP"
      end
    end

    # Skip leading runs that fall inside the already-reported overlap.
    # Insertion runs consume no reference bases, so they do not reduce it.
    # overlap2 keeps the overlap remaining on entry to the first kept run.
    pending = runs.drop_while do |var_class, arr|
      if var_class != "INS"
        overlap, overlap2 = overlap - arr.length, overlap
      end
      overlap >= 0
    end

    pending.each do |var_class, arr|
      case var_class
      when "NOVAR"
        if overlap2 > 0
          ref_base = arr[overlap2].first.upcase
        else
          ref_base = arr.first.first.upcase
          overlap2 = 0
        end
        puts [refname,
              cursor,
              ".",
              ref_base,
              "<NON_REF>",
              '.',
              '.',
              "END=#{cursor + arr.length - 1 - overlap2};ALTSCAFF=#{a.queryname};RLEN=#{arr.length};OVERLAP2=#{overlap2}",
              "GT:DP:GQ:MIN_DP:PL",
              "0:200:200:200:0,800"].join("\t")
        cursor += arr.length - overlap2
        overlap2 = 0
      when "SNP"
        # One record per substituted base.
        arr.each do |snp|
          puts [refname,
                cursor,
                ".",
                snp.first.upcase,
                [snp.last.upcase, "<NON_REF>"].join(","),
                '.',
                '.',
                "END=#{cursor};ALTSCAFF=#{a.queryname}",
                "GT:DP:GQ:MIN_DP:PL",
                "1:200:200:200:800,0,800"].join("\t")
          cursor += 1
        end
      when "INS"
        # Anchored on the base before the insertion (POS cursor-1, 0-based
        # index cursor-2); the cursor does not advance.
        ref_base = options.ref[refname][cursor - 2].upcase
        puts [refname,
              cursor - 1,
              ".",
              ref_base,
              [(ref_base + arr.map { |column| column.last }.join).upcase, "<NON_REF>"].join(","),
              '.',
              '.',
              "END=#{cursor - 1};ALTSCAFF=#{a.queryname};STRAND=#{a.strand}",
              "GT:DP:GQ:MIN_DP:PL",
              "1:200:200:200:800,0,800"].join("\t")
      when "DEL"
        puts [refname,
              cursor,
              ".",
              arr.map { |column| column.first.upcase }.join,
              ".,<NON_REF>",
              '.',
              '.',
              "END=#{cursor + arr.length - 1};ALTSCAFF=#{a.queryname}",
              "GT:DP:GQ:MIN_DP:PL",
              "1:200:200:200:800,0,800"].join("\t")
        cursor += arr.length
      end
    end
  end
end
@@ -1,3 +1,4 @@
1
+ require 'stringio'
1
2
 
2
3
  module BioMummer
3
4
 
@@ -13,25 +14,23 @@ module BioMummer
13
14
  @querystop = querystop
14
15
  @strand = strand
15
16
  @distances = distances
16
- @deltas = []
17
17
  end
18
18
 
19
19
  def deltas
20
- if @deltas == []
21
- a = @distances.each_with_object([0]) do |d, arr|
22
- state = arr.last
23
- if d > 0
24
- @deltas += Array.new(d - 1, state)
25
- @deltas.push(nil)
26
- arr << state - 1
27
- else
28
- @deltas += Array.new(d * -1 - 1, state)
29
- arr << state + 1
30
- end
20
+ ds = []
21
+ a = @distances.each_with_object([0]) do |d, arr|
22
+ state = arr.last
23
+ if d > 0
24
+ ds += Array.new(d - 1, state)
25
+ ds.push(nil)
26
+ arr << state - 1
27
+ else
28
+ ds += Array.new(d * -1 - 1, state)
29
+ arr << state + 1
31
30
  end
32
- @deltas << a.last
33
31
  end
34
- return @deltas
32
+ ds << a.last
33
+ return ds
35
34
  end
36
35
 
37
36
  def ref_to_query(ref_position)
@@ -58,6 +57,11 @@ module BioMummer
58
57
  @alignments = parse(io.read)
59
58
  end
60
59
 
60
# Builds an instance from the delta file at +filename+.
#
# The file is opened in block form and the handle is passed straight to the
# constructor, so it is closed as soon as the constructor returns.
# NOTE(review): this relies on the constructor reading the stream eagerly
# (the adjacent code calls io.read) — confirm if the constructor changes.
def self.open(filename)
  File.open(filename) { |io| new(io) }
end
64
+
61
65
  def parse(string)
62
66
  string.split("\n").slice_before(/^>/).flat_map do |block|
63
67
  refname, queryname = block.shift.match(/>(.*) (.*) \d+ \d+/).captures
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-mummer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - robsyme
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-10 00:00:00.000000000 Z
11
+ date: 2015-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: shoulda
@@ -97,7 +97,8 @@ dependencies:
97
97
  description: Help for working with the output of the .delta files produced by nucmer
98
98
  and promer
99
99
  email: rob.syme@gmail.com
100
- executables: []
100
+ executables:
101
+ - delta2gvcf.rb
101
102
  extensions: []
102
103
  extra_rdoc_files:
103
104
  - LICENSE.txt
@@ -112,6 +113,7 @@ files:
112
113
  - README.rdoc
113
114
  - Rakefile
114
115
  - VERSION
116
+ - bin/delta2gvcf.rb
115
117
  - lib/bio-mummer.rb
116
118
  - lib/bio-mummer/mummer.rb
117
119
  - test/data/out.delta