bio-mummer 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8188cd9bbf60cd74e360a423ad45f805161ee649
4
- data.tar.gz: 8476839ded624652d52e9bf7555fe948623351d1
3
+ metadata.gz: 37345c73287e9b80c15dc7426ee1045caad772f7
4
+ data.tar.gz: 8b57b1755f5da0ba52af5ccbb92c3d55f06964e5
5
5
  SHA512:
6
- metadata.gz: 499ca02b01cf755dc9092bbe5f64eda8deb8eeb17e87eb6c666630b95b75cae5cfafb5c9bfe58dc673d59fdca7299d7ea1982ea3ac8899d1a4593aee480c87f0
7
- data.tar.gz: 56922d92c400e907469a2ef73f7ea6e5b5615a1bd29413ff7011b55507d47ca15dee650ee68090246c2e7da53ee0069f7241971d4ddf8ca20d52e985680a7a8c
6
+ metadata.gz: c86e8bb5d957da2331d817101577aa9990e4685a6445d5b194747c069fc8d5708b93465e20ff95b4e1a37de9ab02402552c1d77a7ba1acd98ddbeb61bb98d574
7
+ data.tar.gz: ffb5c859d7add8bb10c9d0a8dc5dc98a51dcb9a06370633d7fe6d69a3bd8d295836bafe4e32796ba6b62a855cde9a224731aa1c1cf45e077e8cc21eb4c31fb84
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
@@ -0,0 +1,181 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'stringio'
4
+ require 'optparse'
5
+ require 'ostruct'
6
+ require 'pathname'
7
+ require 'tmpdir'
8
+ require 'bio-mummer'
9
+ require 'bio'
10
+
11
# Command-line option handling for the delta2gvcf script.
class OptParser
  # Parses the command-line switches in +args+.
  #
  # Recognises "-d" / "--delta FILENAME". The first line of the delta file is
  # expected to hold two absolute paths (reference, then query) separated by
  # a space; each path that points to a non-empty file is loaded into a Hash
  # mapping entry id to nucleotide sequence.
  #
  # Prints an error and exits with status 1 when the delta file is missing
  # or empty.
  #
  # @param args [Array<String>] argument vector; recognised switches are
  #   removed from it (OptionParser#parse!)
  # @return [OpenStruct] with +delta+ (Pathname) when -d was given, plus
  #   +ref+ and +qry+ (Hash) when the corresponding sequence file was readable
  def self.parse(args)
    options = OpenStruct.new

    opt_parser = OptionParser.new do |opts|
      opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
      opts.separator ""
      opts.separator "Specific options:"

      opts.on("-d", "--delta FILENAME", "Delta file generated by nucmer") do |fn|
        pn = Pathname.new(fn)
        unless pn.size?
          $stderr.puts "Error: cannot read file #{fn}"
          exit(1)
        end

        options.delta = pn
        match = pn.each_line.first.match(/(?<ref>\/.*) (?<qry>\/.*)/)
        if match
          # Leave the attribute unset (rather than nil) when a file is not
          # readable, exactly as before.
          [:ref, :qry].each do |key|
            sequences = load_sequences(match[key])
            options[key] = sequences if sequences
          end
        end
      end
    end
    opt_parser.parse!(args)
    options
  end

  # Reads every entry of the sequence file at +path+ into a Hash of
  # entry id => nucleotide sequence. Returns nil when the file is missing or
  # empty. The block form of Bio::FlatFile.open is used so the file handle is
  # closed once the entries have been read.
  def self.load_sequences(path)
    file = Pathname.new(path)
    return nil unless file.size?

    Bio::FlatFile.open(file) do |flat_file|
      Hash[flat_file.map { |entry| [entry.entry_id, entry.naseq] }]
    end
  end
  private_class_method :load_sequences
end
46
options = OptParser.parse(ARGV)

# Fail with a readable message instead of shelling out with an empty
# argument or hitting a NoMethodError on nil further down.
unless options.delta
  $stderr.puts "Error: no delta file given (use -d FILENAME)"
  exit(1)
end
unless options.ref && options.qry
  $stderr.puts "Error: cannot read the reference/query sequence files named in the first line of #{options.delta}"
  exit(1)
end

# Open the Delta File
# delta-filter is started from an argument vector, so the path is passed
# verbatim and never interpreted by a shell (spaces or metacharacters in the
# file name are safe).
filtered = IO.popen(['delta-filter', '-r', '-q', options.delta.to_s]) { |io| io.read }
d = BioMummer::DeltaFile.new(StringIO.new(filtered))
50
+
51
# Fixed VCF meta-information lines. The heredoc already ends in a newline,
# so puts adds nothing extra.
puts <<'VCF_META'
##fileformat=VCFv4.1
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location">
##FILTER=<ID=LowQual,Description="Low quality">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
VCF_META

# One ##contig line per reference sequence, carrying its name and length.
options.ref.each_pair do |contig_name, contig_seq|
  puts "##contig=<ID=#{contig_name},length=#{contig_seq.length}>"
end

# Column header line; the sample column uses a placeholder name.
header_columns = %w[#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT DUMMYSAMPLENAME]
puts header_columns.join("\t")
65
+
66
# Convert the filtered alignments into gVCF records, one reference sequence
# at a time, visiting the alignments of each reference in order of their
# leftmost reference coordinate.
#
# cursor   - 1-based reference position of the next record to print.
# overlap  - set to (cursor - refstart) when an alignment starts before the
#            cursor; leading chunks of that alignment are dropped while this
#            counter stays non-negative.
# overlap2 - value of overlap before the last chunk was examined; used to
#            trim the first NOVAR chunk that is kept.
#
# NOTE(review): a partially overlapped first chunk is only trimmed in the
# NOVAR branch; SNP/INS/DEL chunks ignore overlap2 - confirm this is intended.
d.alignments
  .group_by{|a| a.refname}.sort_by{|refname, alignments| refname}
  .take(2) # NOTE(review): only the first two reference sequences are converted although the header lists every contig - looks like a debugging leftover; confirm before removing.
  .each do |refname, alignments|
  cursor = 1
  overlap = 0
  alignments.sort_by{|a| [a.refstart, a.refstop].min}.each do |a|
    ref = options.ref[a.refname].subseq(a.refstart, a.refstop)
    qry = options.qry[a.queryname].subseq(a.querystart, a.querystop)
    # Alignments that are not on the forward strand use the complemented query.
    qry = qry.complement unless a.strand

    if a.refstart > cursor
      # Unaligned stretch before this alignment: one zero-depth block record.
      # cursor is a 1-based position while string indexing is 0-based, so the
      # base at POS=cursor lives at index cursor - 1.
      puts [refname, cursor, '.', options.ref[refname][cursor - 1].upcase, "<NON_REF>", '.', '.', "END=#{a.refstart - 1}", "GT:DP:GQ:MIN_DP:PL", "0:0:0:0:0,0"].join("\t")
      cursor = a.refstart
    elsif a.refstart < cursor
      overlap = cursor - a.refstart
    end

    # Insert "." placeholders so ref and qry line up column by column:
    # a positive distance pads the query, any other distance pads the
    # reference. mem holds the running [query, reference] insert offsets;
    # the value returned by inject itself is not used.
    a.distances.inject([0,0]) do |mem, distance|
      if distance > 0
        qry.insert(mem.first + distance - 1, ".")
        [mem.first + distance, mem.last + distance]
      else
        ref.insert(mem.last - distance - 1, ".")
        [mem.first - distance, mem.last - distance]
      end
    end
    overlap2 = 0

    # Classify every aligned column, group consecutive columns of the same
    # class, skip the groups already covered by the previous alignment, and
    # print the rest. Each column is [reference base, index, query base].
    ref.chars.zip(0..(ref.length-1), qry.chars).chunk do |refBase, i, qryBase|
      if refBase == qryBase
        "NOVAR"
      elsif refBase == "."
        "INS"
      elsif qryBase == "."
        "DEL"
      else
        "SNP"
      end
    end.drop_while do |varClass, arr|
      # Insertions consume no reference positions, so they do not reduce
      # the remaining overlap.
      if varClass != "INS"
        overlap, overlap2 = overlap - arr.length , overlap
      end
      overlap >= 0
    end.each do |varClass, arr|
      case varClass
      when "NOVAR"
        # Run of matching bases: one reference block, shortened by overlap2
        # when its start was already reported.
        if overlap2 > 0
          refBase = arr[overlap2].first.upcase
        else
          refBase = arr.first.first.upcase
          overlap2 = 0
        end
        puts [refname,
              cursor,
              ".",
              refBase,
              "<NON_REF>",
              '.',
              '.',
              "END=#{cursor + arr.length - 1 - overlap2};ALTSCAFF=#{a.queryname};RLEN=#{arr.length};OVERLAP2=#{overlap2}",
              "GT:DP:GQ:MIN_DP:PL",
              "0:200:200:200:0,800"].join("\t")
        cursor += arr.length - overlap2
        overlap2 = 0
      when "SNP"
        # Every mismatching column becomes its own single-base record
        # (a run of mismatches is not merged into one multi-base record).
        arr.each do |snp|
          puts [refname,
                cursor,
                ".",
                snp.first.upcase,
                [snp.last.upcase, "<NON_REF>"].join(","),
                '.',
                '.',
                "END=#{cursor};ALTSCAFF=#{a.queryname}",
                "GT:DP:GQ:MIN_DP:PL",
                "1:200:200:200:800,0,800"].join("\t")
          cursor += 1
        end
      when "INS"
        # Insertions are anchored on the preceding reference base
        # (position cursor - 1, i.e. index cursor - 2); cursor does not move.
        refBase = options.ref[refname][cursor-2].upcase
        puts [refname,
              cursor-1,
              ".",
              refBase,
              [(refBase + arr.map{|column| column.last}.join).upcase, "<NON_REF>"].join(","),
              '.',
              '.',
              "END=#{cursor - 1};ALTSCAFF=#{a.queryname};STRAND=#{a.strand}",
              "GT:DP:GQ:MIN_DP:PL",
              "1:200:200:200:800,0,800"].join("\t")
      when "DEL"
        # Deleted reference bases are reported as one record spanning the run.
        puts [refname,
              cursor,
              ".",
              arr.map{|column| column.first.upcase}.join,
              ".,<NON_REF>",
              '.',
              '.',
              "END=#{cursor + arr.length - 1};ALTSCAFF=#{a.queryname}",
              "GT:DP:GQ:MIN_DP:PL",
              "1:200:200:200:800,0,800"].join("\t")
        cursor += arr.length
      end
    end
  end
end
@@ -1,3 +1,4 @@
1
+ require 'stringio'
1
2
 
2
3
  module BioMummer
3
4
 
@@ -13,25 +14,23 @@ module BioMummer
13
14
  @querystop = querystop
14
15
  @strand = strand
15
16
  @distances = distances
16
- @deltas = []
17
17
  end
18
18
 
19
19
  def deltas
20
- if @deltas == []
21
- a = @distances.each_with_object([0]) do |d, arr|
22
- state = arr.last
23
- if d > 0
24
- @deltas += Array.new(d - 1, state)
25
- @deltas.push(nil)
26
- arr << state - 1
27
- else
28
- @deltas += Array.new(d * -1 - 1, state)
29
- arr << state + 1
30
- end
20
+ ds = []
21
+ a = @distances.each_with_object([0]) do |d, arr|
22
+ state = arr.last
23
+ if d > 0
24
+ ds += Array.new(d - 1, state)
25
+ ds.push(nil)
26
+ arr << state - 1
27
+ else
28
+ ds += Array.new(d * -1 - 1, state)
29
+ arr << state + 1
31
30
  end
32
- @deltas << a.last
33
31
  end
34
- return @deltas
32
+ ds << a.last
33
+ return ds
35
34
  end
36
35
 
37
36
  def ref_to_query(ref_position)
@@ -58,6 +57,11 @@ module BioMummer
58
57
  @alignments = parse(io.read)
59
58
  end
60
59
 
60
# Builds an instance from the delta file at +filename+.
#
# The constructor reads the whole stream while it runs, so the file can be
# closed as soon as the instance exists; the block form of File.open does
# that even if construction raises. (The previous body passed the File to
# +super+, which resolves to Kernel#open and raises TypeError.)
#
# @param filename [String, Pathname] path of the delta file
# @return a new instance built from the file's contents
def self.open(filename)
  File.open(filename) { |io| new(io) }
end
64
+
61
65
  def parse(string)
62
66
  string.split("\n").slice_before(/^>/).flat_map do |block|
63
67
  refname, queryname = block.shift.match(/>(.*) (.*) \d+ \d+/).captures
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-mummer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - robsyme
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-10 00:00:00.000000000 Z
11
+ date: 2015-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: shoulda
@@ -97,7 +97,8 @@ dependencies:
97
97
  description: Help for working with the output of the .delta files produced by nucmer
98
98
  and promer
99
99
  email: rob.syme@gmail.com
100
- executables: []
100
+ executables:
101
+ - delta2gvcf.rb
101
102
  extensions: []
102
103
  extra_rdoc_files:
103
104
  - LICENSE.txt
@@ -112,6 +113,7 @@ files:
112
113
  - README.rdoc
113
114
  - Rakefile
114
115
  - VERSION
116
+ - bin/delta2gvcf.rb
115
117
  - lib/bio-mummer.rb
116
118
  - lib/bio-mummer/mummer.rb
117
119
  - test/data/out.delta