bfa 1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/bfa.rb +47 -0
- data/lib/bfa/binary_cigar.rb +11 -0
- data/lib/bfa/binary_cigar/decode.rb +14 -0
- data/lib/bfa/binary_cigar/encode.rb +14 -0
- data/lib/bfa/constants.rb +25 -0
- data/lib/bfa/error.rb +2 -0
- data/lib/bfa/four_bit_sequence.rb +12 -0
- data/lib/bfa/four_bit_sequence/decode.rb +18 -0
- data/lib/bfa/four_bit_sequence/encode.rb +26 -0
- data/lib/bfa/reader.rb +289 -0
- data/lib/bfa/reader/format_error.rb +4 -0
- data/lib/bfa/writer.rb +273 -0
- metadata +66 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: ae42ae351e604214e30e6871459e0d01c0f6198e
|
|
4
|
+
data.tar.gz: 8c13cb054ffcae83d12529eef8942f1ece28dddd
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: ddff1aa18d8d7d1ebc5b803e98d7e6081a1291b1ecb9e55453a6e029a3ef979c27eecc8d9536c1d39904ee9d3a8fa644a6aa155966e93da2ee25eaa75e8af1ea
|
|
7
|
+
data.tar.gz: 6dc29d2d65144f0489e28b7ebca03a05180178d7b7c7c3c69dffb20880ffffd467f335d01ce5142e9b86c433f896d8d29d076cd2b94eb19d8f2f7da7dc702914
|
data/lib/bfa.rb
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
BFA = Module.new
|
|
2
|
+
|
|
3
|
+
require "rgfa"
|
|
4
|
+
|
|
5
|
+
require_relative "bfa/constants"
|
|
6
|
+
require_relative "bfa/reader"
|
|
7
|
+
require_relative "bfa/writer"
|
|
8
|
+
|
|
9
|
+
# Extensions to RGFA adding conversion to and from the binary BFA format.
class RGFA

  # Writes this graph to a BFA file.
  #
  # @param filename [String] path of the output file
  # @param compressed [Boolean] passed through to BFA::Writer.encode
  # @return [nil]
  def to_bfa(filename, compressed=true)
    BFA::Writer.encode(filename, self, compressed)
    return nil
  end

  class << self

    # Keep the pre-existing RGFA.from_file reachable as from_gfa before
    # from_file is redefined below.
    alias_method :from_gfa, :from_file

    # Loads a graph from +filename+, dispatching on the file content.
    #
    # Files starting with the gzip signature or with the BFA magic string are
    # handed to from_bfa; everything else goes to from_gfa.
    #
    # @param filename [String] path of the input file
    # @return [Object] whatever from_bfa / from_gfa return for the file
    def from_file(filename)
      magic = BFA::Constants::MAGIC_STRING
      # Read the leading bytes once, in binary mode, and close the file before
      # dispatching. IO#read returns nil on an empty file, hence the to_s.
      head = File.open(filename, "rb") { |f| f.read(magic.size) }.to_s
      # currently only gzipped bfa are supported
      is_gzip = (head.bytes.first(2) == [31, 139])
      if is_gzip || head == magic
        from_bfa(filename)
      else
        from_gfa(filename)
      end
    end

    # Parses a BFA file.
    #
    # @param filename [String] path of the BFA file
    # @return [Object] the result of BFA::Reader.parse
    def from_bfa(filename)
      BFA::Reader.parse(filename)
    end

  end

end
|
|
44
|
+
|
|
45
|
+
# Adds a readable and writable +line_id+ attribute to every RGFA line.
class RGFA::Line
  attr_reader :line_id
  attr_writer :line_id
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
require_relative "../binary_cigar"
|
|
2
|
+
|
|
3
|
+
# Mixin that decodes an integer holding one packed CIGAR operation.
module BFA::BinaryCigar::Decode

  # Splits the receiver into its low four bits, which select an opcode from
  # BFA::BinaryCigar::NUM_TO_OPCODE, and the remaining upper bits, which are
  # passed as the first argument (presumably the operation length).
  #
  # @return [RGFA::CIGAR::Operation]
  def parse_binary_cigar
    upper_bits = self >> 4
    opcode = BFA::BinaryCigar::NUM_TO_OPCODE[self & 15]
    RGFA::CIGAR::Operation.new(upper_bits, opcode)
  end

end
|
|
11
|
+
|
|
12
|
+
# Make parse_binary_cigar available on every Integer.
Integer.class_eval do
  include BFA::BinaryCigar::Decode
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Constants shared by the BFA reader and writer.
module BFA::Constants

  # Signature expected at the start of a BFA stream.
  MAGIC_STRING = "BFA\1"

  # pack/unpack directive used for sizes and counts
  # (32-bit unsigned, little-endian).
  SIZEOF_TEMPLATE_CODE = "L<"

  # Number of bytes occupied by one size value.
  SIZEOF_SIZE = 4

  # Datatype codes that all denote integers.
  INTEGER_DATATYPES = %i[i I c C s S]

  # pack/unpack directive for each numeric datatype code.
  NUMERIC_TEMPLATE_CODE = {
    c: "c",  C: "C",
    s: "s<", S: "S<",
    i: "l<", I: "L<",
    f: "E"
  }

  # Encoded size in bytes for each numeric datatype code.
  NUMERIC_SIZE = {
    c: 1, C: 1,
    s: 2, S: 2,
    i: 4, I: 4,
    f: 8
  }

end
|
data/lib/bfa/error.rb
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Lookup tables for the four-bit sequence encoding.
module BFA::FourBitSequence

  # Maps each letter to its four-bit code; the code of a letter is its index
  # in the string below.
  # NOTE(review): the ordering looks like the BAM 4-bit base encoding - confirm.
  LETTER_TO_CODE = Hash["=ACMGRSVTWYHKDBN".chars.each_with_index.to_a]

  # Reverse table: four-bit code to letter.
  CODE_TO_LETTER = LETTER_TO_CODE.invert

end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
require_relative "../four_bit_sequence"
|
|
2
|
+
|
|
3
|
+
# Mixin that decodes a collection of bytes, each packing two four-bit codes,
# back into a string of letters.
module BFA::FourBitSequence::Decode

  # Expands every byte yielded by +each+ into two letters (high nibble first)
  # and returns the first +strsize+ characters, which drops the padding nibble
  # of an odd-length sequence.
  #
  # @param strsize [Integer] number of letters of the original sequence
  # @return [String] the decoded sequence
  def parse_4bits(strsize)
    retval = ""
    each do |code|
      retval << BFA::FourBitSequence::CODE_TO_LETTER[code >> 4]
      retval << BFA::FourBitSequence::CODE_TO_LETTER[code & 15]
    end
    # Slice by start and length: the range form 0..strsize-1 degenerates to
    # 0..-1 (the whole string) when strsize is 0.
    return retval[0, strsize]
  end

end
|
|
15
|
+
|
|
16
|
+
# Make parse_4bits available on RGFA::ByteArray instances.
RGFA::ByteArray.class_eval do
  include BFA::FourBitSequence::Decode
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
require_relative "../four_bit_sequence"
|
|
2
|
+
|
|
3
|
+
# Mixin that packs a string of letters into bytes, two letters per byte.
module BFA::FourBitSequence::Encode

  # Encodes the receiver's characters pairwise: the first letter of each pair
  # fills the high nibble and the second the low nibble. Letters are looked up
  # case-insensitively; letters missing from the table are encoded as 15.
  # When the length is odd, the last byte carries only the high nibble.
  #
  # @return [RGFA::ByteArray] the packed bytes
  def to_4bits
    table = BFA::FourBitSequence::LETTER_TO_CODE
    packed = RGFA::ByteArray.new
    each_char.each_slice(2) do |first, second|
      high = (table[first.upcase] || 15) << 4
      if second.nil?
        packed << high
      else
        packed << (high + (table[second.upcase] || 15))
      end
    end
    packed
  end

end
|
|
23
|
+
|
|
24
|
+
# Make to_4bits available on every String.
String.class_eval do
  include BFA::FourBitSequence::Encode
end
|
data/lib/bfa/reader.rb
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
require "rgfa"
|
|
2
|
+
require "zlib"
|
|
3
|
+
require_relative "constants"
|
|
4
|
+
require_relative "binary_cigar/decode"
|
|
5
|
+
require_relative "four_bit_sequence/decode"
|
|
6
|
+
|
|
7
|
+
# Decoder for BFA files.
#
# A reader wraps one input file, which is transparently gunzipped when it
# starts with the gzip signature, and decodes its content into an RGFA object.
# The sections are read in this order: magic string, headers, segments, links,
# containments, paths.
class BFA::Reader

  include BFA::Constants

  # Opens +filename+ for decoding.
  #
  # @param filename [String] path of a plain or gzip-compressed BFA file
  # @see BFA::Writer to write BFA files.
  def initialize(filename)
    file = File.new(filename)
    # IO#read returns nil on an empty file; to_s lets that case reach the
    # magic string check in #parse instead of failing with NoMethodError here.
    magic = file.read(2).to_s
    file.rewind
    if magic.bytes == [31, 139] # gzip signature
      @io = Zlib::GzipReader.new(file)
    else
      @io = file
    end
  end

  # Closes the underlying input stream.
  def close
    @io.close
  end

  # Decodes the whole stream.
  #
  # @raise [BFA::Reader::FormatError] if the magic string does not match or
  #   the stream ends prematurely
  # @return [RGFA]
  def parse
    rgfa = RGFA.new
    validate_magic_string!
    parse_headers(rgfa)
    parse_segments(rgfa)
    parse_links(rgfa)
    parse_containments(rgfa)
    parse_paths(rgfa)
    return rgfa
  end

  # Convenience wrapper: opens +filename+, parses it and closes it again.
  #
  # @param filename [String] path of the BFA file
  # @return [RGFA]
  def self.parse(filename)
    br = self.new(filename)
    begin
      br.parse
    ensure
      # release the file handle even when parsing raises
      br.close
    end
  end

  private

  # Consumes the magic string and checks it.
  #
  # @raise [BFA::Reader::FormatError] if it differs from MAGIC_STRING
  def validate_magic_string!
    ms = @io.read(MAGIC_STRING.size)
    unless ms == MAGIC_STRING
      raise BFA::Reader::FormatError,
        "Magic string not recognized (#{ms.inspect})"
    end
  end

  # Reads the header section: a count followed by that many optional fields,
  # each added to the header of +rgfa+.
  def parse_headers(rgfa)
    n_optfields = parse_size
    n_optfields.times do
      n, t, v = parse_optfield
      rgfa.header.add(n, v, t)
    end
    # <debug> "Headers data: #{rgfa.header}"
  end

  # Reads the segment section: a count followed by that many segments.
  def parse_segments(rgfa)
    n_segments = parse_size
    # <debug> "N.segments: #{n_segments}"
    n_segments.times do
      parse_segment(rgfa)
    end
  end

  # Reads a count followed by that many optional fields and stores each one in
  # +line_data+ as fieldname => [value, datatype].
  def parse_optfields(line_data)
    n_optfields = parse_size
    # <debug> "N.optfields: #{n_optfields}"
    n_optfields.times do
      n, t, v = parse_optfield
      line_data[n] = [v, t]
    end
  end

  # Reads one segment (name, sequence, optional fields) and adds it to +rgfa+.
  def parse_segment(rgfa)
    line_data = {}
    line_data[:name] = parse_varlenstr.to_sym
    line_data[:sequence] = parse_sequence
    parse_optfields(line_data)
    # <debug> "Segment data: #{line_data}"
    segment = RGFA::Line::Segment.new(line_data)
    rgfa << segment
  end

  # Reads the link section: a count followed by that many edges.
  def parse_links(rgfa)
    n_links = parse_size
    # <debug> "N.links: #{n_links}"
    n_links.times do
      parse_edge(rgfa)
    end
  end

  # Reads the containment section: a count followed by that many edges, each
  # carrying an additional position.
  def parse_containments(rgfa)
    n_containments = parse_size
    # <debug> "N.containments: #{n_containments}"
    n_containments.times do
      parse_edge(rgfa, true)
    end
  end

  # Reads one link or containment and adds it to +rgfa+.
  #
  # Both endpoints are stored as signed 1-based indexes into
  # rgfa.segment_names; a negative index denotes the :- orientation.
  #
  # @param containment [Boolean] whether a containment (with a :pos field)
  #   rather than a link is expected
  def parse_edge(rgfa, containment=false)
    line_data = {}
    [:from, :to].each do |dir|
      line_id = parse_numeric_value(:i)
      line_data[:"#{dir}_orient"] = line_id > 0 ? :+ : :-
      line_data[dir] = rgfa.segment_names[(line_id.abs)-1].to_sym
    end
    line_data[:overlap] = parse_cigar
    if containment
      line_data[:pos] = parse_numeric_value(:I)
    end
    parse_optfields(line_data)
    # <debug> "Edge data: #{line_data}"
    edge = containment ?
      RGFA::Line::Containment.new(line_data) :
      RGFA::Line::Link.new(line_data)
    rgfa << edge
  end

  # Reads the path section: a count followed by that many paths.
  def parse_paths(rgfa)
    n_paths = parse_size
    # <debug> "N.paths: #{n_paths}"
    n_paths.times do
      parse_path(rgfa)
    end
  end

  # Reads one path and adds it to +rgfa+.
  #
  # A path is stored as its name, a signed link count and that many signed
  # 1-based indexes into rgfa.links (negative: the link is traversed
  # reversed). The segment names and overlaps are rebuilt from those links.
  def parse_path(rgfa)
    line_data = {}
    line_data[:path_name] = parse_varlenstr.to_sym
    # The writer emits the link count as a signed 32-bit integer and negates
    # it for circular paths, so it has to be decoded as signed; the unsigned
    # parse_size could never yield a negative value.
    n_links = parse_numeric_value(:i)
    circular = false
    if n_links < 0
      n_links = -n_links
      circular = true
    end
    line_data[:segment_names] = []
    line_data[:cigars] = []
    n_links.times do |i|
      line_id = parse_numeric_value(:i)
      reverse_link = line_id < 0
      link = rgfa.links[line_id.abs-1]
      link = link.reverse if reverse_link
      if line_data[:segment_names].empty?
        line_data[:segment_names] <<
          [link.from, link.from_orient].to_oriented_segment
      end
      # the last link of a circular path leads back to the first segment,
      # which is already in the list
      if !circular || i < (n_links-1)
        line_data[:segment_names] <<
          [link.to, link.to_orient].to_oriented_segment
      end
      line_data[:cigars] << link.overlap
    end
    parse_optfields(line_data)
    # <debug> "Path data: #{line_data}"
    rgfa << RGFA::Line::Path.new(line_data)
  end

  # Reads one optional field: a two-byte name, a one-byte datatype code and
  # the value. All integer datatype codes are reported as :i.
  #
  # @return [Array(Symbol, Symbol, Object)] fieldname, datatype, value
  def parse_optfield
    fieldname = parse_fixlenstr(2).to_sym
    datatype = parse_fixlenstr(1).to_sym
    value = parse_data_item(datatype)
    datatype = :i if INTEGER_DATATYPES.include?(datatype)
    # <debug> "Optfield #{fieldname}:#{datatype}:#{value.inspect}"
    return fieldname, datatype, value
  end

  # Reads one value of the given datatype.
  #
  # @param datatype [Symbol] datatype code read from the stream
  # @raise [BFA::Reader::FormatError] if the datatype code is not known;
  #   continuing would misinterpret all following bytes
  def parse_data_item(datatype)
    # <assert> datatype.kind_of?(Symbol)
    case datatype
    when :A
      parse_fixlenstr(1)
    when :Z
      parse_cstr
    when :J
      parse_cstr
    when :i, :I, :c, :C, :s, :S, :f
      parse_numeric_value(datatype)
    when :B
      parse_numeric_array
    when :H
      parse_byte_array
    else
      raise BFA::Reader::FormatError,
        "Datatype not recognized (#{datatype.inspect})"
    end
  end

  # Reads a sequence: its length in letters followed by the letters packed
  # two per byte. A length of zero stands for the placeholder "*".
  #
  # @return [String]
  def parse_sequence
    seqsize = parse_size
    if seqsize == 0
      return "*"
    else
      # two letters per byte, rounded up
      n_values = (seqsize + 1) / 2
      parse_values(NUMERIC_SIZE[:C], NUMERIC_TEMPLATE_CODE[:C],
                   n_values).to_byte_array.parse_4bits(seqsize)
    end
  end

  # Reads a CIGAR: a sized array of 32-bit unsigned values, each decoded with
  # Integer#parse_binary_cigar. An empty array stands for the placeholder "*".
  #
  # @return [String, Array]
  def parse_cigar
    cigar = parse_numeric_values(:I)
    if cigar.empty?
      return "*"
    else
      return cigar.map(&:parse_binary_cigar)
    end
  end

  # Reads a numeric array: a one-byte subtype code followed by a sized array
  # of values of that subtype.
  def parse_numeric_array
    st = parse_fixlenstr(1).to_sym
    parse_numeric_values(st).to_numeric_array
  end

  # Reads a byte array: a sized array of unsigned 8-bit values.
  def parse_byte_array
    parse_numeric_values(:C).to_byte_array
  end

  # Reads a single numeric value.
  #
  # @param val_type [Symbol] a key of NUMERIC_SIZE / NUMERIC_TEMPLATE_CODE
  def parse_numeric_value(val_type)
    # <assert> NUMERIC_SIZE.has_key?(val_type)
    # <assert> NUMERIC_TEMPLATE_CODE.has_key?(val_type)
    parse_value(NUMERIC_SIZE[val_type],
                NUMERIC_TEMPLATE_CODE[val_type])
  end

  # Reads a count followed by that many numeric values.
  #
  # @param val_type [Symbol] a key of NUMERIC_SIZE / NUMERIC_TEMPLATE_CODE
  # @return [Array<Numeric>]
  def parse_numeric_values(val_type)
    # <assert> NUMERIC_SIZE.has_key?(val_type)
    # <assert> NUMERIC_TEMPLATE_CODE.has_key?(val_type)
    asize = parse_size
    if asize == 0
      return []
    else
      parse_values(NUMERIC_SIZE[val_type],
                   NUMERIC_TEMPLATE_CODE[val_type], asize)
    end
  end

  # Reads a length-prefixed string.
  def parse_varlenstr
    strsize = parse_size
    parse_fixlenstr(strsize)
  end

  # Reads one size value (SIZEOF_SIZE bytes, decoded with
  # SIZEOF_TEMPLATE_CODE).
  #
  # @return [Integer]
  def parse_size
    s = parse_value(SIZEOF_SIZE, SIZEOF_TEMPLATE_CODE)
    return s
  end

  # Reads a string of exactly +len+ bytes.
  def parse_fixlenstr(len)
    read!(len)
  end

  # Reads characters up to a terminating NUL, which is consumed but not
  # included in the result.
  #
  # @raise [BFA::Reader::FormatError] if the stream ends before the NUL
  # @return [String]
  def parse_cstr
    str = ""
    loop do
      c = @io.getc
      if c.nil?
        raise BFA::Reader::FormatError
      elsif c == "\0"
        return str
      else
        str << c
      end
    end
  end

  # Reads +val_size+ bytes and decodes them as a single value.
  def parse_value(val_size, val_template_code)
    read!(val_size).unpack(val_template_code)[0]
  end

  # Reads +numfields+ consecutive values of +val_size+ bytes each.
  #
  # @return [Array]
  def parse_values(val_size, val_template_code, numfields)
    read!(val_size*numfields).unpack(val_template_code+numfields.to_s)
  end

  # Reads exactly +val_size+ bytes.
  #
  # @raise [BFA::Reader::FormatError] if fewer bytes are available
  # @return [String]
  def read!(val_size)
    str = @io.read(val_size)
    if str.nil? || str.size < val_size
      raise BFA::Reader::FormatError
    end
    return str
  end

end
|
|
288
|
+
|
|
289
|
+
require_relative "reader/format_error"
|
data/lib/bfa/writer.rb
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
require "rgfa"
|
|
2
|
+
require "zlib"
|
|
3
|
+
require_relative "constants"
|
|
4
|
+
require_relative "binary_cigar/encode"
|
|
5
|
+
require_relative "four_bit_sequence/encode"
|
|
6
|
+
|
|
7
|
+
# Encoder for BFA files.
#
# Values are collected together with their pack directives in a pending
# record (@data / @template) and flushed to the output stream by write_data.
# The sections are written in this order: magic string, headers, segments,
# links, containments, paths.
class BFA::Writer

  include BFA::Constants

  # Creates the output file and writes the magic string to it.
  #
  # @param filename [String] path of the file to create
  # @param compressed [Boolean] whether to gzip the output
  # @return [BFA::Writer]
  def initialize(filename, compressed=true)
    @template = ""
    @data = []
    # binary mode: the content is packed binary data and must not go through
    # newline translation
    file = File.new(filename, "wb")
    @io = compressed ? Zlib::GzipWriter.new(file) : file
    @io.print BFA::Constants::MAGIC_STRING
  end

  # Writes all sections of +rgfa+ to the output stream.
  #
  # @param rgfa [RGFA] the graph to encode
  def encode(rgfa)
    add_headers(rgfa)
    add_segments(rgfa)
    add_links(rgfa)
    add_containments(rgfa)
    add_paths(rgfa)
  end

  # Closes the output stream.
  def close
    @io.close
  end

  # Convenience wrapper: creates +filename+, encodes +rgfa+ into it and
  # closes it again.
  #
  # @param filename [String] path of the file to create
  # @param rgfa [RGFA] the graph to encode
  # @param compressed [Boolean] whether to gzip the output
  # @return [nil]
  def self.encode(filename, rgfa, compressed=true)
    bw = self.new(filename, compressed)
    begin
      bw.encode(rgfa)
    ensure
      # close even when encoding raises, so the handle is not leaked
      bw.close
    end
    return nil
  end

  private

  # Writes the header section: the number of header tags followed by each tag
  # as an optional field.
  def add_headers(rgfa)
    headers_array = rgfa.header.tags
    add_size_of(headers_array)
    headers_array.each do |fieldname, val_type, value|
      add_optfield(fieldname, value, val_type)
    end
    write_data
  end

  # Adds the number of optional fields of +rgfa_line+ followed by the fields
  # themselves to the pending record.
  def add_optfields(rgfa_line)
    add_size_of(rgfa_line.optional_fieldnames)
    rgfa_line.optional_fieldnames.each do |of|
      add_optfield(of, rgfa_line.get(of), rgfa_line.get_datatype(of))
    end
  end

  # Writes the segment section and assigns each segment its 0-based position
  # as line_id, which add_edges uses to refer to it.
  def add_segments(rgfa)
    add_size_of(rgfa.segment_names)
    rgfa.segment_names.each_with_index do |segment_name, i|
      s = rgfa.segment!(segment_name)
      add_varlenstr(segment_name)
      add_sequence(s.sequence)
      add_optfields(s)
      write_data
      s.line_id = i
    end
  end

  # Writes the containment section.
  def add_containments(rgfa)
    add_edges(rgfa, true)
  end

  # Writes the link section.
  def add_links(rgfa)
    add_edges(rgfa, false)
  end

  # Writes all links or all containments of +rgfa+, grouped by segment in the
  # order of rgfa.segment_names.
  #
  # Endpoints are written as signed 1-based segment indexes, negated for the
  # :- orientation. Links additionally receive a running 0-based line_id,
  # which add_paths uses to refer to them.
  #
  # @param containments [Boolean] whether to write containments, not links
  def add_edges(rgfa, containments=false)
    add_size_of(containments ? rgfa.containments : rgfa.links)
    sn = rgfa.segment_names
    link_id = 0
    sn.each_with_index do |segment_name, segment_id|
      if containments
        edges = rgfa.contained_in(segment_name)
      else
        edges = [:+, :-].map do |orientation|
          rgfa.links_from([segment_name, orientation], false)
        end.flatten
      end
      edges.each do |edge|
        dir_id = {:from => segment_id,
                  :to => rgfa.segment(edge.to).line_id}
        [:from, :to].each do |dir|
          # 1-based, so that the sign can carry the orientation
          dir_id[dir] += 1
          dir_id[dir] = -dir_id[dir] if edge.get(:"#{dir}_orient") == :-
          add_numeric_value(:i, dir_id[dir])
        end
        add_cigar(edge.overlap)
        add_numeric_value(:I, edge.pos) if containments
        add_optfields(edge)
        write_data
        unless containments
          edge.line_id = link_id
          link_id += 1
        end
      end
    end
  end

  # Writes the path section. Each path is stored as its name, a signed link
  # count (negative for circular paths) and one signed 1-based link index per
  # link, followed by the optional fields.
  def add_paths(rgfa)
    add_size_of(rgfa.paths)
    rgfa.paths.each do |path|
      add_varlenstr(path.path_name)
      links = path.links
      # <debug> "Path links: #{links.inspect}"
      n_links = links.size
      n_links = -n_links if path.circular?
      add_numeric_value(:i, n_links)
      link_ids = links.map do |link, link_or|
        line_id = link.line_id + 1
        # <debug> "line_id: #{line_id.inspect}"
        # <debug> "link_or: #{link_or.inspect}"
        link_or ? line_id : -line_id
      end
      # <debug> "link ids: #{link_ids.inspect}"
      # the count has already been written above, hence with_size = false
      add_numeric_values(:i, link_ids, false)
      add_optfields(path)
      write_data
    end
  end

  # Add an optional field to the record
  #
  # @param fieldname [String, Symbol] name of the field
  # @param value [Object] value of the field
  # @param val_type [Symbol, String, nil] datatype code; when nil it is taken
  #   from value.gfa_datatype
  def add_optfield(fieldname, value, val_type)
    val_type ||= value.gfa_datatype
    add_fixlenstr(fieldname)
    add_fixlenstr(val_type)
    add_data_item(val_type, value)
  end

  # Packs the pending record, writes it to the output stream and starts a new
  # empty record.
  def write_data
    @io.print(@data.pack(@template))
    @template = ""
    @data = []
  end

  # Add a value to the record
  # @param value [Object|String] a ruby object or its rgfa datastring
  #   representation
  # @param datatype [RGFA::Line::FIELD_DATATYPE] the datatype of the data
  # @return [void]
  def add_data_item(datatype, value)
    # <assert> RGFA::Line::FIELD_DATATYPE.include?(datatype)
    case datatype
    when :A
      add_fixlenstr(value)
    when :Z
      add_cstr(value)
    when :J
      value = value.to_gfa_datastring(:J) if value.kind_of?(String)
      add_cstr(value)
    when :i
      add_int(value)
    when :f
      add_double(value)
    when :B
      add_numeric_array(value)
    when :H
      add_byte_array(value)
    end
    return nil
  end

  # Adds a sequence: its length in letters followed by the letters packed two
  # per byte. The placeholder "*" is stored as length zero.
  def add_sequence(seq)
    if seq == "*"
      add_numeric_value(:I, 0)
    else
      add_size_of(seq)
      add_values(NUMERIC_TEMPLATE_CODE[:C], seq.to_4bits)
    end
  end

  # Adds a CIGAR as a sized array of 32-bit unsigned values, one per
  # operation; an empty CIGAR is stored as size zero.
  def add_cigar(cigar)
    cigar = cigar.to_cigar
    if cigar.empty?
      add_numeric_value(:I, 0)
    else
      add_numeric_values(:I, cigar.map(&:to_binary))
    end
  end

  # Adds an integer using the type RGFA::NumericArray.integer_type selects
  # for it. The datatype code added just before (by add_optfield) is replaced
  # by the code of that type.
  def add_int(int)
    int = Integer(int)
    int_type = RGFA::NumericArray.integer_type(int..int)
    replace_fixlenstr(int_type)
    add_numeric_value(int_type.to_sym, int)
  end

  # Adds a floating point value (:f).
  def add_double(float)
    add_numeric_value(:f, Float(float))
  end

  # Adds a numeric array: its one-character subtype code followed by the
  # sized array of values.
  #
  # @param array [Array, String] the array or its datastring representation
  def add_numeric_array(array)
    array = array.parse_datastring(:B) if array.kind_of?(String)
    array = array.to_numeric_array
    st = array.compute_subtype
    add_fixlenstr(st)
    add_numeric_values(st.to_sym, array)
  end

  # Adds a byte array as a sized array of unsigned 8-bit values.
  #
  # @param array [Array, String] the array or its datastring representation
  def add_byte_array(array)
    array = array.parse_datastring(:H) if array.kind_of?(String)
    array = array.to_byte_array
    add_numeric_values(:C, array)
  end

  # Adds a string without length prefix or terminator.
  def add_fixlenstr(string)
    add_string(string)
  end

  # Replaces the value added last to the pending record with +string+.
  #
  # The entry is swapped rather than mutated in place, so that a string
  # object owned by the caller (or a frozen one) is never modified.
  def replace_fixlenstr(string)
    # <assert> @data.last.size == string.size
    @data[-1] = string.to_s
  end

  # Adds a NUL-terminated string.
  def add_cstr(string)
    # <assert> string.kind_of?(String) or string.kind_of?(Symbol)
    add_string(string.to_s + "\0")
  end

  # Adds a string preceded by its size.
  def add_varlenstr(string)
    # <assert> string.kind_of?(String) or string.kind_of?(Symbol)
    add_size_of(string.to_s)
    add_string(string)
  end

  # Adds the string form of +string+ to the pending record.
  def add_string(string)
    # <assert> string.kind_of?(String) or string.kind_of?(Symbol)
    # <assert> string.size > 0
    string = string.to_s
    add_value("Z#{string.size}", string)
  end

  # Adds a single numeric value.
  #
  # @param val_type [Symbol] a key of NUMERIC_TEMPLATE_CODE
  # @param number [Numeric]
  def add_numeric_value(val_type, number)
    # <assert> NUMERIC_TEMPLATE_CODE.has_key?(val_type)
    # <assert> number.kind_of?(Numeric)
    add_value(NUMERIC_TEMPLATE_CODE[val_type], number)
  end

  # Adds an array of numeric values, by default preceded by its size.
  #
  # @param val_type [Symbol] a key of NUMERIC_TEMPLATE_CODE
  # @param array [Array<Numeric>]
  # @param with_size [Boolean] whether to write the size first
  def add_numeric_values(val_type, array, with_size = true)
    # <assert> NUMERIC_TEMPLATE_CODE.has_key?(val_type)
    # <assert> array.kind_of?(Array)
    # <assert> array.each? {|e| e.kind_of?(Numeric)}
    add_size_of(array) if with_size
    add_values(NUMERIC_TEMPLATE_CODE[val_type], array)
  end

  # Adds object.size as a size value.
  def add_size_of(object)
    # <assert> object.kind_of?(Array) or object.kind_of?(String)
    add_value(SIZEOF_TEMPLATE_CODE, object.size)
  end

  # Appends one value and its pack directive to the pending record.
  def add_value(template, value)
    # <assert> value.kind_of?(String) or value.kind_of?(Numeric)
    @template << template
    @data << value
  end

  # Appends all elements of +array+ to the pending record, using +template+
  # with the element count as repeat count.
  def add_values(template, array)
    # <assert> array.kind_of?(Array)
    # append in place rather than building new objects with +=
    @template << (template + array.size.to_s)
    @data.concat(array)
  end

end
|
metadata
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: bfa
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: '1.1'
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Giorgio Gonnella
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2016-07-22 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description: |2
|
|
14
|
+
The Graphical Fragment Assembly (GFA) is a proposed format which allows one
|
|
15
|
+
to describe the product of sequence assembly and is implemented in the
|
|
16
|
+
RGFA class defined in the rgfa gem.
|
|
17
|
+
|
|
18
|
+
The GFA format is a text format. This gem defines a complementary binary
|
|
19
|
+
format, BFA. The methods in this class allow one to write a BFA file from a
|
|
20
|
+
RGFA object, and to parse a BFA file into a RGFA object. This also allows
|
|
21
|
+
the conversion from/to GFA format.
|
|
22
|
+
|
|
23
|
+
This gem depends on the "rgfa" gem.
|
|
24
|
+
email: gonnella@zbh.uni-hamburg.de
|
|
25
|
+
executables: []
|
|
26
|
+
extensions: []
|
|
27
|
+
extra_rdoc_files: []
|
|
28
|
+
files:
|
|
29
|
+
- lib/bfa.rb
|
|
30
|
+
- lib/bfa/binary_cigar.rb
|
|
31
|
+
- lib/bfa/binary_cigar/decode.rb
|
|
32
|
+
- lib/bfa/binary_cigar/encode.rb
|
|
33
|
+
- lib/bfa/constants.rb
|
|
34
|
+
- lib/bfa/reader.rb
|
|
35
|
+
- lib/bfa/reader/format_error.rb
|
|
36
|
+
- lib/bfa/error.rb
|
|
37
|
+
- lib/bfa/four_bit_sequence.rb
|
|
38
|
+
- lib/bfa/four_bit_sequence/decode.rb
|
|
39
|
+
- lib/bfa/four_bit_sequence/encode.rb
|
|
40
|
+
- lib/bfa/writer.rb
|
|
41
|
+
homepage: http://github.com/ggonnella/bfa
|
|
42
|
+
licenses:
|
|
43
|
+
- CC-BY-SA
|
|
44
|
+
metadata: {}
|
|
45
|
+
post_install_message:
|
|
46
|
+
rdoc_options: []
|
|
47
|
+
require_paths:
|
|
48
|
+
- lib
|
|
49
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - ">="
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '2.0'
|
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
55
|
+
requirements:
|
|
56
|
+
- - ">="
|
|
57
|
+
- !ruby/object:Gem::Version
|
|
58
|
+
version: '0'
|
|
59
|
+
requirements: []
|
|
60
|
+
rubyforge_project:
|
|
61
|
+
rubygems_version: 2.0.3
|
|
62
|
+
signing_key:
|
|
63
|
+
specification_version: 4
|
|
64
|
+
summary: Write and parse the BFA format in Ruby
|
|
65
|
+
test_files: []
|
|
66
|
+
has_rdoc:
|