rgfa 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/gfadiff.rb +420 -0
- data/bin/rgfa-findcrisprs.rb +208 -0
- data/bin/rgfa-mergelinear.rb +14 -0
- data/bin/rgfa-simdebruijn.rb +86 -0
- data/lib/rgfa.rb +376 -0
- data/lib/rgfa/byte_array.rb +74 -0
- data/lib/rgfa/cigar.rb +157 -0
- data/lib/rgfa/connectivity.rb +131 -0
- data/lib/rgfa/containments.rb +97 -0
- data/lib/rgfa/error.rb +3 -0
- data/lib/rgfa/field_array.rb +87 -0
- data/lib/rgfa/field_parser.rb +109 -0
- data/lib/rgfa/field_validator.rb +241 -0
- data/lib/rgfa/field_writer.rb +108 -0
- data/lib/rgfa/headers.rb +76 -0
- data/lib/rgfa/line.rb +721 -0
- data/lib/rgfa/line/containment.rb +87 -0
- data/lib/rgfa/line/header.rb +92 -0
- data/lib/rgfa/line/link.rb +379 -0
- data/lib/rgfa/line/path.rb +106 -0
- data/lib/rgfa/line/segment.rb +209 -0
- data/lib/rgfa/linear_paths.rb +285 -0
- data/lib/rgfa/lines.rb +155 -0
- data/lib/rgfa/links.rb +242 -0
- data/lib/rgfa/logger.rb +192 -0
- data/lib/rgfa/multiplication.rb +156 -0
- data/lib/rgfa/numeric_array.rb +196 -0
- data/lib/rgfa/paths.rb +98 -0
- data/lib/rgfa/rgl.rb +194 -0
- data/lib/rgfa/segment_ends_path.rb +9 -0
- data/lib/rgfa/segment_info.rb +162 -0
- data/lib/rgfa/segments.rb +99 -0
- data/lib/rgfa/sequence.rb +65 -0
- data/lib/rgfatools.rb +102 -0
- data/lib/rgfatools/artifacts.rb +29 -0
- data/lib/rgfatools/copy_number.rb +126 -0
- data/lib/rgfatools/invertible_segments.rb +104 -0
- data/lib/rgfatools/linear_paths.rb +140 -0
- data/lib/rgfatools/multiplication.rb +194 -0
- data/lib/rgfatools/p_bubbles.rb +66 -0
- data/lib/rgfatools/superfluous_links.rb +64 -0
- metadata +97 -0
data/lib/rgfa/headers.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
require_relative "error"
|
2
|
+
require_relative "field_array"
|
3
|
+
|
4
|
+
# Methods for accessing the GFA header information.
|
5
|
+
#
|
6
|
+
# The GFA header is accessed using {#header RGFA#header},
|
7
|
+
# which returns a {RGFA::Line::Header} object.
|
8
|
+
#
|
9
|
+
# @example Accessing the header information
|
10
|
+
# rgfa.header.VN # => “1.0”
|
11
|
+
# rgfa.header.co = "This is the header comment"
|
12
|
+
# rgfa.header.ni = 100
|
13
|
+
# rgfa.header.field_to_s(:ni) # => “ni:i:100”
|
14
|
+
#
|
15
|
+
# == Multiple header lines defining the same tag
|
16
|
+
#
|
17
|
+
# The specification does not explicitly forbid having the same tag on
|
18
|
+
# different lines. To represent this case, a "field array"
|
19
|
+
# ({RGFA::FieldArray RGFA::FieldArray}) is used, which is an array of
|
20
|
+
# instances of a tag, from different lines of the header.
|
21
|
+
#
|
22
|
+
# @example Header with tags repeated on different lines (see {RGFA::FieldArray})
|
23
|
+
# rgfa.header.ni # => RGFA::FieldArray<[100,200] @datatype: :i>
|
24
|
+
# rgfa.header.ni[0] # 100
|
25
|
+
# rgfa.header.ni << 200 # “200” is also OK
|
26
|
+
# rgfa.header.ni.map!{|i|i-10}
|
27
|
+
# rgfa.header.ni = [100,200,300].to_rgfa_field_array
|
28
|
+
#
|
29
|
+
# @example Adding instances of a tag (will go on different header lines)
|
30
|
+
# rgfa.header.add(:xx, 100) # => 100 # single i tag, if .xx did not exist yet
|
31
|
+
# rgfa.header.add(:xx, 100) # => RGFA::FieldArray<[100,100] @datatype: :i>
|
32
|
+
# rgfa.header.add(:xx, 100) # => RGFA::FieldArray<[100,100,100] @datatype :i>
|
33
|
+
#
|
34
|
+
module RGFA::Headers

  # @return [RGFA::Line::Header] a header line representing the entire header
  #   information; if multiple header lines were present, and they contain the
  #   same tag, the tag value is represented by a {RGFA::FieldArray}
  def header
    return @headers
  end

  # Header information in single-tag-lines.
  #
  # Returns an array of RGFA::Line::Header
  # objects, each containing a single field of the header.
  # @!macro readonly
  #   @note Read-only! The returned array contains copies of the original
  #     values, i.e.\ changes in the lines will not affect the RGFA object; to
  #     update the values in the RGFA use the #header method.
  # @return [Array<RGFA::Line::Header>]
  # @api private
  def headers
    header_line = @headers
    header_line.split
  end

  # Remove all information from the header.
  # @return [RGFA] self
  # @api private
  def delete_headers
    # init_headers is not defined in this module; it is expected to be
    # provided by the object that includes it.
    init_headers
    self
  end

  protected

  # Add a GFA line to the header. This is useful for constructing the graph.
  # For adding values to the header, see #header.
  # @param gfa_line [String, RGFA::Line::Header] a string representing a valid
  #   header line, or a RGFA header line object
  def add_header(gfa_line)
    # Strings are converted to line objects first; line objects convert
    # to themselves.
    header_line = gfa_line.to_rgfa_line(validate: @validate)
    @headers.merge(header_line)
  end

end
|
data/lib/rgfa/line.rb
ADDED
@@ -0,0 +1,721 @@
|
|
1
|
+
require "set"
|
2
|
+
#
|
3
|
+
# Generic representation of a record of a RGFA file.
|
4
|
+
#
|
5
|
+
# @!macro [new] rgfa_line
|
6
|
+
# @note
|
7
|
+
# This class is usually not meant to be directly initialized by the user;
|
8
|
+
# initialize instead one of its child classes, which define the concrete
|
9
|
+
# different record types.
|
10
|
+
#
|
11
|
+
class RGFA::Line
|
12
|
+
|
13
|
+
# Separator in the string representation of RGFA lines
|
14
|
+
SEPARATOR = "\t"

# List of allowed record_type values
RECORD_TYPES = %i[H S L C P]

# Full name of the record types, keyed by record type code
RECORD_TYPE_LABELS = {
  H: "header",
  S: "segment",
  L: "link",
  C: "containment",
  P: "path",
}

# Symbols naming the datatypes allowed for optional fields
OPTFIELD_DATATYPE = %i[A i f Z J H B]

# Symbols naming the datatypes used for required fields
REQFIELD_DATATYPE = %i[lbl orn lbs seq pos cig cgs]

# Every valid datatype symbol (optional-field types followed by
# required-field types)
FIELD_DATATYPE = OPTFIELD_DATATYPE + REQFIELD_DATATYPE

# List of data types which are parsed only on access;
# all others are parsed when read.
DELAYED_PARSING_DATATYPES = %i[cig cgs lbs H J B]

# Direction of a segment for links/containments
DIRECTION = %i[from to]

# Orientation of segments in paths/links/containments
ORIENTATION = %i[+ -]
|
46
|
+
|
47
|
+
# @!macro rgfa_line
|
48
|
+
#
|
49
|
+
# @param data [Array<String>] the content of the line; if
|
50
|
+
# an array of strings, this is interpreted as the split content
|
51
|
+
# of a GFA file line; note: a hash
|
52
|
+
# is also allowed, but this is for internal usage and shall be considered
|
53
|
+
# private
|
54
|
+
# @param validate [Integer] see paragraph Validation
|
55
|
+
# @param virtual [Boolean] <i>(default: +false+)</i>
|
56
|
+
# mark the line as virtual, i.e. not yet found in the GFA file;
|
57
|
+
# e.g. a link is allowed to refer to a segment which is not
|
58
|
+
# yet created; in this case a segment marked as virtual is created,
|
59
|
+
# which is replaced by a non-virtual segment, when the segment
|
60
|
+
# line is later found
|
61
|
+
#
|
62
|
+
# <b> Constants defined by subclasses </b>
|
63
|
+
#
|
64
|
+
# Subclasses of RGFA::Line _must_ define the following constants:
|
65
|
+
# - RECORD_TYPE [RGFA::Line::RECORD_TYPES]
|
66
|
+
# - REQFIELDS [Array<Symbol>] required fields
|
67
|
+
# - PREDEFINED_OPTFIELDS [Array<Symbol>] predefined optional fields
|
68
|
+
# - DATATYPE [Hash{Symbol=>Symbol}]:
|
69
|
+
# datatypes for the required fields and the predefined optional fields
|
70
|
+
#
|
71
|
+
# @raise [RGFA::Line::RequiredFieldMissingError]
|
72
|
+
# if too few required fields are specified
|
73
|
+
# @raise [RGFA::Line::CustomOptfieldNameError]
|
74
|
+
# if a non-predefined optional field uses upcase letters
|
75
|
+
# @raise [RGFA::Line::DuplicatedOptfieldNameError]
|
76
|
+
# if an optional field tag name is used more than once
|
77
|
+
# @raise [RGFA::Line::PredefinedOptfieldTypeError]
|
78
|
+
# if the type of a predefined optional field does not
|
79
|
+
# respect the specified type.
|
80
|
+
#
|
81
|
+
# @return [RGFA::Line]
|
82
|
+
#
|
83
|
+
# <b>Validation levels</b>
|
84
|
+
#
|
85
|
+
# The default is 2, i.e. if a field content is changed, the user is
|
86
|
+
# responsible to call #validate_field!, if necessary.
|
87
|
+
#
|
88
|
+
# - 0: no validation
|
89
|
+
# - 1: the number of required fields must be correct; optional fields
|
90
|
+
# cannot be duplicated; custom optional field names must be correct;
|
91
|
+
# predefined optional fields must have the correct type; only some
|
92
|
+
# fields are validated on initialization or first-time access to
|
93
|
+
# the field content
|
94
|
+
# - 2: 1 + all fields are validated on initialization or first-time
|
95
|
+
# access to the field content
|
96
|
+
# - 3: 2 + all fields are validated on initialization and record-specific
|
97
|
+
# validations are run (e.g. compare segment LN tag and sequence length)
|
98
|
+
# - 4: 3 + all fields are validated on writing to string
|
99
|
+
# - 5: 4 + all fields are validated by get and set methods
|
100
|
+
#
|
101
|
+
def initialize(data, validate: 2, virtual: false)
  # Only classes that declare RECORD_TYPE may be instantiated.
  if !self.class.const_defined?(:"RECORD_TYPE")
    raise RuntimeError, "This class shall not be directly instantiated"
  end
  @validate, @virtual = validate, virtual
  @datatype = {}
  @data = {}
  case data
  when Hash
    # internal usage: the hash content is taken over as field values,
    # without any parsing or validation
    @data.merge!(data)
  else
    # normal initialization, data is an array of strings
    initialize_required_fields(data)
    initialize_optional_fields(data)
    validate_record_type_specific_info! if @validate >= 3
  end
end
|
118
|
+
|
119
|
+
# Select a subclass based on the record type
|
120
|
+
# @raise [RGFA::Line::UnknownRecordTypeError] if the record_type is not valid
|
121
|
+
# @return [Class] a subclass of RGFA::Line
|
122
|
+
def self.subclass(record_type)
  # A value which cannot be converted to a symbol (e.g. +nil+, which is what
  # shifting the first element off an empty array yields) is reported as an
  # unknown record type instead of failing with a NoMethodError.
  code = record_type.respond_to?(:to_sym) ? record_type.to_sym : nil
  case code
  when :H then RGFA::Line::Header
  when :S then RGFA::Line::Segment
  when :L then RGFA::Line::Link
  when :C then RGFA::Line::Containment
  when :P then RGFA::Line::Path
  else
    raise RGFA::Line::UnknownRecordTypeError,
      "Record type unknown: '#{record_type}'"
  end
end
|
134
|
+
|
135
|
+
# @return [Symbol] record type code
|
136
|
+
def record_type
  # The code is the RECORD_TYPE constant of the instance's own class.
  return self.class::RECORD_TYPE
end
|
139
|
+
|
140
|
+
# @return [Array<Symbol>] fields defined for this instance
|
141
|
+
def fieldnames
  # Names are reported in the insertion order of the underlying hash.
  names = @data.keys
  names
end
|
144
|
+
|
145
|
+
# @return [Array<Symbol>] name of the required fields
|
146
|
+
def required_fieldnames
  # Declared by the instance's class as the REQFIELDS constant.
  return self.class::REQFIELDS
end
|
149
|
+
|
150
|
+
# @return [Array<Symbol>] name of the optional fields
|
151
|
+
def optional_fieldnames
  # Every stored field which is not one of the class's required fields.
  required = self.class::REQFIELDS
  @data.keys.reject { |name| required.include?(name) }
end
|
154
|
+
|
155
|
+
# Deep copy of a RGFA::Line instance.
|
156
|
+
# @return [RGFA::Line]
|
157
|
+
def clone
  copied = @data.each_with_object({}) do |(name, value), acc|
    acc[name] =
      if field_datatype(name) == :J
        # JSON content is copied through a serialization round trip
        JSON.parse(value.to_json)
      elsif value.kind_of?(Array) or value.kind_of?(String)
        value.clone
      else
        # all other values are shared with the copy
        value
      end
  end
  duplicate = self.class.new(copied, validate: @validate, virtual: @virtual)
  # the constructor starts with an empty datatype table; carry ours over
  duplicate.instance_variable_set("@datatype", @datatype.clone)
  duplicate
end
|
172
|
+
|
173
|
+
# Is the line virtual?
|
174
|
+
#
|
175
|
+
# Is this RGFA::Line a virtual line representation
|
176
|
+
# (i.e. a placeholder for an expected but not encountered yet line)?
|
177
|
+
# @api private
|
178
|
+
# @return [Boolean]
|
179
|
+
def virtual?
  # Flag set by the constructor and cleared by #real!
  return @virtual
end
|
182
|
+
|
183
|
+
# Make a virtual line real.
|
184
|
+
# @api private
|
185
|
+
# This is called when a line which is expected, and for which a virtual
|
186
|
+
# line has been created, is finally found. So the line is converted into
|
187
|
+
# a real line, by merging in the line information from the found line.
|
188
|
+
# @param real_line [RGFA::Line] the real line found
|
189
|
+
def real!(real_line)
  @virtual = false
  # The datatypes recorded for the fields of the real line must be taken over
  # together with the values: otherwise the type of an optional field would
  # later be re-derived from its value, and could then differ from the type
  # that was recorded for the real line.
  @datatype.merge!(real_line.datatype)
  real_line.data.each_pair do |k, v|
    @data[k] = v
  end
end
|
195
|
+
|
196
|
+
# @return [String] a string representation of self
|
197
|
+
def to_s
|
198
|
+
to_a.join(SEPARATOR)
|
199
|
+
end
|
200
|
+
|
201
|
+
# @return [Array<String>] an array of string representations of the fields
|
202
|
+
def to_a
  # Required fields are written as plain values, optional fields in the
  # tagname:datatype:value form; the record type comes first.
  required = required_fieldnames.map { |fn| field_to_s(fn, optfield: false) }
  optional = optional_fieldnames.map { |fn| field_to_s(fn, optfield: true) }
  [record_type] + required + optional
end
|
208
|
+
|
209
|
+
# Returns the optional fields as an array of [fieldname, datatype, value]
|
210
|
+
# arrays.
|
211
|
+
# @return [Array<[Symbol, Symbol, Object]>]
|
212
|
+
def tags
  # One [fieldname, datatype, value] triple per optional field.
  optional_fieldnames.map do |name|
    [name, get_datatype(name), get(name)]
  end
end
|
219
|
+
|
220
|
+
# Remove an optional field from the line, if it exists;
|
221
|
+
# do nothing if it does not
|
222
|
+
# @param fieldname [Symbol] the tag name of the optfield to remove
|
223
|
+
# @return [Object, nil] the deleted value or nil, if the field was not defined
|
224
|
+
def delete(fieldname)
  # Required fields and unknown names are left untouched.
  return nil unless optional_fieldnames.include?(fieldname)
  # Drop the recorded datatype together with the value.
  @datatype.delete(fieldname)
  @data.delete(fieldname)
end
|
232
|
+
|
233
|
+
# Raises an error if the content of the field does not correspond to
|
234
|
+
# the field type
|
235
|
+
#
|
236
|
+
# @param fieldname [Symbol] the tag name of the field to validate
|
237
|
+
# @raise [RGFA::FieldParser::FormatError] if the content of the field is
|
238
|
+
# not valid, according to its required type
|
239
|
+
# @return [void]
|
240
|
+
def validate_field!(fieldname)
  value = @data[fieldname]
  # Validate against the recorded datatype, or the default one for the value.
  value.validate_gfa_field!(field_or_default_datatype(fieldname, value),
                            fieldname)
  nil
end
|
246
|
+
|
247
|
+
# @!macro [new] field_to_s
|
248
|
+
# Compute the string representation of a field.
|
249
|
+
#
|
250
|
+
# @param fieldname [Symbol] the tag name of the field
|
251
|
+
# @param optfield [Boolean] <i>(defaults to: +false+)</i>
|
252
|
+
# return the tagname:datatype:value representation
|
253
|
+
#
|
254
|
+
# @raise [RGFA::Line::TagMissingError] if field is not defined
|
255
|
+
# @return [String] the string representation
|
256
|
+
def field_to_s(fieldname, optfield: false)
  value = @data[fieldname]
  if value.nil?
    raise RGFA::Line::TagMissingError,
      "No value defined for tag #{fieldname}"
  end
  type = field_or_default_datatype(fieldname, value)
  # Values which are already strings are written out as they are.
  text = value.kind_of?(String) ? value : value.to_gfa_field(datatype: type)
  text.validate_gfa_field!(type, fieldname) if @validate >= 4
  return text unless optfield
  text.to_gfa_optfield(fieldname, datatype: type)
end
|
267
|
+
|
268
|
+
# Returns a symbol, which specifies the datatype of a field
|
269
|
+
#
|
270
|
+
# @param fieldname [Symbol] the tag name of the field
|
271
|
+
# @return [RGFA::Line::FIELD_DATATYPE] the datatype symbol
|
272
|
+
def get_datatype(fieldname)
  # Falls back to the default datatype of the stored value when no
  # datatype is known for the field.
  current = @data[fieldname]
  field_or_default_datatype(fieldname, current)
end
|
275
|
+
|
276
|
+
# Set the datatype of a field.
|
277
|
+
#
|
278
|
+
# If an existing field datatype is changed, its content may become
|
279
|
+
# invalid (call #validate_field! if necessary).
|
280
|
+
#
|
281
|
+
# If the method is used for a required field or a predefined field,
|
282
|
+
# the line will use the specified datatype instead of the predefined
|
283
|
+
# one, resulting in a potentially invalid line.
|
284
|
+
#
|
285
|
+
# @param fieldname [Symbol] the field name (it is not required that
|
286
|
+
# the field exists already)
|
287
|
+
# @param datatype [RGFA::Line::FIELD_DATATYPE] the datatype
|
288
|
+
# @raise [RGFA::Line::UnknownDatatype] if +datatype+ is not
|
289
|
+
# a valid datatype for optional fields
|
290
|
+
# @return [RGFA::Line::FIELD_DATATYPE] the datatype
|
291
|
+
def set_datatype(fieldname, datatype)
  # Only the datatypes of optional fields are accepted here.
  if !OPTFIELD_DATATYPE.include?(datatype)
    raise RGFA::Line::UnknownDatatype, "Unknown datatype: #{datatype}"
  end
  @datatype[fieldname] = datatype
end
|
297
|
+
|
298
|
+
# Set the value of a field.
|
299
|
+
#
|
300
|
+
# If a datatype for a new custom optional field is not set,
|
301
|
+
# the default for the value assigned to the field will be used
|
302
|
+
# (e.g. J for Hashes, i for Integer, etc).
|
303
|
+
#
|
304
|
+
# @param fieldname [Symbol] the name of the field to set
|
305
|
+
# (required field, predefined optional field (uppercase) or custom optional
|
306
|
+
# field name (lowercase))
|
307
|
+
# @raise [RGFA::Line::FieldnameError] if +fieldname+ is not a
|
308
|
+
# valid predefined or custom optional name (and +validate[:tags]+)
|
309
|
+
# @return [Object] +value+
|
310
|
+
def set(fieldname, value)
  # Fields which exist already, and predefined optional fields, are
  # assigned directly.
  if @data.has_key?(fieldname) || predefined_optional_fieldname?(fieldname)
    return set_existing_field(fieldname, value)
  end
  # Any other name must be a valid custom tag name, unless validation is off.
  unless (@validate == 0) || valid_custom_optional_fieldname?(fieldname)
    raise RGFA::Line::FieldnameError,
      "#{fieldname} is not an existing or predefined field or a "+
      "valid custom optional field"
  end
  define_field_methods(fieldname)
  # A datatype set beforehand (see #set_datatype) takes precedence.
  return set_existing_field(fieldname, value) unless @datatype[fieldname].nil?
  # Assigning nil to a field which does not exist creates nothing.
  return nil if value.nil?
  @datatype[fieldname] = value.default_gfa_datatype
  @data[fieldname] = value
end
|
327
|
+
|
328
|
+
# Get the value of a field
|
329
|
+
# @param fieldname [Symbol] name of the field
|
330
|
+
# @param frozen [Boolean] <i>defaults to: +false+</i> return a frozen value;
|
331
|
+
# this guarantees that a validation will not be necessary on output
|
332
|
+
# if the field value has not been changed using #set
|
333
|
+
# @return [Object,nil] value of the field
|
334
|
+
# or +nil+ if field is not defined
|
335
|
+
def get(fieldname, frozen: false)
  # NOTE: the +frozen+ keyword is accepted, but it is not read anywhere
  # in this method.
  value = @data[fieldname]
  return value if value.nil?
  unless value.kind_of?(String)
    # already parsed (or set by the user to a non-string value)
    if (@validate >= 5)
      value.validate_gfa_field!(field_datatype(fieldname), fieldname)
    end
    return value
  end
  type = field_datatype(fieldname)
  if type == :Z or type == :seq
    # strings of these datatypes are returned unparsed
    value.validate_gfa_field!(type, fieldname) if (@validate >= 5)
    return value
  end
  # value was not parsed or was set to a string by the user:
  # parse it now and keep the parsed result
  @data[fieldname] = value.parse_gfa_field(datatype: type,
                                           validate_strings: @validate >= 2)
end
|
355
|
+
|
356
|
+
# Value of a field, raising an exception if it is not defined
|
357
|
+
# @param fieldname [Symbol] name of the field
|
358
|
+
# @raise [RGFA::Line::TagMissingError] if field is not defined
|
359
|
+
# @return [Object,nil] value of the field
|
360
|
+
def get!(fieldname)
  value = get(fieldname)
  # a nil value is reported as a missing tag
  if value.nil?
    raise RGFA::Line::TagMissingError,
      "No value defined for tag #{fieldname}"
  end
  value
end
|
366
|
+
|
367
|
+
# Methods are dynamically created for non-existing but valid optional
|
368
|
+
# field names. Methods for predefined optional fields and required fields
|
369
|
+
# are created dynamically for each subclass; methods for existing optional
|
370
|
+
# fields are created on instance initialization.
|
371
|
+
#
|
372
|
+
# ---
|
373
|
+
# - (Object) <fieldname>(parse=true)
|
374
|
+
# The parsed content of a field. See also #get.
|
375
|
+
#
|
376
|
+
# <b>Parameters:</b>
|
377
|
+
#
|
378
|
+
# <b>Returns:</b>
|
379
|
+
# - (String, Hash, Array, Integer, Float) the parsed content of the field
|
380
|
+
# - (nil) if the field does not exist, but is a valid optional field name
|
381
|
+
#
|
382
|
+
# ---
|
383
|
+
# - (Object) <fieldname>!(parse=true)
|
384
|
+
# The parsed content of a field, raising an exception if not available.
|
385
|
+
# See also #get!.
|
386
|
+
#
|
387
|
+
# <b>Returns:</b>
|
388
|
+
# - (String, Hash, Array, Integer, Float) the parsed content of the field
|
389
|
+
#
|
390
|
+
# <b>Raises:</b>
|
391
|
+
# - (RGFA::Line::TagMissingError) if the field does not exist
|
392
|
+
#
|
393
|
+
# ---
|
394
|
+
#
|
395
|
+
# - (self) <fieldname>=(value)
|
396
|
+
# Sets the value of a required or optional
|
397
|
+
# field, or creates a new optional field if the fieldname is
|
398
|
+
# non-existing but valid. See also #set, #set_datatype.
|
399
|
+
#
|
400
|
+
# <b>Parameters:</b>
|
401
|
+
# - +*value*+ (String|Hash|Array|Integer|Float) value to set
|
402
|
+
#
|
403
|
+
# ---
|
404
|
+
#
|
405
|
+
def method_missing(m, *args, &block)
  field_name, operation, state = split_method_name(m)
  # Names which are not field accessors are handed to the default
  # implementation before the arguments are inspected, so that an unknown
  # method always results in a NoMethodError, whatever its arguments are.
  return super if state == :invalid
  if ((operation == :get or operation == :get!) and args.size > 1) or
     (operation == :set and args.size != 1)
    raise ArgumentError, "wrong number of arguments"
  end
  # <fieldname>(false) and <fieldname>!(false) return the string
  # representation of the field instead of the parsed value
  as_string = (args[0] == false)
  case state
  when :existing
    case operation
    when :get
      as_string ? field_to_s(field_name) : get(field_name)
    when :get!
      # field_to_s raises TagMissingError itself if the value is nil,
      # so no separate "bang" variant is needed for the string form
      as_string ? field_to_s(field_name) : get!(field_name)
    when :set
      set_existing_field(field_name, args[0])
      return nil
    end
  when :valid
    # valid name of an optional field which is not defined yet
    case operation
    when :get
      return nil
    when :get!
      raise RGFA::Line::TagMissingError,
        "No value defined for tag #{field_name}"
    when :set
      set(field_name, args[0])
      return nil
    end
  end
end
|
445
|
+
|
446
|
+
# Redefines respond_to? to correctly handle dynamical methods.
|
447
|
+
# @see #method_missing
|
448
|
+
# Hook consulted by respond_to? (and by Object#method) for names which are
# not regular methods. The names handled by #method_missing are reported
# here, so that respond_to? and the creation of method objects agree with
# what #method_missing accepts.
def respond_to_missing?(m, include_all=false)
  (split_method_name(m)[2] != :invalid) || super
end
|
451
|
+
|
452
|
+
# @return self
|
453
|
+
# @param validate [Boolean] ignored (compatibility reasons)
|
454
|
+
def to_rgfa_line(validate: nil)
  # Already a line object: nothing to convert; +validate+ is not used.
  return self
end
|
457
|
+
|
458
|
+
# Equivalence check
|
459
|
+
# @return [Boolean] does the line have the same record type,
|
460
|
+
# contains the same optional fields
|
461
|
+
# and all required and optional fields contain the same field values?
|
462
|
+
# @see RGFA::Line::Link#==
|
463
|
+
def ==(o)
  # comparison with a symbol is done on the symbol form of the line
  return self.to_sym == o.to_sym if o.kind_of?(Symbol)
  # Objects which are not lines (nil, strings, numbers, ...) have no record
  # type and are never equal to a line; without this check the comparison
  # would raise a NoMethodError.
  return false unless o.respond_to?(:record_type)
  return false if (o.record_type != self.record_type)
  return false if o.data.keys.sort != data.keys.sort
  o.data.each_key do |k|
    # Values which differ as objects may still be equivalent (e.g. a parsed
    # and an unparsed form): in that case the string forms decide.
    next if @data[k] == o.data[k]
    return false if field_to_s(k) != o.field_to_s(k)
  end
  return true
end
|
476
|
+
|
477
|
+
# Validate the RGFA::Line instance
|
478
|
+
# @raise [RGFA::FieldParser::FormatError] if any field content is not valid
|
479
|
+
# @return [void]
|
480
|
+
def validate!
  # field-level checks first, then the checks specific to the record type
  fieldnames.each do |name|
    validate_field!(name)
  end
  validate_record_type_specific_info!
end
|
484
|
+
|
485
|
+
protected
|
486
|
+
|
487
|
+
# Field values, keyed by field name. The hash itself is returned, not a
# copy; used to read the content of other lines (e.g. in #== and #real!).
def data
  return @data
end
|
490
|
+
|
491
|
+
# Datatypes recorded for single fields of this line, keyed by field name.
# The hash itself is returned, not a copy.
def datatype
  return @datatype
end
|
494
|
+
|
495
|
+
private
|
496
|
+
|
497
|
+
# Number of required fields declared by the class of the line (REQFIELDS).
def n_required_fields
  self.class::REQFIELDS.length
end
|
500
|
+
|
501
|
+
# Datatype known for a field: the one recorded for this instance, if any;
# otherwise the one declared by the class (DATATYPE); nil if neither exists.
def field_datatype(fieldname)
  @datatype.fetch(fieldname) { self.class::DATATYPE[fieldname] }
end
|
504
|
+
|
505
|
+
# Datatype known for a field (see #field_datatype); if there is none, the
# default datatype of +value+ is recorded for the field and returned.
def field_or_default_datatype(fieldname, value)
  known = field_datatype(fieldname)
  return known unless known.nil?
  @datatype[fieldname] = value.default_gfa_datatype
end
|
513
|
+
|
514
|
+
# Store the initial value of field +n+, given as string +s+ of datatype +t+.
# From validation level 3 on, every field is parsed (with string validation)
# immediately; below that, fields are parsed without string validation,
# except those of a delayed-parsing datatype, which are stored as strings.
def init_field_value(n, t, s)
  strict = @validate >= 3
  if strict || !DELAYED_PARSING_DATATYPES.include?(t)
    s = s.parse_gfa_field(datatype: t, validate_strings: strict)
  end
  @data[n] = s
end
|
522
|
+
|
523
|
+
# Assign +value+ to a field whose name has already been accepted.
# Assigning nil removes the value of the field; at validation level 5
# the new value is validated before it is stored.
def set_existing_field(fieldname, value)
  return @data.delete(fieldname) if value.nil?
  if @validate >= 5
    # makes sure that a datatype is recorded before validating against it
    field_or_default_datatype(fieldname, value)
    value.validate_gfa_field!(field_datatype(fieldname), fieldname)
  end
  @data[fieldname] = value
end
|
534
|
+
|
535
|
+
# Initialize the required fields from the leading elements of +strings+.
# @param strings [Array<String>] the fields of the line, without record type
# @raise [RGFA::Line::RequiredFieldMissingError] if validation is active and
#   fewer strings than required fields are given
def initialize_required_fields(strings)
  if (@validate >= 1) and (strings.size < n_required_fields)
    # (the message formerly contained an unbalanced closing parenthesis
    # after the number of fields found)
    raise RGFA::Line::RequiredFieldMissingError,
      "#{n_required_fields} required fields expected, "+
      "#{strings.size} found\n#{strings.inspect}"
  end
  # the i-th string is the value of the i-th required field of the class
  n_required_fields.times do |i|
    n = self.class::REQFIELDS[i]
    init_field_value(n, self.class::DATATYPE[n], strings[i])
  end
end
|
546
|
+
|
547
|
+
# Is +fieldname+ a valid name for a custom optional field, i.e. a lower case
# letter followed by a lower case letter or a digit?
# @param fieldname [Symbol, String]
# @return [Integer, nil] a true value (0) if the name is valid, nil otherwise
def valid_custom_optional_fieldname?(fieldname)
  # \A and \z anchor the whole name; ^ and $ would anchor single lines, and
  # would thereby accept names containing a newline (e.g. "ab\n")
  /\A[a-z][a-z0-9]\z/ =~ fieldname
end
|
550
|
+
|
551
|
+
# Raise an exception unless +fieldname+ is a valid custom optional field name.
# @raise [RGFA::Line::CustomOptfieldNameError] if the name is not valid
def validate_custom_optional_fieldname!(fieldname)
  return nil if valid_custom_optional_fieldname?(fieldname)
  raise RGFA::Line::CustomOptfieldNameError,
    "#{fieldname} is not a valid custom optional field name"
end
|
557
|
+
|
558
|
+
# Is +fieldname+ one of the optional fields predefined by the class of the
# line (PREDEFINED_OPTFIELDS)?
def predefined_optional_fieldname?(fieldname)
  predefined = self.class::PREDEFINED_OPTFIELDS
  predefined.include?(fieldname)
end
|
561
|
+
|
562
|
+
# Initialize the optional fields from the elements of +strings+ which follow
# the required fields; each of them is split into name, datatype and value.
# With validation active: duplicated names, predefined fields with a wrong
# datatype, and invalid custom names are rejected.
def initialize_optional_fields(strings)
  n_required_fields.upto(strings.size-1) do |i|
    n, t, s = strings[i].parse_gfa_optfield
    if !(@validate > 0)
      # NOTE(review): the lookup below is done with the datatype +t+, not
      # with the field name +n+; it is unclear if this is intended. It is
      # left unchanged, as using +n+ would change which datatype is recorded
      # when validation is switched off.
      (@datatype[n] = t) if !field_datatype(t)
    elsif @data.has_key?(n)
      raise RGFA::Line::DuplicatedOptfieldNameError,
        "Optional field #{n} found multiple times"
    elsif predefined_optional_fieldname?(n)
      # predefined fields must use the datatype declared by the class
      unless t == self.class::DATATYPE[n]
        raise RGFA::Line::PredefinedOptfieldTypeError,
          "Optional field #{n} must be of type "+
          "#{self.class::DATATYPE[n]}, #{t} found"
      end
    elsif not valid_custom_optional_fieldname?(n)
      raise RGFA::Line::CustomOptfieldNameError,
        "Custom-defined optional "+
        "fields must be lower case; found: #{n}"
    else
      # custom field: its datatype is only known from the line itself
      @datatype[n] = t
    end
    init_field_value(n, t, s)
  end
end
|
588
|
+
|
589
|
+
# Interpret a method name as an access to a field.
# @return [Array] field name, operation (:get, :get! or :set) and state
#   (:existing, :valid or :invalid) of the field name
def split_method_name(m)
  # a name which is itself a field of the line is always a getter
  return [m, :get, :existing] if @data.has_key?(m)
  # otherwise a trailing "!" or "=" selects the operation and is not
  # part of the field name
  operation = {"!" => :get!, "=" => :set}.fetch(m[-1], :get)
  name = (operation == :get) ? m : m[0..-2].to_sym
  state =
    if @data.has_key?(name)
      :existing
    elsif self.class::PREDEFINED_OPTFIELDS.include?(name) or
          valid_custom_optional_fieldname?(name)
      :valid
    else
      :invalid
    end
  [name, operation, state]
end
|
614
|
+
|
615
|
+
# Validation specific to the record type: nothing is checked here.
# NOTE(review): presumably meant to be overridden by the record type
# classes -- confirm against the child classes.
def validate_record_type_specific_info!
  nil
end
|
617
|
+
|
618
|
+
#
|
619
|
+
# Define field methods for a single field
|
620
|
+
#
|
621
|
+
def define_field_methods(fieldname)
|
622
|
+
define_singleton_method(fieldname) do
|
623
|
+
get(fieldname)
|
624
|
+
end
|
625
|
+
define_singleton_method :"#{fieldname}!" do
|
626
|
+
get!(fieldname)
|
627
|
+
end
|
628
|
+
define_singleton_method :"#{fieldname}=" do |value|
|
629
|
+
set_existing_field(fieldname, value)
|
630
|
+
end
|
631
|
+
end
|
632
|
+
|
633
|
+
#
|
634
|
+
# This avoids calls to method_missing for fields which are already defined
|
635
|
+
#
|
636
|
+
def self.define_field_methods!
  # getter, raising getter and setter for each required field and each
  # predefined optional field of the class
  names = self::REQFIELDS + self::PREDEFINED_OPTFIELDS
  names.each do |fieldname|
    define_method(fieldname) { get(fieldname) }
    define_method(:"#{fieldname}!") { get!(fieldname) }
    define_method(:"#{fieldname}=") do |value|
      set_existing_field(fieldname, value)
    end
  end
end
private_class_method :define_field_methods!
|
650
|
+
|
651
|
+
end
|
652
|
+
|
653
|
+
# Error raised if the record_type is not one of RGFA::Line::RECORD_TYPES
|
654
|
+
class RGFA::Line::UnknownRecordTypeError < RGFA::Error
end

# Error raised if an invalid datatype symbol is found
class RGFA::Line::UnknownDatatype < RGFA::Error
end

# Error raised if an invalid fieldname symbol is found
class RGFA::Line::FieldnameError < RGFA::Error
end

# Error raised if an optional tag is not present
class RGFA::Line::TagMissingError < RGFA::Error
end

# Error raised if too few required fields are specified.
class RGFA::Line::RequiredFieldMissingError < RGFA::Error
end

# Error raised if a non-predefined optional field uses upper case
# letters.
class RGFA::Line::CustomOptfieldNameError < RGFA::Error
end

# Error raised if an optional field tag name is used more than once.
class RGFA::Line::DuplicatedOptfieldNameError < RGFA::Error
end

# Error raised if the type of a predefined optional field does not
# respect the specified type.
class RGFA::Line::PredefinedOptfieldTypeError < RGFA::Error
end
|
678
|
+
|
679
|
+
#
|
680
|
+
# Require the child classes
|
681
|
+
#
|
682
|
+
require_relative "line/header.rb"
|
683
|
+
require_relative "line/segment.rb"
|
684
|
+
require_relative "line/path.rb"
|
685
|
+
require_relative "line/link.rb"
|
686
|
+
require_relative "line/containment.rb"
|
687
|
+
|
688
|
+
# Extensions to the String core class.
|
689
|
+
#
|
690
|
+
class String

  # Parses a line of a RGFA file and creates an object of the correct
  # record type child class of {RGFA::Line}
  # @return [subclass of RGFA::Line]
  # @raise [RGFA::Error] if the fields do not comply with the RGFA
  #   specification
  # @param validate [Integer] <i>(defaults to: 2)</i>
  #   see RGFA::Line#initialize
  def to_rgfa_line(validate: 2)
    # split into fields; the array conversion picks the record class
    fields = split(RGFA::Line::SEPARATOR)
    fields.to_rgfa_line(validate: validate)
  end

end
|
703
|
+
|
704
|
+
# Extensions to the Array core class.
|
705
|
+
#
|
706
|
+
class Array

  # Parses an array containing the fields of a RGFA file line and creates an
  # object of the correct record type child class of {RGFA::Line}
  # @note
  #   This method modifies the content of the array (the first element, i.e.
  #   the record type, is removed); if you still need the array, you must
  #   create a copy before calling it
  # @return [subclass of RGFA::Line]
  # @raise [RGFA::Error] if the fields do not comply with the RGFA
  #   specification
  # @param validate [Integer] <i>(defaults to: 2)</i>
  #   see RGFA::Line#initialize
  def to_rgfa_line(validate: 2)
    # the first element selects the class, the remaining ones are the fields
    line_class = RGFA::Line.subclass(shift)
    line_class.new(self, validate: validate)
  end

end
|