rgfa 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/bin/gfadiff.rb +420 -0
  3. data/bin/rgfa-findcrisprs.rb +208 -0
  4. data/bin/rgfa-mergelinear.rb +14 -0
  5. data/bin/rgfa-simdebruijn.rb +86 -0
  6. data/lib/rgfa.rb +376 -0
  7. data/lib/rgfa/byte_array.rb +74 -0
  8. data/lib/rgfa/cigar.rb +157 -0
  9. data/lib/rgfa/connectivity.rb +131 -0
  10. data/lib/rgfa/containments.rb +97 -0
  11. data/lib/rgfa/error.rb +3 -0
  12. data/lib/rgfa/field_array.rb +87 -0
  13. data/lib/rgfa/field_parser.rb +109 -0
  14. data/lib/rgfa/field_validator.rb +241 -0
  15. data/lib/rgfa/field_writer.rb +108 -0
  16. data/lib/rgfa/headers.rb +76 -0
  17. data/lib/rgfa/line.rb +721 -0
  18. data/lib/rgfa/line/containment.rb +87 -0
  19. data/lib/rgfa/line/header.rb +92 -0
  20. data/lib/rgfa/line/link.rb +379 -0
  21. data/lib/rgfa/line/path.rb +106 -0
  22. data/lib/rgfa/line/segment.rb +209 -0
  23. data/lib/rgfa/linear_paths.rb +285 -0
  24. data/lib/rgfa/lines.rb +155 -0
  25. data/lib/rgfa/links.rb +242 -0
  26. data/lib/rgfa/logger.rb +192 -0
  27. data/lib/rgfa/multiplication.rb +156 -0
  28. data/lib/rgfa/numeric_array.rb +196 -0
  29. data/lib/rgfa/paths.rb +98 -0
  30. data/lib/rgfa/rgl.rb +194 -0
  31. data/lib/rgfa/segment_ends_path.rb +9 -0
  32. data/lib/rgfa/segment_info.rb +162 -0
  33. data/lib/rgfa/segments.rb +99 -0
  34. data/lib/rgfa/sequence.rb +65 -0
  35. data/lib/rgfatools.rb +102 -0
  36. data/lib/rgfatools/artifacts.rb +29 -0
  37. data/lib/rgfatools/copy_number.rb +126 -0
  38. data/lib/rgfatools/invertible_segments.rb +104 -0
  39. data/lib/rgfatools/linear_paths.rb +140 -0
  40. data/lib/rgfatools/multiplication.rb +194 -0
  41. data/lib/rgfatools/p_bubbles.rb +66 -0
  42. data/lib/rgfatools/superfluous_links.rb +64 -0
  43. metadata +97 -0
@@ -0,0 +1,76 @@
1
+ require_relative "error"
2
+ require_relative "field_array"
3
+
4
+ # Methods for accessing the GFA header information.
5
+ #
6
+ # The GFA header is accessed using {#header RGFA#header},
7
+ # which returns a {RGFA::Line::Header} object.
8
+ #
9
+ # @example Accessing the header information
10
+ # rgfa.header.VN # => “1.0”
11
+ # rgfa.header.co = “This the header comment”
12
+ # rgfa.header.ni = 100
13
+ # rgfa.header.field_to_s(:ni) # => “ni:i:100”
14
+ #
15
+ # == Multiple header lines defining the same tag
16
+ #
17
+ # The specification does not explicitly forbid having the same tag on
18
+ # different lines. To represent this case, a "field array"
19
+ # ({RGFA::FieldArray RGFA::FieldArray}) is used, which is an array of
20
+ # instances of a tag, from different lines of the header.
21
+ #
22
+ # @example Header with tags repeated on different lines (see {RGFA::FieldArray})
23
+ # rgfa.header.ni # => RGFA::FieldArray<[100,200] @datatype: :i>
24
+ # rgfa.header.ni[0] # 100
25
+ # rgfa.header.ni << 200 # “200” is also OK
26
+ # rgfa.header.ni.map!{|i|i-10}
27
+ # rgfa.header.ni = [100,200,300].to_rgfa_field_array
28
+ #
29
+ # @example Adding instances of a tag (will go on different header lines)
30
+ # rgfa.header.add(:xx, 100) # => 100 # single i tag, if .xx did not exist yet
31
+ # rgfa.header.add(:xx, 100) # => RGFA::FieldArray<[100,100] @datatype: :i>
32
+ # rgfa.header.add(:xx, 100) # => RGFA::FieldArray<[100,100,100] @datatype :i>
33
+ #
34
module RGFA::Headers

  # The whole header, as a single line object.
  #
  # @return [RGFA::Line::Header] a header line representing the entire header
  #   information; if multiple header lines were present, and they contain
  #   the same tag, the tag value is represented by a {RGFA::FieldArray}
  def header
    @headers
  end

  # Header information in single-tag lines.
  #
  # Returns an array of RGFA::Line::Header objects, each containing a
  # single field of the header (the result of calling +split+ on the
  # header line object).
  # @!macro readonly
  #   @note Read-only! The returned array contains copies of the original
  #     values, i.e. changes in the lines will not affect the RGFA object;
  #     to update the values in the RGFA use the #header method.
  # @return [Array<RGFA::Line::Header>]
  # @api private
  def headers
    @headers.split
  end

  # Remove all information from the header, by re-running the header
  # initialization (+init_headers+).
  # @return [RGFA] self
  # @api private
  def delete_headers
    init_headers
    self
  end

  protected

  # Add a GFA line to the header. This is useful for constructing the graph.
  # For adding values to the header, see #header.
  # @param gfa_line [String, RGFA::Line::Header] a string representing a
  #   valid header line, or a RGFA header line object
  def add_header(gfa_line)
    # strings are converted using the validation level of the RGFA object
    header_line = gfa_line.to_rgfa_line(validate: @validate)
    @headers.merge(header_line)
  end

end
@@ -0,0 +1,721 @@
1
+ require "set"
2
+ #
3
+ # Generic representation of a record of a RGFA file.
4
+ #
5
+ # @!macro [new] rgfa_line
6
+ # @note
7
+ # This class is usually not meant to be directly initialized by the user;
8
+ # initialize instead one of its child classes, which define the concrete
9
+ # different record types.
10
+ #
11
+ class RGFA::Line
12
+
13
# Separator in the string representation of RGFA lines
SEPARATOR = "\t"

# List of allowed record_type values
RECORD_TYPES = %i[H S L C P]

# Full name of the record types
RECORD_TYPE_LABELS = {
  H: "header",
  S: "segment",
  L: "link",
  C: "containment",
  P: "path",
}

# Symbols representing the datatypes of optional fields
OPTFIELD_DATATYPE = %i[A i f Z J H B]

# Symbols representing the datatypes of required fields
REQFIELD_DATATYPE = %i[lbl orn lbs seq pos cig cgs]

# All symbols representing a valid datatype
FIELD_DATATYPE = OPTFIELD_DATATYPE + REQFIELD_DATATYPE

# List of data types which are parsed only on access;
# all others are parsed when read.
DELAYED_PARSING_DATATYPES = %i[cig cgs lbs H J B]

# Direction of a segment for links/containments
DIRECTION = %i[from to]

# Orientation of segments in paths/links/containments
ORIENTATION = %i[+ -]
46
+
47
+ # @!macro rgfa_line
48
+ #
49
+ # @param data [Array<String>] the content of the line; if
50
+ # an array of strings, this is interpreted as the split content
51
+ # of a GFA file line; note: a hash
52
+ # is also allowed, but this is for internal usage and shall be considered
53
+ # private
54
+ # @param validate [Integer] see paragraph Validation
55
+ # @param virtual [Boolean] <i>(default: +false+)</i>
56
+ # mark the line as virtual, i.e. not yet found in the GFA file;
57
+ # e.g. a link is allowed to refer to a segment which is not
58
+ # yet created; in this case a segment marked as virtual is created,
59
+ # which is replaced by a non-virtual segment, when the segment
60
+ # line is later found
61
+ #
62
+ # <b> Constants defined by subclasses </b>
63
+ #
64
+ # Subclasses of RGFA::Line _must_ define the following constants:
65
+ # - RECORD_TYPE [RGFA::Line::RECORD_TYPES]
66
+ # - REQFIELDS [Array<Symbol>] required fields
67
+ # - PREDEFINED_OPTFIELDS [Array<Symbol>] predefined optional fields
68
+ # - DATATYPE [Hash{Symbol=>Symbol}]:
69
+ # datatypes for the required fields and the predefined optional fields
70
+ #
71
+ # @raise [RGFA::Line::RequiredFieldMissingError]
72
+ # if too few required fields are specified
73
+ # @raise [RGFA::Line::CustomOptfieldNameError]
74
+ # if a non-predefined optional field uses upcase letters
75
+ # @raise [RGFA::Line::DuplicatedOptfieldNameError]
76
+ # if an optional field tag name is used more than once
77
+ # @raise [RGFA::Line::PredefinedOptfieldTypeError]
78
+ # if the type of a predefined optional field does not
79
+ # respect the specified type.
80
+ #
81
+ # @return [RGFA::Line]
82
+ #
83
+ # <b>Validation levels</b>
84
+ #
85
+ # The default is 2, i.e. if a field content is changed, the user is
86
+ # responsible to call #validate_field!, if necessary.
87
+ #
88
+ # - 0: no validation
89
+ # - 1: the number of required fields must be correct; optional fields
90
+ # cannot be duplicated; custom optional field names must be correct;
91
+ # predefined optional fields must have the correct type; only some
92
+ # fields are validated on initialization or first-time access to
93
+ # the field content
94
+ # - 2: 1 + all fields are validated on initialization or first-time
95
+ # access to the field content
96
+ # - 3: 2 + all fields are validated on initialization and record-specific
97
+ # validations are run (e.g. compare segment LN tag and sequence length)
98
+ # - 4: 3 + all fields are validated on writing to string
99
+ # - 5: 4 + all fields are validated by get and set methods
100
+ #
101
def initialize(data, validate: 2, virtual: false)
  # Only the concrete record classes define RECORD_TYPE; without it the
  # class cannot be instantiated.
  if !self.class.const_defined?(:"RECORD_TYPE")
    raise RuntimeError, "This class shall not be directly instantiated"
  end
  @validate, @virtual = validate, virtual
  @datatype = {}
  @data = {}
  if data.kind_of?(Hash)
    # internal usage: the field values are taken over as they are
    @data.update(data)
  else
    # normal initialization, data is an array of strings:
    # first the required fields, then whatever follows as optional fields
    initialize_required_fields(data)
    initialize_optional_fields(data)
    validate_record_type_specific_info! unless @validate < 3
  end
end
118
+
119
# Select a subclass based on the record type.
#
# @param record_type [Symbol, String, nil] the record type code
#   (e.g. the first field of a GFA line); anything which cannot be
#   converted to a symbol (such as +nil+, obtained from an empty line)
#   is handled as an unknown record type
# @raise [RGFA::Line::UnknownRecordTypeError] if the record_type is not valid
# @return [Class] a subclass of RGFA::Line
def self.subclass(record_type)
  # nil (and any other object without #to_sym) must end up in the
  # UnknownRecordTypeError branch instead of raising a NoMethodError
  key = record_type.respond_to?(:to_sym) ? record_type.to_sym : nil
  case key
  when :H then RGFA::Line::Header
  when :S then RGFA::Line::Segment
  when :L then RGFA::Line::Link
  when :C then RGFA::Line::Containment
  when :P then RGFA::Line::Path
  else
    raise RGFA::Line::UnknownRecordTypeError,
      "Record type unknown: '#{record_type}'"
  end
end
134
+
135
# The record type code, as defined by the RECORD_TYPE constant of the
# class of the line.
# @return [Symbol] record type code
def record_type
  self.class.const_get(:RECORD_TYPE)
end
139
+
140
# Names of all fields (required and optional) which currently have a
# value in this instance.
# @return [Array<Symbol>] fields defined for this instance
def fieldnames
  defined_fields = @data.keys
  defined_fields
end
144
+
145
# Names of the required fields, as listed in the REQFIELDS constant of the
# class of the line.
# @return [Array<Symbol>] name of the required fields
def required_fieldnames
  self.class.const_get(:REQFIELDS)
end
149
+
150
# Names of the fields which have a value in this instance and are not
# listed among the required fields of the class.
# @return [Array<Symbol>] name of the optional fields
def optional_fieldnames
  required = self.class::REQFIELDS
  @data.keys - required
end
154
+
155
# Deep copy of a RGFA::Line instance.
#
# String and Array values are cloned; values of fields of datatype J are
# copied by converting them to JSON and back; all other values are shared
# with the copy. The datatype table is copied as well.
#
# @return [RGFA::Line]
def clone
  data_cpy = {}
  @data.each_pair do |k, v|
    data_cpy[k] =
      if v.kind_of?(String)
        # Includes J fields which have not been parsed yet: these are
        # plain strings, which must not go through JSON.parse, as json
        # versions before 2.0 reject top-level scalar values.
        v.clone
      elsif field_datatype(k) == :J
        JSON.parse(v.to_json)
      elsif v.kind_of?(Array)
        v.clone
      else
        v
      end
  end
  cpy = self.class.new(data_cpy, validate: @validate, virtual: @virtual)
  cpy.instance_variable_set("@datatype", @datatype.clone)
  return cpy
end
172
+
173
# Is the line virtual?
#
# Is this RGFA::Line a virtual line representation
# (i.e. a placeholder for an expected but not yet encountered line)?
# The answer is the +virtual+ flag passed to the constructor, until
# #real! resets it.
# @api private
# @return [Boolean]
def virtual?
  @virtual
end
182
+
183
# Make a virtual line real.
# @api private
# This is called when a line which is expected, and for which a virtual
# line has been created, is finally found. So the line is converted into
# a real line, by merging in the line information from the found line:
# each field value of +real_line+ is stored into this line, overwriting
# the value of fields with the same name.
# @param real_line [RGFA::Line] the real line found
def real!(real_line)
  @virtual = false
  real_line.data.each_pair { |name, value| @data[name] = value }
end
195
+
196
# String representation of the line: the elements of #to_a, joined
# using SEPARATOR.
# @return [String] a string representation of self
def to_s
  fields = to_a
  fields.join(SEPARATOR)
end
200
+
201
# The record type, followed by the string representation of each
# required field and then of each optional field (the latter in the
# tagname:datatype:value form).
# @return [Array<String>] an array of string representations of the fields
def to_a
  fields = [record_type]
  fields.concat(required_fieldnames.map { |fn| field_to_s(fn, optfield: false) })
  fields.concat(optional_fieldnames.map { |fn| field_to_s(fn, optfield: true) })
  fields
end
208
+
209
# Returns the optional fields as an array of [fieldname, datatype, value]
# arrays. Datatype and value are obtained using #get_datatype and #get.
# @return [Array<[Symbol, Symbol, Object]>]
def tags
  optional_fieldnames.map do |name|
    [name, get_datatype(name), get(name)]
  end
end
219
+
220
# Remove an optional field from the line, if it exists;
# do nothing if it does not. Required fields are never removed.
# The datatype stored for the field is removed together with its value.
# @param fieldname [Symbol] the tag name of the optfield to remove
# @return [Object, nil] the deleted value or nil, if the field was not defined
def delete(fieldname)
  return nil unless optional_fieldnames.include?(fieldname)
  @datatype.delete(fieldname)
  @data.delete(fieldname)
end
232
+
233
# Raises an error if the content of the field does not correspond to
# the field type. The type is the one known for the field or, if none
# is known, the default type for the current value.
#
# @param fieldname [Symbol] the tag name of the field to validate
# @raise [RGFA::FieldParser::FormatError] if the content of the field is
#   not valid, according to its required type
# @return [void]
def validate_field!(fieldname)
  value = @data[fieldname]
  value.validate_gfa_field!(field_or_default_datatype(fieldname, value),
                            fieldname)
  nil
end
246
+
247
+ # @!macro [new] field_to_s
248
+ # Compute the string representation of a field.
249
+ #
250
+ # @param fieldname [Symbol] the tag name of the field
251
+ # @param optfield [Boolean] <i>(defaults to: +false+)</i>
252
+ # return the tagname:datatype:value representation
253
+ #
254
+ # @raise [RGFA::Line::TagMissingError] if field is not defined
255
+ # @return [String] the string representation
256
def field_to_s(fieldname, optfield: false)
  field = @data[fieldname]
  if field.nil?
    raise RGFA::Line::TagMissingError, "No value defined for tag #{fieldname}"
  end
  t = field_or_default_datatype(fieldname, field)
  # values which are already strings are output as they are
  field = field.to_gfa_field(datatype: t) unless field.kind_of?(String)
  # from validation level 4 the content is checked on output
  field.validate_gfa_field!(t, fieldname) if @validate >= 4
  return field unless optfield
  field.to_gfa_optfield(fieldname, datatype: t)
end
267
+
268
# Returns a symbol, which specifies the datatype of a field.
# If no datatype is known for the field, the default datatype for its
# current value is computed and stored for the field.
#
# @param fieldname [Symbol] the tag name of the field
# @return [RGFA::Line::FIELD_DATATYPE] the datatype symbol
def get_datatype(fieldname)
  current_value = @data[fieldname]
  field_or_default_datatype(fieldname, current_value)
end
275
+
276
+ # Set the datatype of a field.
277
+ #
278
+ # If an existing field datatype is changed, its content may become
279
+ # invalid (call #validate_field! if necessary).
280
+ #
281
+ # If the method is used for a required field or a predefined field,
282
+ # the line will use the specified datatype instead of the predefined
283
+ # one, resulting in a potentially invalid line.
284
+ #
285
+ # @param fieldname [Symbol] the field name (it is not required that
286
+ # the field exists already)
287
+ # @param datatype [RGFA::Line::FIELD_DATATYPE] the datatype
288
+ # @raise [RGFA::Line::UnknownDatatype] if +datatype+ is not
289
+ # a valid datatype for optional fields
290
+ # @return [RGFA::Line::FIELD_DATATYPE] the datatype
291
def set_datatype(fieldname, datatype)
  # only the datatypes of optional fields are accepted here
  if !OPTFIELD_DATATYPE.include?(datatype)
    raise RGFA::Line::UnknownDatatype, "Unknown datatype: #{datatype}"
  end
  @datatype.store(fieldname, datatype)
end
297
+
298
+ # Set the value of a field.
299
+ #
300
+ # If a datatype for a new custom optional field is not set,
301
+ # the default for the value assigned to the field will be used
302
+ # (e.g. J for Hashes, i for Integer, etc).
303
+ #
304
+ # @param fieldname [Symbol] the name of the field to set
305
+ # (required field, predefined optional field (uppercase) or custom optional
306
+ # field name (lowercase))
307
+ # @raise [RGFA::Line::FieldnameError] if +fieldname+ is not a
308
+ # valid predefined or custom optional name (unless +validate+ is 0)
309
+ # @return [Object] +value+
310
def set(fieldname, value)
  known = (@data.has_key?(fieldname) or
           predefined_optional_fieldname?(fieldname))
  return set_existing_field(fieldname, value) if known
  # what remains is a new custom optional field; its name is checked,
  # unless the validation is switched off
  unless (@validate == 0) or valid_custom_optional_fieldname?(fieldname)
    raise RGFA::Line::FieldnameError,
      "#{fieldname} is not an existing or predefined field or a "+
      "valid custom optional field"
  end
  define_field_methods(fieldname)
  # a datatype may have been set (see #set_datatype) before the value
  return set_existing_field(fieldname, value) unless @datatype[fieldname].nil?
  return nil if value.nil?
  @datatype[fieldname] = value.default_gfa_datatype
  @data[fieldname] = value
end
327
+
328
+ # Get the value of a field
329
+ # @param fieldname [Symbol] name of the field
330
+ # @param frozen [Boolean] <i>defaults to: +false+</i> return a frozen value;
331
+ # this guarantees that a validation will not be necessary on output
332
+ # if the field value has not been changed using #set
333
+ # @return [Object,nil] value of the field
334
+ # or +nil+ if field is not defined
335
def get(fieldname, frozen: false)
  # NOTE(review): the +frozen+ argument is not used by this implementation
  v = @data[fieldname]
  return v if v.nil?
  unless v.kind_of?(String)
    # already parsed value: only checked at the highest validation level
    v.validate_gfa_field!(field_datatype(fieldname), fieldname) if @validate >= 5
    return v
  end
  t = field_datatype(fieldname)
  if t == :Z or t == :seq
    # fields of these datatypes are kept as strings, nothing to parse
    v.validate_gfa_field!(t, fieldname) if (@validate >= 5)
    return v
  end
  # value was not parsed or was set to a string by the user:
  # parse it now and keep the parsed value for the next access
  @data[fieldname] = v.parse_gfa_field(datatype: t,
                                       validate_strings: @validate >= 2)
end
355
+
356
# Value of a field (see #get), raising an exception if it is not defined
# @param fieldname [Symbol] name of the field
# @raise [RGFA::Line::TagMissingError] if field is not defined
# @return [Object] value of the field
def get!(fieldname)
  value = get(fieldname)
  if value.nil?
    raise RGFA::Line::TagMissingError, "No value defined for tag #{fieldname}"
  end
  value
end
366
+
367
+ # Methods are dynamically created for non-existing but valid optional
368
+ # field names. Methods for predefined optional fields and required fields
369
+ # are created dynamically for each subclass; methods for existing optional
370
+ # fields are created on instance initialization.
371
+ #
372
+ # ---
373
+ # - (Object) <fieldname>(parse=true)
374
+ # The parsed content of a field. See also #get.
375
+ #
376
+ # <b>Parameters:</b>
377
+ #
378
+ # <b>Returns:</b>
379
+ # - (String, Hash, Array, Integer, Float) the parsed content of the field
380
+ # - (nil) if the field does not exist, but is a valid optional field name
381
+ #
382
+ # ---
383
+ # - (Object) <fieldname>!(parse=true)
384
+ # The parsed content of a field, raising an exception if not available.
385
+ # See also #get!.
386
+ #
387
+ # <b>Returns:</b>
388
+ # - (String, Hash, Array, Integer, Float) the parsed content of the field
389
+ #
390
+ # <b>Raises:</b>
391
+ # - (RGFA::Line::TagMissingError) if the field does not exist
392
+ #
393
+ # ---
394
+ #
395
+ # - (self) <fieldname>=(value)
396
+ # Sets the value of a required or optional
397
+ # field, or creates a new optional field if the fieldname is
398
+ # non-existing but valid. See also #set, #set_datatype.
399
+ #
400
+ # <b>Parameters:</b>
401
+ # - +*value*+ (String|Hash|Array|Integer|Float) value to set
402
+ #
403
+ # ---
404
+ #
405
def method_missing(m, *args, &block)
  field_name, operation, state = split_method_name(m)
  # Names which are not field names are handed to the default
  # implementation before anything else, so that they always raise a
  # NoMethodError, regardless of the number of arguments of the call.
  return super if state == :invalid
  if ((operation == :get or operation == :get!) and args.size > 1) or
     (operation == :set and args.size != 1)
    raise ArgumentError, "wrong number of arguments"
  end
  case state
  when :existing
    # the field has a value in this line
    case operation
    when :get
      # passing false as argument requests the string representation
      args[0] == false ? field_to_s(field_name) : get(field_name)
    when :get!
      # field_to_s raises TagMissingError by itself if no value is
      # defined, thus it is used for the unparsed variant of <fieldname>!
      args[0] == false ? field_to_s(field_name) : get!(field_name)
    when :set
      set_existing_field(field_name, args[0])
      nil
    end
  when :valid
    # valid name of an optional field, which has no value in this line
    case operation
    when :get
      nil
    when :get!
      raise RGFA::Line::TagMissingError,
        "No value defined for tag #{field_name}"
    when :set
      set(field_name, args[0])
      nil
    end
  end
end
445
+
446
# Makes respond_to? (and Object#method) aware of the methods which are
# handled dynamically: a method name is reported as available if
# #split_method_name does not classify it as invalid.
#
# respond_to_missing? is the hook which Ruby provides for classes
# implementing method_missing; respond_to? itself is not overridden.
# @param m [Symbol] the method name
# @param include_all [Boolean] passed on to the default implementation
# @return [Boolean]
# @see #method_missing
def respond_to_missing?(m, include_all=false)
  (split_method_name(m)[2] != :invalid) || super
end
451
+
452
# Conversion to RGFA::Line: the object is a line already, thus it is
# returned unchanged.
# @param validate [Boolean] ignored (compatibility reasons)
# @return self
def to_rgfa_line(validate: nil)
  self
end
457
+
458
# Equivalence check
#
# A Symbol is compared with the result of calling +to_sym+ on the line.
# Any other object which is not a RGFA::Line is never equal to a line.
#
# @param o [Object] the object to compare with
# @return [Boolean] does the line have the same record type,
#   contain the same optional fields
#   and do all required and optional fields contain the same field values?
# @see RGFA::Line::Link#==
def ==(o)
  return self.to_sym == o.to_sym if o.kind_of?(Symbol)
  # nil and other non-line objects have neither a record type nor
  # (accessible) field data: without this guard the comparison would
  # raise a NoMethodError instead of returning false
  return false unless o.kind_of?(RGFA::Line)
  return false if (o.record_type != self.record_type)
  return false if o.data.keys.sort != data.keys.sort
  o.data.each_key do |k|
    next if @data[k] == o.data[k]
    # the values may differ only because one of them is not parsed yet,
    # thus the string representations are compared before giving up
    return false if field_to_s(k) != o.field_to_s(k)
  end
  return true
end
476
+
477
# Validate the RGFA::Line instance: each defined field is validated
# (see #validate_field!), then the validations which are specific to
# the record type are run.
# @raise [RGFA::FieldParser::FormatError] if any field content is not valid
# @return [void]
def validate!
  fieldnames.each do |name|
    validate_field!(name)
  end
  validate_record_type_specific_info!
end
484
+
485
+ protected
486
+
487
# The hash in which the field values are stored (fieldname => value);
# accessible to other lines, e.g. for comparing or merging them.
def data
  @data
end

# The hash in which the datatypes of the fields are stored
# (fieldname => datatype symbol).
def datatype
  @datatype
end
494
+
495
+ private
496
+
497
# Number of the required fields of the record type (size of REQFIELDS).
def n_required_fields
  self.class::REQFIELDS.length
end
500
+
501
# Datatype of a field: the one stored in this instance, if any, otherwise
# the one which the DATATYPE table of the class defines for the field
# (nil, if there is none).
def field_datatype(fieldname)
  class_default = self.class::DATATYPE[fieldname]
  @datatype.fetch(fieldname, class_default)
end
504
+
505
# Datatype of a field; if none is known yet, the default datatype for
# +value+ is used, and it is stored as the datatype of the field.
def field_or_default_datatype(fieldname, value)
  known = field_datatype(fieldname)
  return known unless known.nil?
  @datatype[fieldname] = value.default_gfa_datatype
end
513
+
514
# Store the initial value of field +n+ (datatype +t+), given the string
# +s+ found in the line. From validation level 3 the string is always
# parsed, validating the strings; below that level it is parsed (without
# validating the strings) unless the datatype is one of those for which
# the parsing is delayed: in that case the string itself is stored.
def init_field_value(n ,t, s)
  strict = (@validate >= 3)
  if strict or !DELAYED_PARSING_DATATYPES.include?(t)
    s = s.parse_gfa_field(datatype: t, validate_strings: strict)
  end
  @data[n] = s
end
522
+
523
# Set the value of a field, without any check of the field name.
# Setting a field to nil removes its value from the line.
def set_existing_field(fieldname, value)
  return @data.delete(fieldname) if value.nil?
  if @validate >= 5
    # makes sure that a datatype is defined for the field, before
    # validating the new value against it
    field_or_default_datatype(fieldname, value)
    value.validate_gfa_field!(field_datatype(fieldname), fieldname)
  end
  @data[fieldname] = value
end
534
+
535
# Initialize the required fields, using the first elements of +strings+,
# in the order given by the REQFIELDS constant of the class.
#
# @param strings [Array<String>] the fields of the line (without the
#   record type)
# @raise [RGFA::Line::RequiredFieldMissingError] if the validation level
#   is at least 1 and fewer strings than required fields are given
def initialize_required_fields(strings)
  if (@validate >= 1) and (strings.size < n_required_fields)
    raise RGFA::Line::RequiredFieldMissingError,
      "#{n_required_fields} required fields expected, "+
      "#{strings.size} found\n#{strings.inspect}"
  end
  self.class::REQFIELDS.each_with_index do |n, i|
    init_field_value(n, self.class::DATATYPE[n], strings[i])
  end
end
546
+
547
# Is +fieldname+ a valid name for a custom optional field, i.e. does it
# consist of exactly two characters, a lower case letter followed by
# a lower case letter or a digit?
#
# The whole name is matched (\A and \z anchors); the line anchors ^ and $
# would also accept names containing newlines, such as "ab\n".
#
# @param fieldname [Symbol, String] the name to check
# @return [Integer, nil] 0 (thus a true value) if valid, nil otherwise
def valid_custom_optional_fieldname?(fieldname)
  /\A[a-z][a-z0-9]\z/ =~ fieldname
end
550
+
551
# Raise an exception, unless +fieldname+ is a valid name for a custom
# optional field (see #valid_custom_optional_fieldname?).
def validate_custom_optional_fieldname!(fieldname)
  return if valid_custom_optional_fieldname?(fieldname)
  raise RGFA::Line::CustomOptfieldNameError,
    "#{fieldname} is not a valid custom optional field name"
end
557
+
558
# Is +fieldname+ listed among the predefined optional fields of the
# record type (PREDEFINED_OPTFIELDS constant of the class)?
def predefined_optional_fieldname?(fieldname)
  predefined = self.class::PREDEFINED_OPTFIELDS
  predefined.include?(fieldname)
end
561
+
562
# Initialize the optional fields, using the elements of +strings+ which
# follow the required fields. With a validation level above 0, tags
# which are repeated, predefined tags with a datatype different from
# the predefined one and invalid custom tag names raise an exception.
def initialize_optional_fields(strings)
  n_required_fields.upto(strings.size - 1) do |i|
    n, t, s = strings[i].parse_gfa_optfield
    if @validate > 0
      if @data.has_key?(n)
        raise RGFA::Line::DuplicatedOptfieldNameError,
          "Optional field #{n} found multiple times"
      end
      if predefined_optional_fieldname?(n)
        if t != self.class::DATATYPE[n]
          raise RGFA::Line::PredefinedOptfieldTypeError,
            "Optional field #{n} must be of type "+
            "#{self.class::DATATYPE[n]}, #{t} found"
        end
      elsif !valid_custom_optional_fieldname?(n)
        raise RGFA::Line::CustomOptfieldNameError,
          "Custom-defined optional "+
          "fields must be lower case; found: #{n}"
      else
        # custom tag: the datatype found in the line is stored
        @datatype[n] = t
      end
    elsif !field_datatype(t)
      # NOTE(review): the lookup uses the datatype +t+ and not the field
      # name +n+ as key; presumably field_datatype(n) was meant - confirm
      # before changing, as the datatype found in the line would then no
      # longer be stored for predefined fields
      @datatype[n] = t
    end
    init_field_value(n, t, s)
  end
end
588
+
589
# Analyse the name of a method, for the dynamic field methods.
#
# Returns three values: the field name (the method name, without a final
# "!" or "=", if any), the operation (:get, :get! or :set) and the state:
# :existing if the field has a value in the line, :valid if the name is
# that of a predefined optional field or is a valid custom optional field
# name, :invalid otherwise.
def split_method_name(m)
  # a name which is a key of the data is always a plain getter
  return m, :get, :existing if @data.has_key?(m)
  suffix = m[-1]
  if suffix == "!"
    var = :get!
    m = m[0..-2].to_sym
  elsif suffix == "="
    var = :set
    m = m[0..-2].to_sym
  else
    var = :get
  end
  state =
    if @data.has_key?(m)
      :existing
    elsif self.class::PREDEFINED_OPTFIELDS.include?(m) or
          valid_custom_optional_fieldname?(m)
      :valid
    else
      :invalid
    end
  return m, var, state
end
614
+
615
# Validations which are specific to a record type: nothing is done here.
# NOTE(review): presumably overridden by the record type classes which
# need additional checks - confirm in the child classes.
def validate_record_type_specific_info!
end
617
+
618
#
# Define the field methods for a single field, as singleton methods of
# the instance: <fieldname> (see #get), <fieldname>! (see #get!) and
# <fieldname>= (sets the value of the field).
#
def define_field_methods(fieldname)
  define_singleton_method(fieldname) { get(fieldname) }
  define_singleton_method(:"#{fieldname}!") { get!(fieldname) }
  define_singleton_method(:"#{fieldname}=") do |value|
    set_existing_field(fieldname, value)
  end
end
632
+
633
#
# Define the field methods (<fieldname>, <fieldname>! and <fieldname>=)
# for all required and predefined optional fields of the class.
# This avoids calls to method_missing for fields which are already defined
#
def self.define_field_methods!
  predefined_names = self::REQFIELDS + self::PREDEFINED_OPTFIELDS
  predefined_names.each do |fieldname|
    define_method(fieldname) { get(fieldname) }
    define_method(:"#{fieldname}!") { get!(fieldname) }
    define_method(:"#{fieldname}=") do |value|
      set_existing_field(fieldname, value)
    end
  end
end
private_class_method :define_field_methods!
650
+
651
+ end
652
+
653
+ # Error raised if the record_type is not one of RGFA::Line::RECORD_TYPES
654
+ class RGFA::Line::UnknownRecordTypeError < RGFA::Error; end
655
+
656
+ # Error raised if an invalid datatype symbol is found
657
+ class RGFA::Line::UnknownDatatype < RGFA::Error; end
658
+
659
+ # Error raised if an invalid fieldname symbol is found
660
+ class RGFA::Line::FieldnameError < RGFA::Error; end
661
+
662
+ # Error raised if optional tag is not present
663
+ class RGFA::Line::TagMissingError < RGFA::Error; end
664
+
665
+ # Error raised if too few required fields are specified.
666
+ class RGFA::Line::RequiredFieldMissingError < RGFA::Error; end
667
+
668
+ # Error raised if a non-predefined optional field uses upcase
669
+ # letters.
670
+ class RGFA::Line::CustomOptfieldNameError < RGFA::Error; end
671
+
672
+ # Error raised if an optional field tag name is used more than once.
673
+ class RGFA::Line::DuplicatedOptfieldNameError < RGFA::Error; end
674
+
675
+ # Error raised if the type of a predefined optional field does not
676
+ # respect the specified type.
677
+ class RGFA::Line::PredefinedOptfieldTypeError < RGFA::Error; end
678
+
679
+ #
680
+ # Require the child classes
681
+ #
682
+ require_relative "line/header.rb"
683
+ require_relative "line/segment.rb"
684
+ require_relative "line/path.rb"
685
+ require_relative "line/link.rb"
686
+ require_relative "line/containment.rb"
687
+
688
# Extensions to the String core class.
#
class String

  # Parses a line of a RGFA file and creates an object of the correct
  # record type child class of {RGFA::Line}. The string is split at each
  # RGFA::Line::SEPARATOR and the resulting array is converted.
  # @return [subclass of RGFA::Line]
  # @raise [RGFA::Error] if the fields do not comply to the RGFA specification
  # @param validate [Integer] <i>(defaults to: 2)</i>
  #   see RGFA::Line#initialize
  def to_rgfa_line(validate: 2)
    fields = split(RGFA::Line::SEPARATOR)
    fields.to_rgfa_line(validate: validate)
  end

end
703
+
704
# Extensions to the Array core class.
#
class Array

  # Parses an array containing the fields of a RGFA file line and creates an
  # object of the correct record type child class of {RGFA::Line}.
  # The first element of the array is the record type.
  # @note
  #   This method modifies the content of the array (the record type is
  #   removed from it); if you still need the array, you must create
  #   a copy before calling it
  # @return [subclass of RGFA::Line]
  # @raise [RGFA::Error] if the fields do not comply to the RGFA specification
  # @param validate [Integer] <i>(defaults to: 2)</i>
  #   see RGFA::Line#initialize
  def to_rgfa_line(validate: 2)
    record_type = shift
    line_class = RGFA::Line.subclass(record_type)
    line_class.new(self, validate: validate)
  end

end