rgfa 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/bin/gfadiff.rb +420 -0
  3. data/bin/rgfa-findcrisprs.rb +208 -0
  4. data/bin/rgfa-mergelinear.rb +14 -0
  5. data/bin/rgfa-simdebruijn.rb +86 -0
  6. data/lib/rgfa.rb +376 -0
  7. data/lib/rgfa/byte_array.rb +74 -0
  8. data/lib/rgfa/cigar.rb +157 -0
  9. data/lib/rgfa/connectivity.rb +131 -0
  10. data/lib/rgfa/containments.rb +97 -0
  11. data/lib/rgfa/error.rb +3 -0
  12. data/lib/rgfa/field_array.rb +87 -0
  13. data/lib/rgfa/field_parser.rb +109 -0
  14. data/lib/rgfa/field_validator.rb +241 -0
  15. data/lib/rgfa/field_writer.rb +108 -0
  16. data/lib/rgfa/headers.rb +76 -0
  17. data/lib/rgfa/line.rb +721 -0
  18. data/lib/rgfa/line/containment.rb +87 -0
  19. data/lib/rgfa/line/header.rb +92 -0
  20. data/lib/rgfa/line/link.rb +379 -0
  21. data/lib/rgfa/line/path.rb +106 -0
  22. data/lib/rgfa/line/segment.rb +209 -0
  23. data/lib/rgfa/linear_paths.rb +285 -0
  24. data/lib/rgfa/lines.rb +155 -0
  25. data/lib/rgfa/links.rb +242 -0
  26. data/lib/rgfa/logger.rb +192 -0
  27. data/lib/rgfa/multiplication.rb +156 -0
  28. data/lib/rgfa/numeric_array.rb +196 -0
  29. data/lib/rgfa/paths.rb +98 -0
  30. data/lib/rgfa/rgl.rb +194 -0
  31. data/lib/rgfa/segment_ends_path.rb +9 -0
  32. data/lib/rgfa/segment_info.rb +162 -0
  33. data/lib/rgfa/segments.rb +99 -0
  34. data/lib/rgfa/sequence.rb +65 -0
  35. data/lib/rgfatools.rb +102 -0
  36. data/lib/rgfatools/artifacts.rb +29 -0
  37. data/lib/rgfatools/copy_number.rb +126 -0
  38. data/lib/rgfatools/invertible_segments.rb +104 -0
  39. data/lib/rgfatools/linear_paths.rb +140 -0
  40. data/lib/rgfatools/multiplication.rb +194 -0
  41. data/lib/rgfatools/p_bubbles.rb +66 -0
  42. data/lib/rgfatools/superfluous_links.rb +64 -0
  43. metadata +97 -0
@@ -0,0 +1,194 @@
1
+ begin
2
+ require "rgl/adjacency"
3
+ require "rgl/implicit"
4
+ require_relative "error"
5
+
6
+ #
7
+ # Conversion to RGL graphs
8
+ #
9
module RGFA::RGL

  # Creates an RGL graph.
  #
  # @param oriented [Boolean] (defaults to: <i>+true+</i>) may the graph
  #   contain links of segments in different orientation?
  # @return [RGL::ImplicitGraph] an rgl implicit directed graph
  def to_rgl(oriented: true)
    oriented ? to_rgl_oriented : to_rgl_unoriented
  end

  # Creates an RGL graph, including links orientations.
  #
  # Each segment yields two vertices, one per orientation. The vertices
  # adjacent to a vertex are computed from the links which leave the segment
  # in the orientation of the vertex, and from the links which enter the
  # segment in the opposite orientation (traversed backwards).
  #
  # @return [RGL::ImplicitGraph] an rgl implicit directed graph;
  #   where vertices are [RGFA::Segment, orientation] pairs
  #   (instances of the RGFA::OrientedSegment subclass of Array)
  def to_rgl_oriented
    RGL::ImplicitGraph.new do |g|
      g.vertex_iterator do |block|
        self.each_segment do |seg|
          [:+, :-].each do |orient|
            block.call([seg, orient].to_oriented_segment)
          end
        end
      end
      g.adjacent_iterator do |oriented_segment, block|
        s = segment(oriented_segment.segment)
        s.links[:from][oriented_segment.orient].each do |l|
          block.call([segment(l.to), l.to_orient].to_oriented_segment)
        end
        # The links table is indexed by orientation Symbols, as for
        # links[:from] above; orient_inverted returns the inverted Symbol,
        # while invert_orient would return a whole RGFA::OrientedSegment,
        # which is not a valid key.
        s.links[:to][oriented_segment.orient_inverted].each do |l|
          os = [segment(l.from), l.from_orient].to_oriented_segment
          block.call(os.invert_orient)
        end
      end
      g.directed = true
    end
  end

  # Creates an RGL graph, assuming that all links orientations
  # are "+".
  #
  # The check is done lazily, when the vertices adjacent to a segment are
  # enumerated.
  #
  # @raise [RGFA::RGL::ValueError] if the graph contains any link where
  #   from_orient or to_orient is :-
  # @return [RGL::ImplicitGraph] an rgl implicit directed graph;
  #   where vertices are RGFA::Segment objects
  def to_rgl_unoriented
    RGL::ImplicitGraph.new do |g|
      g.vertex_iterator {|block| self.each_segment {|s| block.call(s)}}
      g.adjacent_iterator do |s, bl|
        s = segment(s)
        s.links[:from][:+].each do |l|
          if l.to_orient == :-
            raise RGFA::RGL::ValueError,
              "Graph contains links with segments in reverse orientations"
          end
          bl.call(segment(l.to))
        end
        if s.links[:from][:-].size > 0
          raise RGFA::RGL::ValueError,
            "Graph contains links with segments in reverse orientations"
        end
      end
      g.directed = true
    end
  end

  # Hook called when the module is included: adds the methods of
  # ClassMethods as class methods of the including class.
  #
  # @param base [Module] the class or module which includes RGFA::RGL
  # @return [void]
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Class methods added to the class which includes RGFA::RGL
  module ClassMethods

    # @param g [RGL::ImplicitGraph, RGL::DirectedAdjacencyGraph] an RGL graph.
    #
    # @!macro[new] from_rgl
    #   <b>Accepted vertex formats</b>:
    #
    #   - RGFA::OrientedSegment, or Array which can be converted to it;
    #     where the first element is a <i>segment specifier</i> (see below)
    #   - <i>segment specifier</i> alone: the orientation is assumed to be :+
    #
    #   The <i>segment specifier</i> can be:
    #   - RGFA::Segment instance
    #   - String, segment representation (e.g. "S\tsegment\t*")
    #   - String, valid segment name (e.g. "segment")
    #
    #   @raise [RGFA::RGL::InvalidFormatError] if the graph cannot be
    #     converted
    #
    #   @return [RGFA] a new RGFA instance
    def from_rgl(g)
      gfa = RGFA.new
      unless g.respond_to?(:each_vertex) && g.respond_to?(:each_edge)
        raise RGFA::RGL::InvalidFormatError,
          "#{g} is not a valid RGL graph"
      end
      unless g.directed?
        raise RGFA::RGL::InvalidFormatError,
          "#{g} is not a directed graph"
      end
      g.each_vertex {|v| add_segment_if_new(gfa, v)}
      g.each_edge do |s, t|
        # link fields: from name, from orient, to name, to orient, overlap
        gfa << RGFA::Line::Link.new(segment_name_and_orient(s) +
                                    segment_name_and_orient(t) +
                                    ["*"])
      end
      gfa
    end

    private

    # Adds to +gfa+ the segment specified by the vertex +v+, unless a
    # segment with the same name is already present.
    #
    # @param gfa [RGFA] the graph to which the segment is added
    # @param v [Object] a vertex, in one of the formats accepted by from_rgl
    # @return [void]
    def add_segment_if_new(gfa, v)
      # RGFA::OrientedSegment or GFA::GraphVertex
      v = v.segment if v.respond_to?(:segment)
      if v.kind_of?(Symbol)
        # segment name as symbol
        return if gfa.segment_names.include?(v)
        v = RGFA::Line::Segment.new([v.to_s, "*"])
      elsif v.kind_of?(String)
        a = v.split("\t")
        if a[0] == "S"
          # string representation of segment
          return if gfa.segment_names.include?(a[1].to_sym)
          v = RGFA::Line::Segment.new(a[1..-1])
        else
          # segment name as string
          return if gfa.segment_names.include?(v.to_sym)
          v = RGFA::Line::Segment.new([v, "*"])
        end
      end
      return if gfa.segment_names.include?(v.name)
      gfa << v
    end

    # Computes the segment name and the orientation of a vertex.
    #
    # @param s [Object] a vertex, in one of the formats accepted by from_rgl
    # @return [Array] the segment name and the orientation (as a String;
    #   "+" if the vertex does not respond to +orient+)
    def segment_name_and_orient(s)
      # default orientation
      o = s.respond_to?(:orient) ? s.orient.to_s : "+"
      # RGFA::Line::Segment (also embedded in RGFA::OrientedSegment)
      if s.respond_to?(:name)
        s = s.name.to_s
      elsif s.respond_to?(:segment)
        # GFA::GraphVertex
        s = s.segment.to_s
      elsif s.respond_to?(:split)
        # string representation of a segment; any other string is
        # used unchanged as segment name
        a = s.split("\t")
        s = a[1] if a[0] == "S"
      else
        s = s.to_s
      end
      return s, o
    end

  end

end
173
+
174
module RGL::Graph

  # Converts the RGL graph to an RGFA graph, by passing it
  # to RGFA.from_rgl.
  #
  # @!macro from_rgl
  def to_rgfa
    RGFA.from_rgl(self)
  end

end
182
+
183
# Exception raised when the conversion is impossible because of
# unexpected values
class RGFA::RGL::ValueError < RGFA::Error
end

# Exception raised when the conversion is impossible because of
# general format problems
class RGFA::RGL::InvalidFormatError < RGFA::Error
end
188
+
189
+ rescue LoadError
190
+
191
# Empty replacement of the module, defined if a LoadError was raised
# while loading the code above
module RGFA::RGL; end
193
+
194
+ end
@@ -0,0 +1,9 @@
1
+ # An array containing {RGFA::SegmentEnd} elements, which defines a path
2
+ # in the graph
3
class RGFA::SegmentEndsPath < Array
  # Create a reverse direction path: the order of the elements is
  # reversed and the end type of each segment end is inverted.
  #
  # The result is wrapped in an instance of the class of the receiver,
  # so that calling +reverse+ again on it also inverts the end types
  # (a plain Array would only reverse the order of the elements).
  #
  # @return [RGFA::SegmentEndsPath]
  def reverse
    inverted = super.map do |segment_end|
      segment_end.to_segment_end.invert_end_type
    end
    self.class.new(inverted)
  end
end
@@ -0,0 +1,162 @@
1
+ require_relative "error"
2
+
3
+ # A segment or segment name plus an additional boolean attribute
4
+ #
5
+ # This class shall not be initialized directly.
6
+ # @api private
7
+ #
8
class RGFA::SegmentInfo < Array

  # Check that the elements of the array are compatible with the definition.
  #
  # The valid values of the second element are listed in the +ATTR+
  # constant of the class of the instance.
  #
  # @!macro [new] segment_info_validation_errors
  #   @raise [RGFA::SegmentInfo::InvalidSizeError] if size is not 2
  #   @raise [RGFA::SegmentInfo::InvalidAttributeError] if second element
  #     is not a valid info
  # @return [void]
  def validate!
    if size != 2
      raise RGFA::SegmentInfo::InvalidSizeError,
        "Wrong n of elements, 2 expected (#{inspect})"
    end
    unless self.class::ATTR.include?(self[1])
      raise RGFA::SegmentInfo::InvalidAttributeError,
        "Invalid attribute (#{self[1].inspect})"
    end
    return nil
  end

  # @return [Symbol, RGFA::Line::Segment] the segment instance or name
  def segment
    self[0]
  end

  # Set the segment
  # @param value [Symbol, RGFA::Line::Segment] the segment instance or name
  # @return [Symbol, RGFA::Line::Segment] +value+
  def segment=(value)
    self[0] = value
  end

  # @return [Symbol] the segment name
  def name
    self[0].kind_of?(RGFA::Line::Segment) ? self[0].name : self[0].to_sym
  end

  # @return [Symbol] the attribute
  def attribute
    self[1]
  end

  # Set the attribute
  # @param value [Symbol] the attribute
  # @return [Symbol] +value+
  def attribute=(value)
    self[1] = value
  end

  # @return [Symbol] the other possible value of the attribute
  def attribute_inverted
    self.class::ATTR[self.class::ATTR[0] == self[1] ? 1 : 0]
  end

  # @return [RGFA::SegmentInfo] same segment, inverted attribute
  def invert_attribute
    self.class.new([self[0], self.attribute_inverted])
  end

  # @param [Symbol] attribute an attribute value
  # @raise [RGFA::SegmentInfo::InvalidAttributeError] if +attribute+ is not
  #   one of the values in the +ATTR+ constant of the class
  # @return [Symbol] the other attribute value
  def self.invert(attribute)
    i = self::ATTR.index(attribute.to_sym)
    if i.nil?
      # report the offending value; in a class method +self+ is the class,
      # thus self[1] would not refer to the attribute
      raise RGFA::SegmentInfo::InvalidAttributeError,
        "Invalid attribute (#{attribute.inspect})"
    end
    # index 0 maps to -1 (the last element), index 1 maps to 0
    return self::ATTR[i-1]
  end

  # @return [String] name of the segment and attribute
  def to_s
    "#{name}#{attribute}"
  end

  # @return [Symbol] name of the segment and attribute
  def to_sym
    to_s.to_sym
  end

  # Compare the segment names and attributes of two instances
  #
  # +other+ is first converted to an instance of the class of the receiver.
  #
  # @param [RGFA::SegmentInfo] other the other instance
  # @return [Boolean]
  def ==(other)
    to_s == other.to_segment_info(self.class).to_s
  end

  # Compare the segment names and attributes of two instances
  #
  # +other+ is first converted to an instance of the class of the receiver.
  #
  # @param [RGFA::SegmentInfo] other the other instance
  # @return [Integer, nil] result of the comparison of the string
  #   representations
  def <=>(other)
    to_s <=> other.to_segment_info(self.class).to_s
  end

end
106
+
107
# Error raised when the array does not have the expected size
class RGFA::SegmentInfo::InvalidSizeError < RGFA::Error
end

# Error raised when an unknown value is used for the attribute
class RGFA::SegmentInfo::InvalidAttributeError < RGFA::Error
end
112
+
113
+ # A representation of a segment end
114
class RGFA::SegmentEnd < RGFA::SegmentInfo
  # Segment end type: begin
  END_TYPE_BEGIN = :B
  # Segment end type: end
  END_TYPE_END = :E
  # Valid values of the segment end type
  ATTR = [END_TYPE_BEGIN, END_TYPE_END]

  # Names specific to segment ends for the generic attribute methods
  alias end_type attribute
  alias end_type= attribute=
  alias invert_end_type invert_attribute
  alias end_type_inverted attribute_inverted
end
122
+
123
+ # A segment plus orientation
124
class RGFA::OrientedSegment < RGFA::SegmentInfo
  # Segment orientation: forward
  ORIENT_FWD = :+
  # Segment orientation: reverse
  ORIENT_REV = :-
  # Valid values of the segment orientation
  ATTR = [ORIENT_FWD, ORIENT_REV]

  # Names specific to oriented segments for the generic attribute methods
  alias orient attribute
  alias orient= attribute=
  alias invert_orient invert_attribute
  alias orient_inverted attribute_inverted
end
132
+
133
class Array

  # Create and validate a segment end from an array
  # @!macro segment_info_validation_errors
  # @return [RGFA::SegmentEnd]
  def to_segment_end
    to_segment_info(RGFA::SegmentEnd)
  end

  # Create and validate an oriented segment from an array
  # @!macro segment_info_validation_errors
  # @return [RGFA::OrientedSegment]
  def to_oriented_segment
    to_segment_info(RGFA::OrientedSegment)
  end

  protected

  # Convert the array to an instance of +subclass+.
  #
  # The array itself is returned if it is already an instance of
  # +subclass+. Otherwise String elements are converted to symbols
  # and the new instance is validated before it is returned.
  #
  # @param subclass [Class] the class of the instance to create
  # @return [RGFA::SegmentInfo]
  def to_segment_info(subclass)
    return self if is_a?(subclass)
    # support converting from gfa gem GraphVertex objects
    # (the result is not validated in this case):
    if respond_to?(:segment) && respond_to?(:orient)
      return RGFA::OrientedSegment.new([segment.to_sym, orient.to_sym])
    end
    symbolized = map {|item| item.is_a?(String) ? item.to_sym : item}
    subclass.new(symbolized).tap(&:validate!)
  end

end
@@ -0,0 +1,99 @@
1
+ require_relative "error"
2
+
3
+ #
4
+ # Methods for the RGFA class, which allow to handle segments in the graph.
5
+ #
6
module RGFA::Segments

  # Add a segment line to the graph.
  #
  # If a virtual segment with the same name exists, it is turned into
  # a real segment using the content of the line.
  #
  # @param gfa_line [Object] the segment line; it is converted using
  #   +to_rgfa_line+
  # @raise [RGFA::DuplicatedLabelError] if a path or a non-virtual segment
  #   with the same name already exists
  def add_segment(gfa_line)
    gfa_line = gfa_line.to_rgfa_line(validate: @validate)
    segment_name = gfa_line.name
    if @paths.has_key?(segment_name)
      raise RGFA::DuplicatedLabelError,
        "Error when adding line: #{gfa_line}\n"+
        "a path already exists with the name: #{segment_name}\n"+
        "Path: #{@paths[segment_name]}"
    elsif @segments.has_key?(segment_name)
      if @segments[segment_name].virtual?
        @segments[segment_name].real!(gfa_line)
      else
        raise RGFA::DuplicatedLabelError,
          "Error when adding line: #{gfa_line}\n"+
          "a segment already exists with the name: #{segment_name}\n"+
          "Segment: #{@segments[segment_name]}"
      end
    else
      @segments[segment_name] = gfa_line
    end
  end
  protected :add_segment

  # Delete a segment from the RGFA graph
  # @return [RGFA] self
  # @param s [String, RGFA::Line::Segment] segment name or instance
  # @param cascade [Boolean] if true, the links and containments involving
  #   the segment and the paths of the segment are deleted as well
  # @raise [RGFA::LineMissingError] if no such segment exists
  def delete_segment(s, cascade=true)
    s = segment!(s)
    if cascade
      connected_segments(s).each {|cs| unconnect_segments(s, cs)}
      [:+, :-].each do |o|
        # NOTE(review): a copy is iterated, as delete_path presumably
        # removes the path from s.paths[o] -- confirm against delete_path
        s.paths[o].dup.each {|pt| delete_path(pt)}
      end
    end
    @segments.delete(s.name)
    return self
  end

  # All segment lines of the graph
  # @return [Array<RGFA::Line::Segment>]
  def segments
    @segments.values
  end

  # @!macro [new] segment
  #   Searches the segment with name equal to +segment_name+.
  #   @param s [String, RGFA::Line::Segment] a segment or segment name
  #   @return [RGFA::Line::Segment] if a segment is found
  # @return [nil] if no such segment exists in the RGFA instance
  #
  def segment(s)
    # line instances are returned unchanged, without any lookup
    return s if s.kind_of?(RGFA::Line)
    @segments[s.to_sym]
  end

  # @!macro segment
  # @raise [RGFA::LineMissingError] if no such segment exists
  def segment!(s)
    seg = segment(s)
    if seg.nil?
      msg = "No segment has name #{s}"
      # the segment names are listed only if the graph has few segments
      if segment_names.size < 10
        msg += "\nSegment names: " + segment_names.inspect
      end
      raise RGFA::LineMissingError, msg
    end
    seg
  end

  # @return [Array] list of segments connected to +segment+
  #   by links or containments, without duplicates
  #   (NOTE(review): originally documented as Array<String> of names;
  #   the element type depends on neighbours and on the containment
  #   lines -- confirm)
  def connected_segments(segment)
    by_links = [:B, :E].flat_map do |end_type|
      neighbours([segment, end_type]).map {|s, _| s}
    end
    as_contained = contained_in(segment).map {|c| c.to}
    as_container = containing(segment).map {|c| c.from}
    (by_links + as_contained + as_container).uniq
  end

  # Delete all links/containments involving two segments
  # @return [RGFA] self
  # @param segment1 [String, RGFA::Line::Segment] segment 1 name or instance
  # @param segment2 [String, RGFA::Line::Segment] segment 2 name or instance
  def unconnect_segments(segment1, segment2)
    containments_between(segment1, segment2).each {|c| delete_containment(c)}
    containments_between(segment2, segment1).each {|c| delete_containment(c)}
    # all four combinations of segment ends
    [[:B, :E], [:B, :B], [:E, :B], [:E, :E]].each do |end1, end2|
      links_between([segment1, end1], [segment2, end2]).each do |l|
        delete_link(l)
      end
    end
    return self
  end

end