rgfa 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/gfadiff.rb +420 -0
- data/bin/rgfa-findcrisprs.rb +208 -0
- data/bin/rgfa-mergelinear.rb +14 -0
- data/bin/rgfa-simdebruijn.rb +86 -0
- data/lib/rgfa.rb +376 -0
- data/lib/rgfa/byte_array.rb +74 -0
- data/lib/rgfa/cigar.rb +157 -0
- data/lib/rgfa/connectivity.rb +131 -0
- data/lib/rgfa/containments.rb +97 -0
- data/lib/rgfa/error.rb +3 -0
- data/lib/rgfa/field_array.rb +87 -0
- data/lib/rgfa/field_parser.rb +109 -0
- data/lib/rgfa/field_validator.rb +241 -0
- data/lib/rgfa/field_writer.rb +108 -0
- data/lib/rgfa/headers.rb +76 -0
- data/lib/rgfa/line.rb +721 -0
- data/lib/rgfa/line/containment.rb +87 -0
- data/lib/rgfa/line/header.rb +92 -0
- data/lib/rgfa/line/link.rb +379 -0
- data/lib/rgfa/line/path.rb +106 -0
- data/lib/rgfa/line/segment.rb +209 -0
- data/lib/rgfa/linear_paths.rb +285 -0
- data/lib/rgfa/lines.rb +155 -0
- data/lib/rgfa/links.rb +242 -0
- data/lib/rgfa/logger.rb +192 -0
- data/lib/rgfa/multiplication.rb +156 -0
- data/lib/rgfa/numeric_array.rb +196 -0
- data/lib/rgfa/paths.rb +98 -0
- data/lib/rgfa/rgl.rb +194 -0
- data/lib/rgfa/segment_ends_path.rb +9 -0
- data/lib/rgfa/segment_info.rb +162 -0
- data/lib/rgfa/segments.rb +99 -0
- data/lib/rgfa/sequence.rb +65 -0
- data/lib/rgfatools.rb +102 -0
- data/lib/rgfatools/artifacts.rb +29 -0
- data/lib/rgfatools/copy_number.rb +126 -0
- data/lib/rgfatools/invertible_segments.rb +104 -0
- data/lib/rgfatools/linear_paths.rb +140 -0
- data/lib/rgfatools/multiplication.rb +194 -0
- data/lib/rgfatools/p_bubbles.rb +66 -0
- data/lib/rgfatools/superfluous_links.rb +64 -0
- metadata +97 -0
data/lib/rgfa/rgl.rb
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
begin
|
2
|
+
require "rgl/adjacency"
|
3
|
+
require "rgl/implicit"
|
4
|
+
require_relative "error"
|
5
|
+
|
6
|
+
#
|
7
|
+
# Conversion to RGL graphs
|
8
|
+
#
|
9
|
+
module RGFA::RGL

  # Creates an RGL graph.
  #
  # @param oriented [Boolean] (defaults to: <i>+true+</i>) may the graph
  #   contain links of segments in different orientation?
  # @return [RGL::ImplicitGraph] an rgl implicit directed graph
  def to_rgl(oriented: true)
    oriented ? to_rgl_oriented : to_rgl_unoriented
  end

  # Creates an RGL graph, including links orientations.
  #
  # Each segment contributes two vertices, one for each orientation.
  # The successors of a vertex are computed on demand from the links of
  # its segment:
  # - links leaving the segment in the orientation of the vertex lead to
  #   the target segment, in the target orientation of the link;
  # - links entering the segment in the opposite orientation are traversed
  #   backwards and lead to the source segment, in the inverse of the
  #   source orientation of the link.
  #
  # @return [RGL::ImplicitGraph] an rgl implicit directed graph;
  #   where vertices are [RGFA::Segment, orientation] pairs
  #   (instances of the RGFA::OrientedSegment subclass of Array)
  def to_rgl_oriented
    RGL::ImplicitGraph.new do |g|
      g.vertex_iterator do |block|
        self.each_segment do |segment|
          [:+, :-].each do |orient|
            block.call([segment, orient].to_oriented_segment)
          end
        end
      end
      g.adjacent_iterator do |oriented_segment, block|
        s = segment(oriented_segment.segment)
        s.links[:from][oriented_segment.orient].each do |l|
          block.call([segment(l.to), l.to_orient].to_oriented_segment)
        end
        # The lookup key must be the inverted orientation Symbol
        # (+orient_inverted+). +invert_orient+ returns a whole
        # RGFA::OrientedSegment instead, which cannot be used as key here:
        # the links table is indexed by orientation symbols, as done for
        # the :from links above and in #to_rgl_unoriented.
        s.links[:to][oriented_segment.orient_inverted].each do |l|
          os = [segment(l.from), l.from_orient].to_oriented_segment
          # following the link backwards flips the orientation of the source
          block.call(os.invert_orient)
        end
      end
      g.directed = true
    end
  end

  # Creates an RGL graph, assuming that all links orientations
  # are "+".
  #
  # The check on the orientations is done inside the adjacency iterator,
  # i.e. the error is raised when the successors of a vertex are requested,
  # not when the graph object is created.
  #
  # @raise [RGFA::RGL::ValueError] if the graph contains any link where
  #   from_orient or to_orient is :-
  # @return [RGL::ImplicitGraph] an rgl implicit directed graph;
  #   where vertices are RGFA::Segment objects
  def to_rgl_unoriented
    RGL::ImplicitGraph.new do |g|
      g.vertex_iterator {|block| self.each_segment {|s| block.call(s)}}
      g.adjacent_iterator do |s, bl|
        s = segment(s)
        s.links[:from][:+].each do |l|
          if l.to_orient == :-
            raise RGFA::RGL::ValueError,
              "Graph contains links with segments in reverse orientations"
          end
          bl.call(segment(l.to))
        end
        # any link leaving the segment in reverse orientation is an error
        if s.links[:from][:-].size > 0
          raise RGFA::RGL::ValueError,
            "Graph contains links with segments in reverse orientations"
        end
      end
      g.directed = true
    end
  end

  # Hook called when the module is included: makes the methods of
  # ClassMethods available as class methods of the including class.
  #
  # @param base [Module] the class or module including RGFA::RGL
  # @return [void]
  def self.included(base)
    base.extend(ClassMethods)
  end

  module ClassMethods

    # @param g [RGL::ImplicitGraph, RGL::DirectedAdjacencyGraph] an RGL graph.
    #
    # @!macro[new] from_rgl
    #   <b>Accepted vertex formats</b>:
    #
    #   - RGFA::OrientedSegment, or Array which can be converted to it;
    #     where the first element is a <i>segment specifier</i> (see below)
    #   - <i>segment specifier</i> alone: the orientation is assumed to be :+
    #
    #   The <i>segment specifier</i> can be:
    #   - RGFA::Segment instance
    #   - String, segment representation (e.g. "S\tsegment\t*")
    #   - String, valid segment name (e.g. "segment")
    #   - Symbol, valid segment name (e.g. :segment)
    #
    #   @raise [RGFA::RGL::InvalidFormatError] if the graph cannot be
    #     converted
    #
    #   @return [RGFA] a new RGFA instance
    def from_rgl(g)
      gfa = RGFA.new
      unless g.respond_to?(:each_vertex) && g.respond_to?(:each_edge)
        raise RGFA::RGL::InvalidFormatError,
          "#{g} is not a valid RGL graph"
      end
      unless g.directed?
        raise RGFA::RGL::InvalidFormatError,
          "#{g} is not a directed graph"
      end
      g.each_vertex {|v| add_segment_if_new(gfa, v)}
      g.each_edge do |s, t|
        # link fields: from name, from orient, to name, to orient, overlap
        gfa << RGFA::Line::Link.new(segment_name_and_orient(s) +
                                    segment_name_and_orient(t) +
                                    ["*"])
      end
      gfa
    end

    private

    # Adds to +gfa+ the segment described by the vertex +v+, unless a
    # segment with the same name is already present.
    #
    # @param gfa [RGFA] the graph under construction
    # @param v [Object] a vertex, in one of the formats accepted by from_rgl
    # @return [void]
    def add_segment_if_new(gfa, v)
      # RGFA::OrientedSegment or GFA::GraphVertex
      v = v.segment if v.respond_to?(:segment)
      if v.kind_of?(Symbol)
        # segment name as symbol
        return if gfa.segment_names.include?(v)
        v = RGFA::Line::Segment.new([v.to_s, "*"])
      elsif v.kind_of?(String)
        a = v.split("\t")
        if a[0] == "S"
          # string representation of segment
          return if gfa.segment_names.include?(a[1].to_sym)
          v = RGFA::Line::Segment.new(a[1..-1])
        else
          # segment name as string
          return if gfa.segment_names.include?(v.to_sym)
          v = RGFA::Line::Segment.new([v, "*"])
        end
      end
      # at this point v is expected to be a segment line
      return if gfa.segment_names.include?(v.name)
      gfa << v
    end

    # Extracts segment name and orientation from a vertex.
    #
    # @param s [Object] a vertex, in one of the formats accepted by from_rgl
    # @return [Array<String>] the segment name and the orientation
    #   ("+" if the vertex does not provide an orientation)
    def segment_name_and_orient(s)
      # default orientation
      o = s.respond_to?(:orient) ? s.orient.to_s : "+"
      # RGFA::Line::Segment (also embedded in RGFA::OrientedSegment)
      if s.respond_to?(:name)
        s = s.name.to_s
      elsif s.respond_to?(:segment)
        # GFA::GraphVertex
        s = s.segment.to_s
      elsif s.respond_to?(:split)
        # string: either a segment line ("S\tname\t...") or a segment name
        a = s.split("\t")
        s = a[1] if a[0] == "S"
      else
        s = s.to_s
      end
      return s, o
    end

  end

end
|
173
|
+
|
174
|
+
module RGL::Graph

  # Converts the graph into an RGFA instance, by passing it to
  # RGFA.from_rgl.
  #
  # @!macro from_rgl
  def to_rgfa
    return RGFA.from_rgl(self)
  end

end
|
182
|
+
|
183
|
+
# Exception raised if conversion is impossible due to unexpected values
|
184
|
+
class RGFA::RGL::ValueError < RGFA::Error
end
|
185
|
+
|
186
|
+
# Exception raised if conversion is impossible due to general format problems
|
187
|
+
class RGFA::RGL::InvalidFormatError < RGFA::Error
end
|
188
|
+
|
189
|
+
rescue LoadError
|
190
|
+
|
191
|
+
# Empty placeholder, defined when loading the rgl library failed
# (presumably so that references to RGFA::RGL remain valid; verify
# against the code which includes this module).
module RGFA::RGL; end
|
193
|
+
|
194
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# An array containing {RGFA::SegmentEnd} elements, which defines a path
|
2
|
+
# in the graph
|
3
|
+
class RGFA::SegmentEndsPath < Array
  # Create a reverse direction path: the order of the elements is reversed
  # and the end type of each element is inverted.
  #
  # The elements are converted to RGFA::SegmentEnd (see
  # Array#to_segment_end) before inverting their end type.
  #
  # @return [RGFA::SegmentEndsPath] a new path; the receiver is not modified
  def reverse
    inverted = super.map do |segment_end|
      segment_end.to_segment_end.invert_end_type
    end
    # Array#map returns a plain Array: wrap the result, so that the
    # returned object is again a path (as documented) and reversing it
    # once more also inverts the end types
    self.class.new(inverted)
  end
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
require_relative "error"
|
2
|
+
|
3
|
+
# A segment or segment name plus an additional boolean attribute
|
4
|
+
#
|
5
|
+
# This class shall not be initialized directly.
|
6
|
+
# @api private
|
7
|
+
#
|
8
|
+
class RGFA::SegmentInfo < Array

  # Check that the elements of the array are compatible with the definition.
  #
  # The valid values of the attribute are read from the +ATTR+ constant
  # of the class of the instance.
  #
  # @!macro [new] segment_info_validation_errors
  #   @raise [RGFA::SegmentInfo::InvalidSizeError] if size is not 2
  #   @raise [RGFA::SegmentInfo::InvalidAttributeError] if second element
  #     is not a valid info
  # @return [void]
  def validate!
    if size != 2
      raise RGFA::SegmentInfo::InvalidSizeError,
        "Wrong n of elements, 2 expected (#{inspect})"
    end
    if !self.class::ATTR.include?(self[1])
      raise RGFA::SegmentInfo::InvalidAttributeError,
        "Invalid attribute (#{self[1].inspect})"
    end
    return nil
  end

  # @return [Symbol, RGFA::Line::Segment] the segment instance or name
  def segment
    self[0]
  end

  # Set the segment
  # @param value [Symbol, RGFA::Line::Segment] the segment instance or name
  # @return [Symbol, RGFA::Line::Segment] +value+
  def segment=(value)
    self[0]=value
  end

  # @return [Symbol] the segment name
  def name
    self[0].kind_of?(RGFA::Line::Segment) ? self[0].name : self[0].to_sym
  end

  # @return [Symbol] the attribute
  def attribute
    self[1]
  end

  # Set the attribute
  # @param value [Symbol] the attribute
  # @return [Symbol] +value+
  def attribute=(value)
    self[1]=(value)
  end

  # @return [Symbol] the other possible value of the attribute
  def attribute_inverted
    # ATTR contains two values: select the one which is not the current one
    self.class::ATTR[self.class::ATTR[0] == self[1] ? 1 : 0]
  end

  # @return [RGFA::SegmentInfo] same segment, inverted attribute
  #   (a new instance of the same class as the receiver)
  def invert_attribute
    self.class.new([self[0], self.attribute_inverted])
  end

  # @param [Symbol] attribute an attribute value
  # @raise [RGFA::SegmentInfo::InvalidAttributeError] if +attribute+
  #   is not one of the values in +ATTR+
  # @return [Symbol] the other attribute value
  def self.invert(attribute)
    i = self::ATTR.index(attribute.to_sym)
    if i.nil?
      # report the argument: in a class method +self+ is the class, so
      # that +self[1]+ would not refer to any attribute value
      raise RGFA::SegmentInfo::InvalidAttributeError,
        "Invalid attribute (#{attribute.inspect})"
    end
    # index 1 -> 0; index 0 -> -1, i.e. the last element
    return self::ATTR[i-1]
  end

  # @return [String] name of the segment and attribute
  def to_s
    "#{name}#{attribute}"
  end

  # @return [Symbol] name of the segment and attribute
  def to_sym
    to_s.to_sym
  end

  # Compare the segment names and attributes of two instances
  #
  # +other+ is converted to the class of the receiver using
  # Array#to_segment_info, and the string representations are compared.
  #
  # @param [RGFA::SegmentInfo] other the other instance
  # @return [Boolean]
  def ==(other)
    to_s == other.to_segment_info(self.class).to_s
  end

  # Compare the segment names and attributes of two instances
  #
  # The result is that of the comparison of the string representations.
  #
  # @param [RGFA::SegmentInfo] other the other instance
  # @return [Integer] -1, 0 or 1
  def <=>(other)
    to_s <=> other.to_segment_info(self.class).to_s
  end

end
|
106
|
+
|
107
|
+
# Error raised if the size of the array is wrong
|
108
|
+
class RGFA::SegmentInfo::InvalidSizeError < RGFA::Error
end
|
109
|
+
|
110
|
+
# Error raised if an unknown value for attribute is used
|
111
|
+
class RGFA::SegmentInfo::InvalidAttributeError < RGFA::Error
end
|
112
|
+
|
113
|
+
# A representation of a segment end
|
114
|
+
class RGFA::SegmentEnd < RGFA::SegmentInfo
  # End type of the begin of a segment
  END_TYPE_BEGIN = :B
  # End type of the end of a segment
  END_TYPE_END = :E
  # Segment end type (begin or end)
  ATTR = [END_TYPE_BEGIN, END_TYPE_END]

  # End-type flavoured names for the attribute methods of the superclass
  alias end_type attribute
  alias end_type= attribute=
  alias invert_end_type invert_attribute
  alias end_type_inverted attribute_inverted
end
|
122
|
+
|
123
|
+
# A segment plus orientation
|
124
|
+
class RGFA::OrientedSegment < RGFA::SegmentInfo
  # Forward orientation
  ORIENT_FWD = :+
  # Reverse orientation
  ORIENT_REV = :-
  # Segment orientation
  ATTR = [ORIENT_FWD, ORIENT_REV]

  # Orientation flavoured names for the attribute methods of the superclass
  alias orient attribute
  alias orient= attribute=
  alias invert_orient invert_attribute
  alias orient_inverted attribute_inverted
end
|
132
|
+
|
133
|
+
class Array

  # Create and validate a segment end from an array
  # @!macro segment_info_validation_errors
  # @return [RGFA::SegmentEnd]
  def to_segment_end
    to_segment_info(RGFA::SegmentEnd)
  end

  # Create and validate an oriented segment from an array
  # @!macro segment_info_validation_errors
  # @return [RGFA::OrientedSegment]
  def to_oriented_segment
    to_segment_info(RGFA::OrientedSegment)
  end

  protected

  # Convert the array into an instance of +subclass+.
  #
  # The receiver is returned unchanged if it is already an instance of
  # +subclass+. Otherwise the String elements are converted to symbols,
  # and the new instance is validated before it is returned.
  #
  # @param subclass [Class] RGFA::SegmentEnd or RGFA::OrientedSegment
  # @return [RGFA::SegmentInfo]
  def to_segment_info(subclass)
    return self if is_a?(subclass)
    # support converting from gfa gem GraphVertex objects:
    # NOTE(review): this branch always creates an RGFA::OrientedSegment,
    # whatever +subclass+ is, and does not validate it -- confirm that
    # this is intended.
    if respond_to?(:segment) && respond_to?(:orient)
      return RGFA::OrientedSegment.new([segment.to_sym, orient.to_sym])
    end
    symbolized = map do |element|
      element.is_a?(String) ? element.to_sym : element
    end
    converted = subclass.new(symbolized)
    converted.validate!
    converted
  end

end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
require_relative "error"
|
2
|
+
|
3
|
+
#
|
4
|
+
# Methods for the RGFA class, which allow to handle segments in the graph.
|
5
|
+
#
|
6
|
+
module RGFA::Segments

  # Add a segment line to the graph.
  #
  # If a virtual segment with the same name is already present, the
  # existing segment is made real using the new line.
  #
  # @param gfa_line [Object] the segment; it is converted using
  #   +to_rgfa_line+
  # @raise [RGFA::DuplicatedLabelError] if a path or a non-virtual
  #   segment with the same name already exists
  def add_segment(gfa_line)
    line = gfa_line.to_rgfa_line(validate: @validate)
    segment_name = line.name
    if @paths.has_key?(segment_name)
      raise RGFA::DuplicatedLabelError,
        "Error when adding line: #{line}\n" \
        "a path already exists with the name: #{segment_name}\n" \
        "Path: #{@paths[segment_name]}"
    end
    unless @segments.has_key?(segment_name)
      return @segments[segment_name] = line
    end
    existing = @segments[segment_name]
    return existing.real!(line) if existing.virtual?
    raise RGFA::DuplicatedLabelError,
      "Error when adding line: #{line}\n" \
      "a segment already exists with the name: #{segment_name}\n" \
      "Segment: #{existing}"
  end
  protected :add_segment

  # Delete a segment from the RGFA graph
  # @return [RGFA] self
  # @param s [String, RGFA::Line::Segment] segment name or instance
  # @param cascade [Boolean] if true, the links and containments
  #   connecting the segment to other segments are removed first, and
  #   delete_path is called for each path listed by the segment
  # @raise [RGFA::LineMissingError] if no such segment exists
  def delete_segment(s, cascade=true)
    target = segment!(s)
    if cascade
      connected_segments(target).each do |other|
        unconnect_segments(target, other)
      end
      [:+, :-].each do |orient|
        target.paths[orient].each {|path| delete_path(path)}
      end
    end
    @segments.delete(target.name)
    self
  end

  # All segment lines of the graph
  # @return [Array<RGFA::Line::Segment>]
  def segments
    @segments.values
  end

  # @!macro [new] segment
  #   Searches the segment with name equal to +segment_name+.
  #   @param s [String, RGFA::Line::Segment] a segment or segment name
  #   @return [RGFA::Line::Segment] if a segment is found
  #   @return [nil] if no such segment exists in the RGFA instance
  #
  def segment(s)
    # line instances are returned as they are, without any lookup
    s.kind_of?(RGFA::Line) ? s : @segments[s.to_sym]
  end

  # @!macro segment
  # @raise [RGFA::LineMissingError] if no such segment exists
  def segment!(s)
    found = segment(s)
    return found unless found.nil?
    message = "No segment has name #{s}"
    # the known names are listed only if they are few
    if segment_names.size < 10
      message += "\nSegment names: " + segment_names.inspect
    end
    raise RGFA::LineMissingError, message
  end

  # @return [Array<String>] list of names of segments connected to +segment+
  #   by links or containments
  def connected_segments(segment)
    at_begin = neighbours([segment, :B]).map {|other, _end_type| other}
    at_end = neighbours([segment, :E]).map {|other, _end_type| other}
    containment_targets = contained_in(segment).map {|c| c.to}
    containment_sources = containing(segment).map {|c| c.from}
    (at_begin + at_end + containment_targets + containment_sources).uniq
  end

  # Delete all links/containments involving two segments
  # @return [RGFA] self
  # @param segment1 [String, RGFA::Line::Segment] segment 1 name or instance
  # @param segment2 [String, RGFA::Line::Segment] segment 2 name or instance
  def unconnect_segments(segment1, segment2)
    # containments, in both directions
    [[segment1, segment2], [segment2, segment1]].each do |from, to|
      containments_between(from, to).each {|c| delete_containment(c)}
    end
    # links, for each combination of segment ends
    [[:B, :E], [:B, :B], [:E, :B], [:E, :E]].each do |end1, end2|
      links = links_between([segment1, end1], [segment2, end2])
      links.each {|l| delete_link(l)}
    end
    self
  end

end
|