gfa 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +22 -18
- data/bin/gfa-add-gaf +70 -0
- data/bin/gfa-subgraph +41 -0
- data/lib/gfa/common.rb +33 -8
- data/lib/gfa/field/char.rb +2 -1
- data/lib/gfa/field/float.rb +18 -1
- data/lib/gfa/field/hex.rb +18 -1
- data/lib/gfa/field/json.rb +10 -1
- data/lib/gfa/field/numarray.rb +29 -4
- data/lib/gfa/field/sigint.rb +14 -1
- data/lib/gfa/field/string.rb +10 -1
- data/lib/gfa/field.rb +82 -10
- data/lib/gfa/generator.rb +3 -3
- data/lib/gfa/graph.rb +139 -4
- data/lib/gfa/parser.rb +78 -22
- data/lib/gfa/record/comment.rb +7 -2
- data/lib/gfa/record/containment.rb +12 -7
- data/lib/gfa/record/has_from_to.rb +47 -0
- data/lib/gfa/record/header.rb +2 -0
- data/lib/gfa/record/jump.rb +11 -30
- data/lib/gfa/record/link.rb +11 -29
- data/lib/gfa/record/path.rb +32 -6
- data/lib/gfa/record/segment.rb +8 -4
- data/lib/gfa/record/walk.rb +6 -6
- data/lib/gfa/record.rb +34 -14
- data/lib/gfa/record_set/comment_set.rb +3 -0
- data/lib/gfa/record_set/containment_set.rb +4 -0
- data/lib/gfa/record_set/header_set.rb +3 -0
- data/lib/gfa/record_set/jump_set.rb +3 -0
- data/lib/gfa/record_set/link_set.rb +3 -0
- data/lib/gfa/record_set/path_set.rb +4 -0
- data/lib/gfa/record_set/segment_set.rb +4 -0
- data/lib/gfa/record_set/walk_set.rb +3 -0
- data/lib/gfa/record_set.rb +121 -0
- data/lib/gfa/version.rb +1 -1
- data/test/common_test.rb +5 -5
- data/test/field_test.rb +52 -26
- data/test/parser_test.rb +52 -13
- data/test/record_test.rb +7 -0
- data/test/test_helper.rb +5 -0
- metadata +18 -6
data/lib/gfa/graph.rb
CHANGED
@@ -2,7 +2,6 @@ require 'rgl/adjacency'
|
|
2
2
|
require 'rgl/implicit'
|
3
3
|
|
4
4
|
class GFA
|
5
|
-
|
6
5
|
##
|
7
6
|
# Generates a RGL::ImplicitGraph object describing the links in the GFA.
|
8
7
|
# The +opts+ argument is a hash with any of the following key-value pairs:
|
@@ -22,9 +21,145 @@ class GFA
|
|
22
21
|
def adjacency_graph(opts = {})
|
23
22
|
implicit_graph(opts).to_adjacency
|
24
23
|
end
|
25
|
-
|
24
|
+
|
25
|
+
##
# Extracts the subset of records associated to +segments+, which is an Array
# with values of any class in: Integer (segment index),
# String or GFA::Field::String (segment names), or GFA::Record::Segment.
#
# +degree+ indicates the maximum degree of separation between the original
# segment set and any additional segments. Use 0 to include only the segments
# in the set. Use 1 to include those, the records linking to them, and the
# additional segments linked by those records. Use any integer greater than 1
# to prompt additional rounds of greedy graph expansion.
#
# If +headers+, it includes all the original headers. Otherwise it only
# includes the version header (might be inferred).
#
# All comments are ignored even if originally parsed. Walks are currently
# ignored too. If the current GFA object doesn't have an index, it builds one
# and forces +index: true+. The output object inherits all options.
#
# Raises a RuntimeError if any entry of +segments+ cannot be resolved to a
# segment. Returns the new GFA object.
def subgraph(segments, degree: 1, headers: true)
  # Prepare objects
  unless opts[:index]
    opts[:index] = true
    rebuild_index!
  end
  gfa = GFA.new(opts)
  segments =
    segments.map do |i|
      next i if i.is_a?(GFA::Record::Segment)

      segment(i) || raise("Cannot find segment: #{i}")
    end

  # Headers (the +headers+ keyword shadows the method, hence +self.+)
  if headers
    self.headers.set.each { |record| gfa << record }
  else
    gfa << GFA::Record::Header.new("VN:Z:#{gfa_version}")
  end

  # Original segments
  segments.each { |segment| gfa << segment }

  # Expand graph. +linking_records+ appends every newly reached segment to
  # +gfa.segments+, so the internal-link pass must look at that expanded set
  # (not at the initial Array) to return a consistent collection of edges.
  linking, edges = linking_records(gfa.segments, degree: degree)
  linking += internally_linking_records(gfa.segments, edges)
  linking.each { |record| gfa << record }

  # Return
  gfa
end
|
73
|
+
|
74
|
+
##
# Finds all the records linking to any segments in +segments+, a
# GFA::RecordSet::SegmentSet object, and expands to links with up to
# +degree+ degrees of separation.
#
# It only evaluates the edges given in the +edges+ Array of GFA::Record
# values. If +edges+ is +nil+, it uses the full set of edges in the gfa.
# Edge GFA::Record objects can be of type Link, Containment, Jump, or Path.
#
# If +_ignore+ is passed, it ignores this number of segments at the beginning
# of the +segments+ set (assumes they have already been evaluated). This is
# only used for internal heuristics.
#
# Returns an Array with two elements:
# 0. An array of GFA::Record objects with all the identified linking records
# 1. An array of GFA::Record objects with all edges that were not identified
#
# Raises a RuntimeError if +segments+ is not a GFA::RecordSet::SegmentSet.
#
# IMPORTANT NOTE 1: The object +segments+ will be modified to include all
# linked segments. If you don't want this behaviour, please make sure to pass
# a duplicate of the object instead. The +edges+ Array, when given, is also
# modified in place: identified records are removed from it.
#
# IMPORTANT NOTE 2: The list of linking records may not comprehensively
# include all records linking the identified expanded segment set. To ensure
# a consistent set is identified, use:
#   linking, edges = gfa.linking_records(segments)
#   linking += gfa.internally_linking_records(segments, edges)
#
# Records that reference a segment missing from the index are still reported
# as linking, but the missing segment is skipped during expansion.
def linking_records(segments, degree: 1, edges: nil, _ignore: 0)
  unless segments.is_a? GFA::RecordSet::SegmentSet
    raise "Unrecognised class: #{segments.class}"
  end

  # Gather edges to evaluate
  edges ||= all_edges
  return [[], edges] if degree <= 0

  # Links, Containments, Jumps (from, to) and Paths (segment_names)
  linking = []
  eval_set = _ignore == 0 ? segments.set : segments.set[_ignore..]
  edges.delete_if do |record|
    hit = eval_set.any? { |segment| record.include? segment }
    linking << record if hit
    hit # Matched records leave the edge set to speed up future recursions
  end

  # Add additional linked segments
  pre = segments.size
  linking.each do |record|
    record.segments(self).each do |other_seg|
      next if other_seg.nil? # Dangling reference: not in the index

      segments << other_seg unless segments[other_seg.name]
    end
  end

  # Recurse only if new segments were discovered
  if segments.size > pre
    $stderr.puts "- Recursion [#{degree}]: " \
      "#{pre} -> #{segments.size}\t(#{edges.size})"
    linking +=
      linking_records(
        segments,
        degree: degree - 1, edges: edges, _ignore: pre
      )[0]
  end
  [linking, edges]
end
|
146
|
+
|
147
|
+
##
# Selects, among +edges+, the records whose linked segments are all included
# in +segments+. Each record must respond to +segment_names_a+ (an Array of
# segment names as strings).
#
# +segments+ can be any object responding to +set+ (returning the collection
# of segment records) or a plain Array of segment records. Segments are
# matched by +name.value+.
#
# Prints a progress note to +$stderr+ and returns a new Array; +edges+ is not
# modified.
def internally_linking_records(segments, edges)
  $stderr.puts '- Gathering internally linking records'
  members = segments.respond_to?(:set) ? segments.set : segments
  known = members.each_with_object({}) { |i, names| names[i.name.value] = true }
  edges.select { |record| record.segment_names_a.all? { |s| known[s] } }
end
|
152
|
+
|
153
|
+
##
# Returns an array of GFA::Record objects including all possible edges
# from the GFA. I.e., all links, jumps, containments, and paths.
#
# The Array is built anew on every call, in the order Link, Jump,
# Containment, Path.
def all_edges
  %i[Link Jump Containment Path].flat_map { |t| records[t].set }
end
|
160
|
+
|
26
161
|
private
|
27
|
-
|
162
|
+
|
28
163
|
def segment_names_with_orient
|
29
164
|
segments.flat_map do |s|
|
30
165
|
%w[+ -].map { |orient| GFA::GraphVertex.idx(s, orient) }
|
@@ -57,7 +192,7 @@ class GFA
|
|
57
192
|
opts
|
58
193
|
end
|
59
194
|
|
60
|
-
def rgl_implicit_adjacent_iterator(x,b,opts)
|
195
|
+
def rgl_implicit_adjacent_iterator(x, b, opts)
|
61
196
|
links.each do |l|
|
62
197
|
if l.from?(x.segment, x.orient)
|
63
198
|
orient = opts[:orient] ? l.to_orient : nil
|
data/lib/gfa/parser.rb
CHANGED
@@ -4,43 +4,99 @@ class GFA
|
|
4
4
|
# Class-level
|
5
5
|
MIN_VERSION = '1.0'
|
6
6
|
MAX_VERSION = '1.2'
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
7
|
+
|
8
|
+
##
# Builds a GFA object from the records stored in the gfa +file+. The +opts+
# hash is passed both to the new GFA object and to the record reader:
# - index: If the records should be indexed as loaded (default: true)
# - index_id: If the records should also be indexed by ID (default: false)
# - comments: If the comment records should be saved (default: false)
# - line_range: Two-integer array indicating the first and last lines to read
#   (default: nil, read the entire file)
def self.load(file, opts = {})
  GFA.new(opts).tap do |gfa|
    read_records(file, opts) { |record| gfa << record }
  end
end
|
15
|
-
|
22
|
+
|
23
|
+
##
# Reads the gfa +file+ line by line and yields one parsed record
# (+GFA::Record[line]+) per retained line. Supported +opts+:
# - comments: Unless true, lines starting with '#' are skipped
# - line_range: Two-integer array with the first and last line to read, both
#   inclusive and counted from 0 (default: nil, read the entire file)
#
# Reading stops as soon as the end of +line_range+ has been passed, so the
# remainder of the file is not scanned.
def self.read_records(file, opts = {})
  rng = opts[:line_range]
  File.open(file, 'r') do |fh|
    fh.each_with_index do |ln, lno|
      if rng
        next if lno < rng[0]
        break if lno > rng[1] # Nothing else to read in this range
      end
      next if !opts[:comments] && ln[0] == '#'

      yield(GFA::Record[ln])
    end
  end
end
|
36
|
+
|
37
|
+
##
# Load a GFA object from a gfa +file+ in parallel using +thr+ child
# processes, and the same +opts+ supported by +load+. Defaults to the +load+
# method instead if +thr <= 1+, or if the file has too few lines (0 or 1) to
# be split.
#
# Each child parses a contiguous block of lines and sends the resulting
# records back through a pipe using Marshal. Raises a RuntimeError if a child
# returns no data.
def self.load_parallel(file, thr, opts = {})
  return self.load(file, opts) if thr <= 1

  # Prepare data
  lno = 0
  File.open(file, 'r') { |fh| fh.each { lno += 1 } }
  thr = lno if thr > lno
  # An empty file would otherwise produce 0 / 0 below, and a single line
  # cannot be split across processes
  return self.load(file, opts) if thr <= 1

  blk = (lno.to_f / thr).ceil

  # Launch children processes
  io = []
  pid = []
  thr.times do |i|
    io[i] = IO.pipe
    pid << fork do
      io[i][0].close
      o = opts.merge(line_range: [i * blk, (i + 1) * blk - 1])
      records = []
      read_records(file, o) { |record| records << record }
      Marshal.dump(records, io[i][1])
      exit!(0)
    end
    io[i][1].close
  end

  # Collect and merge results, in block order so records keep the file order
  gfa = GFA.new(opts)
  io.each_with_index do |pipe, k|
    result = pipe[0].read
    Process.wait(pid[k])
    raise "Child process failed: #{k}" if result.empty?

    # The payload comes from our own child processes, not from external input
    Marshal.load(result).each { |record| gfa << record }
    pipe[0].close
  end

  return gfa
end
|
78
|
+
|
16
79
|
##
# Is the version +v+ within the supported range? The comparison is numeric:
# +v+, MIN_VERSION and MAX_VERSION are all converted with +to_f+, and both
# bounds are inclusive.
def self.supported_version?(v)
  lowest = MIN_VERSION.to_f
  v.to_f >= lowest && v.to_f <= MAX_VERSION.to_f
end
|
19
82
|
|
20
83
|
# Instance-level
|
21
84
|
##
# Adds +obj+ to the record collection matching its type. Anything that is not
# already a GFA::Record is first converted with +GFA::Record[obj]+; nil or
# empty records are silently dropped. A header record carrying a VN field
# also sets the GFA version.
def <<(obj)
  record = obj.is_a?(GFA::Record) ? obj : GFA::Record[obj]
  return if record.nil? || record.empty?

  @records[record.type] << record
  return unless record.type == :Header

  set_gfa_version(record.VN.value) unless record.VN.nil?
end
|
30
93
|
|
31
94
|
##
# Registers +v+ as the GFA version. +v+ may be a GFA::Field, in which case
# its value is used. Raises a RuntimeError if the version is rejected by
# +GFA::supported_version?+.
def set_gfa_version(v)
  v = v.value if v.is_a? GFA::Field
  raise "GFA version currently unsupported: #{v}" unless GFA::supported_version? v

  @gfa_version = v
end
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
def parse_line(ln)
|
41
|
-
ln.chomp!
|
42
|
-
return nil if ln =~ /^\s*$/
|
43
|
-
cols = ln.split("\t")
|
44
|
-
GFA::Record.code_class(cols.shift).new(*cols)
|
45
|
-
end
|
46
102
|
end
|
data/lib/gfa/record/comment.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
##
# Comment record (record code +#+). It has a single required field, the
# comment text, stored as field 2 with type +:Z+.
class GFA::Record::Comment < GFA::Record
  CODE = :'#'
  REQ_FIELDS = %i[comment]
  OPT_FIELDS = {}

  # One reader per required field; required fields start at index 2
  REQ_FIELDS.each_with_index do |field_name, offset|
    define_method(field_name) { fields[offset + 2] }
  end

  ##
  # Creates the record from the +comment+ text plus any optional fields
  def initialize(comment, *opt_fields)
    @fields = {}
    add_field(2, :Z, comment, /.*/)
    opt_fields.each { |opt| add_opt_field(opt, OPT_FIELDS) }
  end
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'gfa/record/has_from_to'
|
2
|
+
|
1
3
|
class GFA::Record::Containment < GFA::Record
|
2
4
|
CODE = :C
|
3
5
|
REQ_FIELDS = %i[from from_orient to to_orient pos overlap]
|
@@ -10,20 +12,23 @@ class GFA::Record::Containment < GFA::Record
|
|
10
12
|
REQ_FIELDS.each_index do |i|
|
11
13
|
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
12
14
|
end
|
15
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
16
|
+
|
17
|
+
include GFA::Record::HasFromTo
|
13
18
|
|
14
19
|
alias container from
|
15
20
|
alias container_orient from_orient
|
16
21
|
alias contained to
|
17
22
|
alias contained_orient to_orient
|
18
|
-
|
23
|
+
|
19
24
|
##
# Creates a containment record from its six required columns, registered in
# order as fields 2 to 7, followed by any optional fields
def initialize(from, from_orient, to, to_orient, pos, overlap, *opt_fields)
  @fields = {}
  [
    [:Z, from,        /[!-)+-<>-~][!-~]*/],
    [:Z, from_orient, /[+-]/],
    [:Z, to,          /[!-)+-<>-~][!-~]*/],
    [:Z, to_orient,   /[+-]/],
    [:i, pos,         /[0-9]*/],
    [:Z, overlap,     /\*|([0-9]+[MIDNSHPX=])+/]
  ].each_with_index do |(type, value, rule), offset|
    add_field(offset + 2, type, value, rule)
  end
  opt_fields.each { |opt| add_opt_field(opt, OPT_FIELDS) }
end
|
29
34
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
##
# Behaviour shared by records that connect a +from+ segment to a +to+
# segment. The including class must provide +from+, +to+ and +fields+, with
# the from name/orientation in fields 2/3 and the to name/orientation in
# fields 4/5.
module GFA::Record::HasFromTo
  ##
  # Does the record start at +segment+ (optionally with orientation +orient+)?
  def from?(segment, orient = nil)
    links_from_to?(segment, orient, true)
  end

  ##
  # Does the record end at +segment+ (optionally with orientation +orient+)?
  def to?(segment, orient = nil)
    links_from_to?(segment, orient, false)
  end

  ##
  # Extracts both linked segments from +gfa+ (which *must* be indexed), as a
  # two-element Array: the from segment, then the to segment
  def segments(gfa)
    raise "Unindexed GFA" unless gfa.indexed?

    [from, to].map { |link_end| gfa.segments[link_end.value] }
  end

  ##
  # Is +segment+ either end of the record? +segment+ only needs to respond to
  # +name+; its class is not checked
  def include?(segment)
    [from, to].any? { |link_end| segment.name == link_end }
  end

  ##
  # Array of strings with the names of the two segments linked by the
  # record: from, then to
  def segment_names_a
    [from, to].map(&:value)
  end

  private

  # Compares +segment+ (and +orient+, unless nil) against the from end when
  # +from+ is true, or against the to end otherwise
  def links_from_to?(segment, orient, from)
    wanted = segment_name(segment)
    orient = orient.value if orient.is_a? GFA::Field
    name_idx = from ? 2 : 4
    wanted == fields[name_idx].value &&
      (orient.nil? || orient == fields[name_idx + 1].value)
  end

  # Reduces a segment record or a field to its plain value; anything else is
  # returned unchanged
  def segment_name(segment)
    case segment
    when GFA::Record::Segment then segment.name.value
    when GFA::Field then segment.value
    else segment
    end
  end
end
|
data/lib/gfa/record/header.rb
CHANGED
data/lib/gfa/record/jump.rb
CHANGED
@@ -1,45 +1,26 @@
|
|
1
|
+
require 'gfa/record/has_from_to'
|
2
|
+
|
1
3
|
##
# Jump record (record code +J+): connects a from segment to a to segment,
# each with an orientation, plus a distance column
class GFA::Record::Jump < GFA::Record
  CODE = :J
  REQ_FIELDS = %i[from from_orient to to_orient distance]
  OPT_FIELDS = {
    SC: :i # 1 indicates indirect shortcut connections. Only 0/1 allowed.
  }

  # Readers for the required fields (stored from index 2 onwards) and for
  # the optional fields (stored under their tag)
  REQ_FIELDS.each_with_index do |field_name, offset|
    define_method(field_name) { fields[offset + 2] }
  end
  OPT_FIELDS.each_key do |tag|
    define_method(tag) { fields[tag] }
  end

  include GFA::Record::HasFromTo

  ##
  # Creates a jump record from its five required columns, registered in
  # order as fields 2 to 6, followed by any optional fields
  def initialize(from, from_orient, to, to_orient, distance, *opt_fields)
    @fields = {}
    [
      [from,        /[!-)+-<>-~][!-~]*/],
      [from_orient, /[+-]/],
      [to,          /[!-)+-<>-~][!-~]*/],
      [to_orient,   /[+-]/],
      [distance,    /\*|[-+]?[0-9]+/]
    ].each_with_index do |(value, rule), offset|
      add_field(offset + 2, :Z, value, rule)
    end
    opt_fields.each { |opt| add_opt_field(opt, OPT_FIELDS) }
  end
end
|
data/lib/gfa/record/link.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'gfa/record/has_from_to'
|
2
|
+
|
1
3
|
class GFA::Record::Link < GFA::Record
|
2
4
|
CODE = :L
|
3
5
|
REQ_FIELDS = %i[from from_orient to to_orient overlap]
|
@@ -9,41 +11,21 @@ class GFA::Record::Link < GFA::Record
|
|
9
11
|
KC: :i, # k-mer count
|
10
12
|
ID: :Z # Edge identifier
|
11
13
|
}
|
12
|
-
|
14
|
+
|
13
15
|
REQ_FIELDS.each_index do |i|
|
14
16
|
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
15
17
|
end
|
18
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
19
|
+
|
20
|
+
include GFA::Record::HasFromTo
|
16
21
|
|
17
22
|
##
# Creates a link record from its five required columns, registered in order
# as fields 2 to 6, followed by any optional fields
def initialize(from, from_orient, to, to_orient, overlap, *opt_fields)
  @fields = {}
  [
    [from,        /[!-)+-<>-~][!-~]*/],
    [from_orient, /[+-]/],
    [to,          /[!-)+-<>-~][!-~]*/],
    [to_orient,   /[+-]/],
    [overlap,     /\*|([0-9]+[MIDNSHPX=])+/]
  ].each_with_index do |(value, rule), offset|
    add_field(offset + 2, :Z, value, rule)
  end
  opt_fields.each { |opt| add_opt_field(opt, OPT_FIELDS) }
end
|
26
|
-
|
27
|
-
def from?(segment, orient = nil)
|
28
|
-
links_from_to?(segment, orient, true)
|
29
|
-
end
|
30
|
-
|
31
|
-
def to?(segment, orient = nil)
|
32
|
-
links_from_to?(segment, orient, false)
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
36
|
-
|
37
|
-
def links_from_to?(segment, orient, from)
|
38
|
-
segment = segment_name(segment)
|
39
|
-
orient = orient.value if orient.is_a? GFA::Field
|
40
|
-
base_k = from ? 2 : 4
|
41
|
-
segment==fields[base_k].value &&
|
42
|
-
(orient.nil? || orient==fields[base_k + 1].value)
|
43
|
-
end
|
44
|
-
|
45
|
-
def segment_name(segment)
|
46
|
-
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
47
|
-
segment.is_a?(GFA::Field) ? segment.value : segment
|
48
|
-
end
|
49
31
|
end
|
data/lib/gfa/record/path.rb
CHANGED
@@ -1,19 +1,45 @@
|
|
1
1
|
##
# Path record (record code +P+): a named list of oriented segments together
# with an overlaps column
class GFA::Record::Path < GFA::Record
  CODE = :P
  REQ_FIELDS = %i[path_name segment_names overlaps]
  OPT_FIELDS = {}

  # One reader per required field; required fields start at index 2
  REQ_FIELDS.each_with_index do |field_name, offset|
    define_method(field_name) { fields[offset + 2] }
  end

  alias_method :segment_name, :segment_names
  alias_method :cigar, :overlaps

  ##
  # Creates a path record from its three required columns, registered in
  # order as fields 2 to 4, followed by any optional fields
  def initialize(path_name, segment_names, overlaps, *opt_fields)
    @fields = {}
    [
      [path_name,     /[!-)+-<>-~][!-~]*/],
      [segment_names, /[!-)+-<>-~][!-~]*/],
      [overlaps,      /\*|([0-9]+[MIDNSHPX=]|[-+]?[0-9]+J|.)+/]
    ].each_with_index do |(value, rule), offset|
      add_field(offset + 2, :Z, value, rule)
    end
    opt_fields.each { |opt| add_opt_field(opt, OPT_FIELDS) }
  end

  ##
  # Array of segment names as strings, in path order. The list is split on
  # commas and semicolons, and the trailing orientation sign (+ or -) of
  # each entry is removed
  def segment_names_a
    oriented = segment_names.value.split(/[,;]/)
    oriented.map { |step| step.gsub(/[+-]$/, '') }
  end

  ##
  # Extracts all the segments in the path from +gfa+ (which *must* be
  # indexed), in path order
  def segments(gfa)
    raise "Unindexed GFA" unless gfa.indexed?

    segment_names_a.map { |step_name| gfa.segments[step_name] }
  end

  ##
  # Is +segment+ part of the path? +segment+ only needs to respond to
  # +name+; its class is not checked
  def include?(segment)
    segment_names_a.any? { |step_name| segment.name == step_name }
  end
end
|
data/lib/gfa/record/segment.rb
CHANGED
@@ -8,18 +8,22 @@ class GFA::Record::Segment < GFA::Record
|
|
8
8
|
KC: :i, # k-mer count
|
9
9
|
SH: :H, # SHA-256 checksum of the sequence
|
10
10
|
UR: :Z, # URI or local file-system path of the sequence
|
11
|
-
# Non-cannonical
|
12
|
-
DP: :f
|
11
|
+
# Non-canonical but uppercase (thus, reserved)
|
12
|
+
DP: :f, # SAM
|
13
|
+
SN: :Z, # rGFA: Name of stable sequence from which the segment is derived
|
14
|
+
SO: :i, # rGFA: Offset on the stable sequence
|
15
|
+
SR: :i # rGFA: Rank. 0 if on a linear reference genome; >0 otherwise
|
13
16
|
}
|
14
17
|
|
15
18
|
REQ_FIELDS.each_index do |i|
|
16
19
|
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
17
20
|
end
|
21
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
18
22
|
|
19
23
|
##
# Creates a segment record from its +name+ (field 2) and +sequence+
# (field 3), followed by any optional fields
def initialize(name, sequence, *opt_fields)
  @fields = {}
  [
    [name,     /[!-)+-<>-~][!-~]*/],
    [sequence, /\*|[A-Za-z=.]+/]
  ].each_with_index do |(value, rule), offset|
    add_field(offset + 2, :Z, value, rule)
  end
  opt_fields.each { |opt| add_opt_field(opt, OPT_FIELDS) }
end
|
25
29
|
end
|
data/lib/gfa/record/walk.rb
CHANGED
@@ -9,12 +9,12 @@ class GFA::Record::Walk < GFA::Record
|
|
9
9
|
|
10
10
|
##
# Creates a walk record from its six required columns, registered in order
# as fields 2 to 7, followed by any optional fields
def initialize(sample_id, hap_index, seq_id, seq_start, seq_end, walk, *opt_fields)
  @fields = {}
  [
    [:Z, sample_id, /[!-)+-<>-~][!-~]*/],
    [:i, hap_index, /[0-9]+/],
    [:Z, seq_id,    /[!-)+-<>-~][!-~]*/],
    [:i, seq_start, /\*|[0-9]+/],
    [:i, seq_end,   /\*|[0-9]+/],
    [:Z, walk,      /([><][!-;=?-~]+)+/]
  ].each_with_index do |(type, value, rule), offset|
    add_field(offset + 2, type, value, rule)
  end
  opt_fields.each { |opt| add_opt_field(opt, OPT_FIELDS) }
end
|
20
20
|
end
|