gfa 0.2.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +22 -18
- data/bin/gfa-add-gaf +70 -0
- data/bin/gfa-subgraph +41 -0
- data/lib/gfa/common.rb +33 -8
- data/lib/gfa/field/char.rb +2 -1
- data/lib/gfa/field/float.rb +18 -1
- data/lib/gfa/field/hex.rb +18 -1
- data/lib/gfa/field/json.rb +10 -1
- data/lib/gfa/field/numarray.rb +29 -4
- data/lib/gfa/field/sigint.rb +14 -1
- data/lib/gfa/field/string.rb +10 -1
- data/lib/gfa/field.rb +82 -10
- data/lib/gfa/generator.rb +3 -3
- data/lib/gfa/graph.rb +139 -4
- data/lib/gfa/parser.rb +78 -22
- data/lib/gfa/record/comment.rb +7 -2
- data/lib/gfa/record/containment.rb +12 -7
- data/lib/gfa/record/has_from_to.rb +47 -0
- data/lib/gfa/record/header.rb +2 -0
- data/lib/gfa/record/jump.rb +11 -30
- data/lib/gfa/record/link.rb +11 -29
- data/lib/gfa/record/path.rb +32 -6
- data/lib/gfa/record/segment.rb +8 -4
- data/lib/gfa/record/walk.rb +6 -6
- data/lib/gfa/record.rb +34 -14
- data/lib/gfa/record_set/comment_set.rb +3 -0
- data/lib/gfa/record_set/containment_set.rb +4 -0
- data/lib/gfa/record_set/header_set.rb +3 -0
- data/lib/gfa/record_set/jump_set.rb +3 -0
- data/lib/gfa/record_set/link_set.rb +3 -0
- data/lib/gfa/record_set/path_set.rb +4 -0
- data/lib/gfa/record_set/segment_set.rb +4 -0
- data/lib/gfa/record_set/walk_set.rb +3 -0
- data/lib/gfa/record_set.rb +121 -0
- data/lib/gfa/version.rb +1 -1
- data/test/common_test.rb +5 -5
- data/test/field_test.rb +52 -26
- data/test/parser_test.rb +52 -13
- data/test/record_test.rb +7 -0
- data/test/test_helper.rb +5 -0
- metadata +18 -6
data/lib/gfa/graph.rb
CHANGED
@@ -2,7 +2,6 @@ require 'rgl/adjacency'
|
|
2
2
|
require 'rgl/implicit'
|
3
3
|
|
4
4
|
class GFA
|
5
|
-
|
6
5
|
##
|
7
6
|
# Generates a RGL::ImplicitGraph object describing the links in the GFA.
|
8
7
|
# The +opts+ argument is a hash with any of the following key-value pairs:
|
@@ -22,9 +21,145 @@ class GFA
|
|
22
21
|
def adjacency_graph(opts = {})
|
23
22
|
implicit_graph(opts).to_adjacency
|
24
23
|
end
|
25
|
-
|
24
|
+
|
25
|
+
##
|
26
|
+
# Extracts the subset of records associated to +segments+, which is an Array
|
27
|
+
# with values of any class in: Integer (segment index),
|
28
|
+
# String or GFA::Field::String (segment names), or GFA::Record::Segment.
|
29
|
+
#
|
30
|
+
# +degree+ indicates the maximum degree of separation between the original
|
31
|
+
# segment set and any additional segments. Use 0 to include only the segments
|
32
|
+
# in the set. Use 1 to include those, the records linking to them, and the
|
33
|
+
# additional segments linked by those records. Use any integer greater than 1
|
34
|
+
# to prompt additional rounds of greedy graph expansion.
|
35
|
+
#
|
36
|
+
# If +headers+, it includes all the original headers. Otherwise it only
|
37
|
+
# includes the version header (might be inferred).
|
38
|
+
#
|
39
|
+
# All comments are ignored even if originally parsed. Walks are currently
|
40
|
+
# ignored too. If the current GFA object doesn't have an index, it builds one
|
41
|
+
# and forces +index: true+. The output object inherits all options.
|
42
|
+
def subgraph(segments, degree: 1, headers: true)
|
43
|
+
# Prepare objects
|
44
|
+
unless opts[:index]
|
45
|
+
opts[:index] = true
|
46
|
+
rebuild_index!
|
47
|
+
end
|
48
|
+
gfa = GFA.new(opts)
|
49
|
+
segments =
|
50
|
+
segments.map do |i|
|
51
|
+
i.is_a?(GFA::Record::Segment) ? i :
|
52
|
+
segment(i) or raise "Cannot find segment: #{i}"
|
53
|
+
end
|
54
|
+
|
55
|
+
# Headers
|
56
|
+
if headers
|
57
|
+
self.headers.set.each { |record| gfa << record }
|
58
|
+
else
|
59
|
+
gfa << GFA::Record::Header.new("VN:Z:#{gfa_version}")
|
60
|
+
end
|
61
|
+
|
62
|
+
# Original segments
|
63
|
+
segments.each { |segment| gfa << segment }
|
64
|
+
|
65
|
+
# Expand graph
|
66
|
+
linking, edges = linking_records(gfa.segments, degree: degree)
|
67
|
+
linking += internally_linking_records(segments, edges)
|
68
|
+
linking.each { |record| gfa << record }
|
69
|
+
|
70
|
+
# Return
|
71
|
+
gfa
|
72
|
+
end
|
73
|
+
|
74
|
+
##
|
75
|
+
# Finds all the records linking to any segments in +segments+, a
|
76
|
+
# GFA::RecordSet::SegmentSet object, and expands to links with up to
|
77
|
+
# +degree+ degrees of separation
|
78
|
+
#
|
79
|
+
# It only evaluates the edges given in the +edges+ Array of GFA::Record
|
80
|
+
# values. If +edges+ is +nil+, it uses the full set of edges in the gfa.
|
81
|
+
# Edge GFA::Record objects can be of type Link, Containment, Jump, or Path
|
82
|
+
#
|
83
|
+
# If +_ignore+ is passed, it ignores this number of segments at the beginning
|
84
|
+
# of the +segments+ set (assumes they have already been evaluated). This is
|
85
|
+
# only used for internal heuristics
|
86
|
+
#
|
87
|
+
# Returns an Array with two elements:
|
88
|
+
# 0. An array of GFA::Record objects with all the identified linking records
|
89
|
+
# 1. An array of GFA::Record objects with all edges that were not identified
|
90
|
+
#
|
91
|
+
# IMPORTANT NOTE 1: The object +segments+ will be modified to include all
|
92
|
+
# linked segments. If you don't want this behaviour, please make sure to pass
|
93
|
+
# a duplicate of the object instead.
|
94
|
+
#
|
95
|
+
# IMPORTANT NOTE 2: The list of linking records may not comprehensively
|
96
|
+
# include all records linking the identified expanded segment set. To ensure
|
97
|
+
# a consistent set is identified, use:
|
98
|
+
# linking, edges = gfa.linking_records(segments)
|
99
|
+
# linking += gfa.internally_linking_records(segments, edges)
|
100
|
+
#
|
101
|
+
def linking_records(segments, degree: 1, edges: nil, _ignore: 0)
|
102
|
+
unless segments.is_a? GFA::RecordSet::SegmentSet
|
103
|
+
raise "Unrecognised class: #{segments.class}"
|
104
|
+
end
|
105
|
+
|
106
|
+
# Gather edges to evaluate
|
107
|
+
edges ||= all_edges
|
108
|
+
return [[], edges] if degree <= 0
|
109
|
+
|
110
|
+
# Links, Containments, Jumps (from, to) and Paths (segment_names)
|
111
|
+
linking = []
|
112
|
+
eval_set = _ignore == 0 ? segments.set : segments.set[_ignore..]
|
113
|
+
edges.delete_if do |record|
|
114
|
+
if eval_set.any? { |segment| record.include? segment }
|
115
|
+
linking << record
|
116
|
+
true # Remove from the edge set to speed up future recursions
|
117
|
+
else
|
118
|
+
false # Keep it, possibly linking future recursions
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
# Recurse and return
|
123
|
+
if degree >= 1
|
124
|
+
pre = segments.size
|
125
|
+
|
126
|
+
# Add additional linked segments
|
127
|
+
linking.each do |record|
|
128
|
+
record.segments(self).each do |other_seg|
|
129
|
+
segments << other_seg unless segments[other_seg.name]
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
# Recurse only if new segments were discovered
|
134
|
+
if segments.size > pre
|
135
|
+
$stderr.puts "- Recursion [#{degree}]: " \
|
136
|
+
"#{pre} -> #{segments.size}\t(#{edges.size})"
|
137
|
+
linking +=
|
138
|
+
linking_records(
|
139
|
+
segments,
|
140
|
+
degree: degree - 1, edges: edges, _ignore: pre
|
141
|
+
)[0]
|
142
|
+
end
|
143
|
+
end
|
144
|
+
[linking, edges]
|
145
|
+
end
|
146
|
+
|
147
|
+
def internally_linking_records(segments, edges)
|
148
|
+
$stderr.puts '- Gathering internally linking records'
|
149
|
+
segments = Hash[segments.set.map { |i| [i.name.value, true]}]
|
150
|
+
edges.select { |record| record.segment_names_a.all? { |s| segments[s] } }
|
151
|
+
end
|
152
|
+
|
153
|
+
##
|
154
|
+
# Returns an array of GFA::Record objects including all possible edges
|
155
|
+
# from the GFA. I.e., all links, jumps, containments, and paths.
|
156
|
+
def all_edges
|
157
|
+
edge_t = %i[Link Jump Containment Path]
|
158
|
+
edges = edge_t.flat_map { |t| records[t].set } if edges.nil?
|
159
|
+
end
|
160
|
+
|
26
161
|
private
|
27
|
-
|
162
|
+
|
28
163
|
def segment_names_with_orient
|
29
164
|
segments.flat_map do |s|
|
30
165
|
%w[+ -].map { |orient| GFA::GraphVertex.idx(s, orient) }
|
@@ -57,7 +192,7 @@ class GFA
|
|
57
192
|
opts
|
58
193
|
end
|
59
194
|
|
60
|
-
def rgl_implicit_adjacent_iterator(x,b,opts)
|
195
|
+
def rgl_implicit_adjacent_iterator(x, b, opts)
|
61
196
|
links.each do |l|
|
62
197
|
if l.from?(x.segment, x.orient)
|
63
198
|
orient = opts[:orient] ? l.to_orient : nil
|
data/lib/gfa/parser.rb
CHANGED
@@ -4,43 +4,99 @@ class GFA
|
|
4
4
|
# Class-level
|
5
5
|
MIN_VERSION = '1.0'
|
6
6
|
MAX_VERSION = '1.2'
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
7
|
+
|
8
|
+
##
|
9
|
+
# Load a GFA object from a gfa +file+ with options +opts+:
|
10
|
+
# - index: If the records should be indexed as loaded (default: true)
|
11
|
+
# - index_id: If the records should also be indexed by ID (default: false)
|
12
|
+
# - comments: If the comment records should be saved (default: false)
|
13
|
+
# - line_range: Two-integer array indicating the first and last lines to read
|
14
|
+
# (0-based, inclusive; default: nil, read the entire file)
|
15
|
+
def self.load(file, opts = {})
|
16
|
+
gfa = GFA.new(opts)
|
17
|
+
read_records(file, opts) do |record|
|
18
|
+
gfa << record
|
19
|
+
end
|
13
20
|
gfa
|
14
21
|
end
|
15
|
-
|
22
|
+
|
23
|
+
def self.read_records(file, opts = {})
|
24
|
+
rng = opts[:line_range]
|
25
|
+
File.open(file, 'r') do |fh|
|
26
|
+
lno = -1
|
27
|
+
fh.each do |ln|
|
28
|
+
lno += 1
|
29
|
+
next if !rng.nil? && (lno < rng[0] || lno > rng[1])
|
30
|
+
next if !opts[:comments] && ln[0] == '#'
|
31
|
+
|
32
|
+
yield(GFA::Record[ln])
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
##
|
38
|
+
# Load a GFA object from a gfa +file+ in parallel using +thr+ forked processes,
|
39
|
+
# and the same +opts+ supported by +load+. Defaults to the +load+ method
|
40
|
+
# instead if +thr <= 1+.
|
41
|
+
def self.load_parallel(file, thr, opts = {})
|
42
|
+
return self.load(file, opts) if thr <= 1
|
43
|
+
|
44
|
+
# Prepare data
|
45
|
+
lno = 0
|
46
|
+
File.open(file, 'r') { |fh| fh.each { lno += 1 } }
|
47
|
+
thr = lno if thr > lno
|
48
|
+
blk = (lno.to_f / thr).ceil
|
49
|
+
|
50
|
+
# Launch child processes
|
51
|
+
io = []
|
52
|
+
pid = []
|
53
|
+
thr.times do |i|
|
54
|
+
io[i] = IO.pipe
|
55
|
+
pid << fork do
|
56
|
+
io[i][0].close
|
57
|
+
o = opts.merge(line_range: [i * blk, (i + 1) * blk - 1])
|
58
|
+
records = []
|
59
|
+
read_records(file, o) { |record| records << record }
|
60
|
+
Marshal.dump(records, io[i][1])
|
61
|
+
exit!(0)
|
62
|
+
end
|
63
|
+
io[i][1].close
|
64
|
+
end
|
65
|
+
|
66
|
+
# Collect and merge results
|
67
|
+
gfa = GFA.new(opts)
|
68
|
+
io.each_with_index do |pipe, k|
|
69
|
+
result = pipe[0].read
|
70
|
+
Process.wait(pid[k])
|
71
|
+
raise "Child process failed: #{k}" if result.empty?
|
72
|
+
Marshal.load(result).each { |record| gfa << record }
|
73
|
+
pipe[0].close
|
74
|
+
end
|
75
|
+
|
76
|
+
return gfa
|
77
|
+
end
|
78
|
+
|
16
79
|
def self.supported_version?(v)
|
17
80
|
v.to_f >= MIN_VERSION.to_f and v.to_f <= MAX_VERSION.to_f
|
18
81
|
end
|
19
82
|
|
20
83
|
# Instance-level
|
21
84
|
def <<(obj)
|
22
|
-
obj =
|
85
|
+
obj = GFA::Record[obj] unless obj.is_a? GFA::Record
|
23
86
|
return if obj.nil? || obj.empty?
|
24
87
|
@records[obj.type] << obj
|
25
88
|
|
26
|
-
if obj.type == :Header && !obj.
|
27
|
-
set_gfa_version(obj.
|
89
|
+
if obj.type == :Header && !obj.VN.nil?
|
90
|
+
set_gfa_version(obj.VN.value)
|
28
91
|
end
|
29
92
|
end
|
30
93
|
|
31
94
|
def set_gfa_version(v)
|
32
|
-
|
33
|
-
unless GFA::supported_version?
|
34
|
-
raise "GFA version currently unsupported: #{v}
|
95
|
+
v = v.value if v.is_a? GFA::Field
|
96
|
+
unless GFA::supported_version? v
|
97
|
+
raise "GFA version currently unsupported: #{v}"
|
35
98
|
end
|
99
|
+
|
100
|
+
@gfa_version = v
|
36
101
|
end
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
def parse_line(ln)
|
41
|
-
ln.chomp!
|
42
|
-
return nil if ln =~ /^\s*$/
|
43
|
-
cols = ln.split("\t")
|
44
|
-
GFA::Record.code_class(cols.shift).new(*cols)
|
45
|
-
end
|
46
102
|
end
|
data/lib/gfa/record/comment.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
class GFA::Record::Comment < GFA::Record
|
2
2
|
CODE = :'#'
|
3
|
-
REQ_FIELDS = []
|
3
|
+
REQ_FIELDS = %i[comment]
|
4
4
|
OPT_FIELDS = {}
|
5
|
+
|
6
|
+
REQ_FIELDS.each_index do |i|
|
7
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
8
|
+
end
|
5
9
|
|
6
|
-
def initialize(*opt_fields)
|
10
|
+
def initialize(comment, *opt_fields)
|
7
11
|
@fields = {}
|
12
|
+
add_field(2, :Z, comment, /.*/)
|
8
13
|
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
9
14
|
end
|
10
15
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'gfa/record/has_from_to'
|
2
|
+
|
1
3
|
class GFA::Record::Containment < GFA::Record
|
2
4
|
CODE = :C
|
3
5
|
REQ_FIELDS = %i[from from_orient to to_orient pos overlap]
|
@@ -10,20 +12,23 @@ class GFA::Record::Containment < GFA::Record
|
|
10
12
|
REQ_FIELDS.each_index do |i|
|
11
13
|
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
12
14
|
end
|
15
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
16
|
+
|
17
|
+
include GFA::Record::HasFromTo
|
13
18
|
|
14
19
|
alias container from
|
15
20
|
alias container_orient from_orient
|
16
21
|
alias contained to
|
17
22
|
alias contained_orient to_orient
|
18
|
-
|
23
|
+
|
19
24
|
def initialize(from, from_orient, to, to_orient, pos, overlap, *opt_fields)
|
20
25
|
@fields = {}
|
21
|
-
add_field(2, :Z, from,
|
22
|
-
add_field(3, :Z, from_orient,
|
23
|
-
add_field(4, :Z, to,
|
24
|
-
add_field(5, :Z, to_orient,
|
25
|
-
add_field(6, :i, pos,
|
26
|
-
add_field(7, :Z, overlap,
|
26
|
+
add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
|
27
|
+
add_field(3, :Z, from_orient, /[+-]/)
|
28
|
+
add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
|
29
|
+
add_field(5, :Z, to_orient, /[+-]/)
|
30
|
+
add_field(6, :i, pos, /[0-9]*/)
|
31
|
+
add_field(7, :Z, overlap, /\*|([0-9]+[MIDNSHPX=])+/)
|
27
32
|
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
28
33
|
end
|
29
34
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module GFA::Record::HasFromTo
|
2
|
+
def from?(segment, orient = nil)
|
3
|
+
links_from_to?(segment, orient, true)
|
4
|
+
end
|
5
|
+
|
6
|
+
def to?(segment, orient = nil)
|
7
|
+
links_from_to?(segment, orient, false)
|
8
|
+
end
|
9
|
+
|
10
|
+
##
|
11
|
+
# Extracts all linked segments from +gfa+ (which *must* be indexed)
|
12
|
+
def segments(gfa)
|
13
|
+
raise "Unindexed GFA" unless gfa.indexed?
|
14
|
+
[gfa.segments[from.value], gfa.segments[to.value]]
|
15
|
+
end
|
16
|
+
|
17
|
+
##
|
18
|
+
# Includes a GFA::Record::Segment +segment+?
|
19
|
+
def include?(segment)
|
20
|
+
# unless segment.is_a? GFA::Record::Segment
|
21
|
+
# raise "Unrecognized class: #{segment.class}"
|
22
|
+
# end
|
23
|
+
segment.name == from || segment.name == to
|
24
|
+
end
|
25
|
+
|
26
|
+
##
|
27
|
+
# Array of strings with the names of the segments linked by the
|
28
|
+
# record
|
29
|
+
def segment_names_a
|
30
|
+
[from.value, to.value]
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def links_from_to?(segment, orient, from)
|
36
|
+
segment = segment_name(segment)
|
37
|
+
orient = orient.value if orient.is_a? GFA::Field
|
38
|
+
base_k = from ? 2 : 4
|
39
|
+
segment == fields[base_k].value &&
|
40
|
+
(orient.nil? || orient == fields[base_k + 1].value)
|
41
|
+
end
|
42
|
+
|
43
|
+
def segment_name(segment)
|
44
|
+
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
45
|
+
segment.is_a?(GFA::Field) ? segment.value : segment
|
46
|
+
end
|
47
|
+
end
|
data/lib/gfa/record/header.rb
CHANGED
data/lib/gfa/record/jump.rb
CHANGED
@@ -1,45 +1,26 @@
|
|
1
|
+
require 'gfa/record/has_from_to'
|
2
|
+
|
1
3
|
class GFA::Record::Jump < GFA::Record
|
2
4
|
CODE = :J
|
3
5
|
REQ_FIELDS = %i[from from_orient to to_orient distance]
|
4
6
|
OPT_FIELDS = {
|
5
7
|
SC: :i # 1 indicates indirect shortcut connections. Only 0/1 allowed.
|
6
8
|
}
|
7
|
-
|
9
|
+
|
8
10
|
REQ_FIELDS.each_index do |i|
|
9
11
|
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
10
12
|
end
|
13
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
14
|
+
|
15
|
+
include GFA::Record::HasFromTo
|
11
16
|
|
12
17
|
def initialize(from, from_orient, to, to_orient, distance, *opt_fields)
|
13
18
|
@fields = {}
|
14
|
-
add_field(2, :Z, from,
|
15
|
-
add_field(3, :Z, from_orient,
|
16
|
-
add_field(4, :Z, to,
|
17
|
-
add_field(5, :Z, to_orient,
|
18
|
-
add_field(6, :Z, distance,
|
19
|
+
add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
|
20
|
+
add_field(3, :Z, from_orient, /[+-]/)
|
21
|
+
add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
|
22
|
+
add_field(5, :Z, to_orient, /[+-]/)
|
23
|
+
add_field(6, :Z, distance, /\*|[-+]?[0-9]+/)
|
19
24
|
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
20
25
|
end
|
21
|
-
|
22
|
-
|
23
|
-
def from?(segment, orient = nil)
|
24
|
-
links_from_to?(segment, orient, true)
|
25
|
-
end
|
26
|
-
|
27
|
-
def to?(segment, orient = nil)
|
28
|
-
links_from_to?(segment, orient, false)
|
29
|
-
end
|
30
|
-
|
31
|
-
private
|
32
|
-
|
33
|
-
def links_from_to?(segment, orient, from)
|
34
|
-
segment = segment_name(segment)
|
35
|
-
orient = orient.value if orient.is_a? GFA::Field
|
36
|
-
base_k = from ? 2 : 4
|
37
|
-
segment==fields[base_k].value &&
|
38
|
-
(orient.nil? || orient==fields[base_k + 1].value)
|
39
|
-
end
|
40
|
-
|
41
|
-
def segment_name(segment)
|
42
|
-
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
43
|
-
segment.is_a?(GFA::Field) ? segment.value : segment
|
44
|
-
end
|
45
26
|
end
|
data/lib/gfa/record/link.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'gfa/record/has_from_to'
|
2
|
+
|
1
3
|
class GFA::Record::Link < GFA::Record
|
2
4
|
CODE = :L
|
3
5
|
REQ_FIELDS = %i[from from_orient to to_orient overlap]
|
@@ -9,41 +11,21 @@ class GFA::Record::Link < GFA::Record
|
|
9
11
|
KC: :i, # k-mer count
|
10
12
|
ID: :Z # Edge identifier
|
11
13
|
}
|
12
|
-
|
14
|
+
|
13
15
|
REQ_FIELDS.each_index do |i|
|
14
16
|
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
15
17
|
end
|
18
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
19
|
+
|
20
|
+
include GFA::Record::HasFromTo
|
16
21
|
|
17
22
|
def initialize(from, from_orient, to, to_orient, overlap, *opt_fields)
|
18
23
|
@fields = {}
|
19
|
-
add_field(2, :Z, from,
|
20
|
-
add_field(3, :Z, from_orient,
|
21
|
-
add_field(4, :Z, to,
|
22
|
-
add_field(5, :Z, to_orient,
|
23
|
-
add_field(6, :Z, overlap,
|
24
|
+
add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
|
25
|
+
add_field(3, :Z, from_orient, /[+-]/)
|
26
|
+
add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
|
27
|
+
add_field(5, :Z, to_orient, /[+-]/)
|
28
|
+
add_field(6, :Z, overlap, /\*|([0-9]+[MIDNSHPX=])+/)
|
24
29
|
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
25
30
|
end
|
26
|
-
|
27
|
-
def from?(segment, orient = nil)
|
28
|
-
links_from_to?(segment, orient, true)
|
29
|
-
end
|
30
|
-
|
31
|
-
def to?(segment, orient = nil)
|
32
|
-
links_from_to?(segment, orient, false)
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
36
|
-
|
37
|
-
def links_from_to?(segment, orient, from)
|
38
|
-
segment = segment_name(segment)
|
39
|
-
orient = orient.value if orient.is_a? GFA::Field
|
40
|
-
base_k = from ? 2 : 4
|
41
|
-
segment==fields[base_k].value &&
|
42
|
-
(orient.nil? || orient==fields[base_k + 1].value)
|
43
|
-
end
|
44
|
-
|
45
|
-
def segment_name(segment)
|
46
|
-
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
47
|
-
segment.is_a?(GFA::Field) ? segment.value : segment
|
48
|
-
end
|
49
31
|
end
|
data/lib/gfa/record/path.rb
CHANGED
@@ -1,19 +1,45 @@
|
|
1
1
|
class GFA::Record::Path < GFA::Record
|
2
2
|
CODE = :P
|
3
|
-
REQ_FIELDS = %i[path_name
|
3
|
+
REQ_FIELDS = %i[path_name segment_names overlaps]
|
4
4
|
OPT_FIELDS = {}
|
5
5
|
|
6
6
|
REQ_FIELDS.each_index do |i|
|
7
7
|
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
8
8
|
end
|
9
9
|
|
10
|
-
alias
|
10
|
+
alias segment_name segment_names
|
11
|
+
alias cigar overlaps
|
11
12
|
|
12
|
-
def initialize(path_name,
|
13
|
+
def initialize(path_name, segment_names, overlaps, *opt_fields)
|
13
14
|
@fields = {}
|
14
|
-
add_field(2, :Z, path_name,
|
15
|
-
add_field(3, :Z,
|
16
|
-
add_field(4, :Z,
|
15
|
+
add_field(2, :Z, path_name, /[!-)+-<>-~][!-~]*/)
|
16
|
+
add_field(3, :Z, segment_names, /[!-)+-<>-~][!-~]*/)
|
17
|
+
add_field(4, :Z, overlaps, /\*|([0-9]+[MIDNSHPX=]|[-+]?[0-9]+J|.)+/)
|
17
18
|
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
18
19
|
end
|
20
|
+
|
21
|
+
##
|
22
|
+
# Array of segment names (without orientations) as strings
|
23
|
+
def segment_names_a
|
24
|
+
segment_names.value.split(/[,;]/).map { |i| i.gsub(/[+-]$/, '') }
|
25
|
+
end
|
26
|
+
|
27
|
+
##
|
28
|
+
# Extracts all linked segments from +gfa+ (which *must* be indexed)
|
29
|
+
def segments(gfa)
|
30
|
+
raise "Unindexed GFA" unless gfa.indexed?
|
31
|
+
segment_names_a.map do |name|
|
32
|
+
gfa.segments[name]
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
##
|
37
|
+
# Includes a GFA::Record::Segment +segment+?
|
38
|
+
def include?(segment)
|
39
|
+
# unless segment.is_a? GFA::Record::Segment
|
40
|
+
# raise "Unrecognized class: #{segment.class}"
|
41
|
+
# end
|
42
|
+
|
43
|
+
segment_names_a.any? { |name| segment.name == name }
|
44
|
+
end
|
19
45
|
end
|
data/lib/gfa/record/segment.rb
CHANGED
@@ -8,18 +8,22 @@ class GFA::Record::Segment < GFA::Record
|
|
8
8
|
KC: :i, # k-mer count
|
9
9
|
SH: :H, # SHA-256 checksum of the sequence
|
10
10
|
UR: :Z, # URI or local file-system path of the sequence
|
11
|
-
# Non-cannonical
|
12
|
-
DP: :f
|
11
|
+
# Non-canonical but uppercase (thus, reserved)
|
12
|
+
DP: :f, # SAM
|
13
|
+
SN: :Z, # rGFA: Name of stable sequence from which the segment is derived
|
14
|
+
SO: :i, # rGFA: Offset on the stable sequence
|
15
|
+
SR: :i # rGFA: Rank. 0 if on a linear reference genome; >0 otherwise
|
13
16
|
}
|
14
17
|
|
15
18
|
REQ_FIELDS.each_index do |i|
|
16
19
|
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
17
20
|
end
|
21
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
18
22
|
|
19
23
|
def initialize(name, sequence, *opt_fields)
|
20
24
|
@fields = {}
|
21
|
-
add_field(2, :Z, name,
|
22
|
-
add_field(3, :Z, sequence,
|
25
|
+
add_field(2, :Z, name, /[!-)+-<>-~][!-~]*/)
|
26
|
+
add_field(3, :Z, sequence, /\*|[A-Za-z=.]+/)
|
23
27
|
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
24
28
|
end
|
25
29
|
end
|
data/lib/gfa/record/walk.rb
CHANGED
@@ -9,12 +9,12 @@ class GFA::Record::Walk < GFA::Record
|
|
9
9
|
|
10
10
|
def initialize(sample_id, hap_index, seq_id, seq_start, seq_end, walk, *opt_fields)
|
11
11
|
@fields = {}
|
12
|
-
add_field(2, :Z, sample_id,
|
13
|
-
add_field(3, :i, hap_index,
|
14
|
-
add_field(4, :Z, seq_id,
|
15
|
-
add_field(5, :i, seq_start,
|
16
|
-
add_field(6, :i, seq_end,
|
17
|
-
add_field(7, :Z, walk,
|
12
|
+
add_field(2, :Z, sample_id, /[!-)+-<>-~][!-~]*/)
|
13
|
+
add_field(3, :i, hap_index, /[0-9]+/)
|
14
|
+
add_field(4, :Z, seq_id, /[!-)+-<>-~][!-~]*/)
|
15
|
+
add_field(5, :i, seq_start, /\*|[0-9]+/)
|
16
|
+
add_field(6, :i, seq_end, /\*|[0-9]+/)
|
17
|
+
add_field(7, :Z, walk, /([><][!-;=?-~]+)+/)
|
18
18
|
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
19
19
|
end
|
20
20
|
end
|