gfa 0.1.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile +8 -2
- data/LICENSE +2 -2
- data/README.md +23 -15
- data/Rakefile +9 -8
- data/lib/gfa/common.rb +39 -42
- data/lib/gfa/field/char.rb +2 -2
- data/lib/gfa/field/float.rb +17 -1
- data/lib/gfa/field/hex.rb +17 -1
- data/lib/gfa/field/json.rb +18 -0
- data/lib/gfa/field/numarray.rb +38 -5
- data/lib/gfa/field/sigint.rb +14 -2
- data/lib/gfa/field/string.rb +11 -3
- data/lib/gfa/field.rb +89 -19
- data/lib/gfa/generator.rb +32 -28
- data/lib/gfa/graph.rb +14 -16
- data/lib/gfa/parser.rb +31 -22
- data/lib/gfa/record/comment.rb +15 -0
- data/lib/gfa/record/containment.rb +19 -13
- data/lib/gfa/record/header.rb +11 -10
- data/lib/gfa/record/jump.rb +45 -0
- data/lib/gfa/record/link.rb +42 -42
- data/lib/gfa/record/path.rb +14 -13
- data/lib/gfa/record/segment.rb +22 -18
- data/lib/gfa/record/walk.rb +20 -0
- data/lib/gfa/record.rb +53 -33
- data/lib/gfa/record_set/comment_set.rb +3 -0
- data/lib/gfa/record_set/containment_set.rb +4 -0
- data/lib/gfa/record_set/header_set.rb +3 -0
- data/lib/gfa/record_set/jump_set.rb +3 -0
- data/lib/gfa/record_set/link_set.rb +3 -0
- data/lib/gfa/record_set/path_set.rb +4 -0
- data/lib/gfa/record_set/segment_set.rb +4 -0
- data/lib/gfa/record_set/walk_set.rb +3 -0
- data/lib/gfa/record_set.rb +99 -0
- data/lib/gfa/version.rb +1 -1
- data/lib/gfa.rb +4 -4
- data/test/common_test.rb +5 -5
- data/test/field_test.rb +52 -26
- data/test/parser_test.rb +57 -20
- data/test/record_test.rb +7 -0
- data/test/test_helper.rb +10 -5
- metadata +33 -21
data/lib/gfa/graph.rb
CHANGED
@@ -1,8 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'rgl/adjacency'
|
2
|
+
require 'rgl/implicit'
|
3
3
|
|
4
4
|
class GFA
|
5
|
-
|
6
5
|
##
|
7
6
|
# Generates a RGL::ImplicitGraph object describing the links in the GFA.
|
8
7
|
# The +opts+ argument is a hash with any of the following key-value pairs:
|
@@ -11,7 +10,7 @@ class GFA
|
|
11
10
|
# true.
|
12
11
|
# * :directed => bool. If false, ignores direction of the links. By defaut
|
13
12
|
# the same value as :orient.
|
14
|
-
def implicit_graph(opts={})
|
13
|
+
def implicit_graph(opts = {})
|
15
14
|
rgl_implicit_graph(opts)
|
16
15
|
end
|
17
16
|
|
@@ -19,7 +18,7 @@ class GFA
|
|
19
18
|
# Generates a RGL::DirectedAdjacencyGraph or RGL::AdjacencyGraph object.
|
20
19
|
# The +opts+ argument is a hash with the same supported key-value pairs as
|
21
20
|
# in #implicit_graph.
|
22
|
-
def adjacency_graph(opts={})
|
21
|
+
def adjacency_graph(opts = {})
|
23
22
|
implicit_graph(opts).to_adjacency
|
24
23
|
end
|
25
24
|
|
@@ -27,7 +26,7 @@ class GFA
|
|
27
26
|
|
28
27
|
def segment_names_with_orient
|
29
28
|
segments.flat_map do |s|
|
30
|
-
%w[+ -].map{ |orient| GFA::GraphVertex.idx(s, orient) }
|
29
|
+
%w[+ -].map { |orient| GFA::GraphVertex.idx(s, orient) }
|
31
30
|
end.to_set
|
32
31
|
end
|
33
32
|
|
@@ -44,8 +43,8 @@ class GFA
|
|
44
43
|
(opts[:orient] ? segment_names_with_orient :
|
45
44
|
segment_names).each(&b)
|
46
45
|
end
|
47
|
-
g.adjacent_iterator do |x,b|
|
48
|
-
rgl_implicit_adjacent_iterator(x,b,opts)
|
46
|
+
g.adjacent_iterator do |x, b|
|
47
|
+
rgl_implicit_adjacent_iterator(x, b, opts)
|
49
48
|
end
|
50
49
|
g.directed = opts[:directed]
|
51
50
|
end
|
@@ -57,25 +56,25 @@ class GFA
|
|
57
56
|
opts
|
58
57
|
end
|
59
58
|
|
60
|
-
def rgl_implicit_adjacent_iterator(x,b,opts)
|
59
|
+
def rgl_implicit_adjacent_iterator(x, b, opts)
|
61
60
|
links.each do |l|
|
62
61
|
if l.from?(x.segment, x.orient)
|
63
62
|
orient = opts[:orient] ? l.to_orient : nil
|
64
63
|
b.call(GFA::GraphVertex.idx(l.to, orient))
|
65
|
-
elsif opts[:orient]
|
64
|
+
elsif opts[:orient] && l.to?(x.segment, orient_rc(x.orient))
|
66
65
|
orient = orient_rc(l.from_orient.value)
|
67
66
|
b.call(GFA::GraphVertex.idx(l.from, orient))
|
68
67
|
end
|
69
68
|
end
|
70
69
|
end
|
71
70
|
|
72
|
-
def orient_rc(o)
|
73
|
-
|
71
|
+
def orient_rc(o)
|
72
|
+
o == '+' ? '-' : '+'
|
73
|
+
end
|
74
74
|
end
|
75
75
|
|
76
76
|
|
77
77
|
class GFA::GraphVertex # :nodoc:
|
78
|
-
|
79
78
|
# Class-level
|
80
79
|
@@idx = {}
|
81
80
|
def self.idx(segment, orient)
|
@@ -83,10 +82,10 @@ class GFA::GraphVertex # :nodoc:
|
|
83
82
|
@@idx[n.to_s] ||= n
|
84
83
|
@@idx[n.to_s]
|
85
84
|
end
|
86
|
-
|
85
|
+
|
87
86
|
# Instance-level
|
88
87
|
attr :segment, :orient
|
89
|
-
|
88
|
+
|
90
89
|
def initialize(segment, orient)
|
91
90
|
@segment = segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
92
91
|
segment.is_a?(GFA::Field) ? segment.value : segment
|
@@ -96,5 +95,4 @@ class GFA::GraphVertex # :nodoc:
|
|
96
95
|
def to_s
|
97
96
|
"#{segment}#{orient}"
|
98
97
|
end
|
99
|
-
|
100
98
|
end
|
data/lib/gfa/parser.rb
CHANGED
@@ -1,18 +1,23 @@
|
|
1
|
-
require
|
1
|
+
require 'gfa/record'
|
2
2
|
|
3
3
|
class GFA
|
4
4
|
# Class-level
|
5
|
-
MIN_VERSION =
|
6
|
-
MAX_VERSION =
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
5
|
+
MIN_VERSION = '1.0'
|
6
|
+
MAX_VERSION = '1.2'
|
7
|
+
|
8
|
+
##
|
9
|
+
# Load a GFA object from a +gfa+ file with options +opts+:
|
10
|
+
# - index: If the records should be indexed as loaded (default: true)
|
11
|
+
# - comments: If the comment records should be saved (default: false)
|
12
|
+
def self.load(file, opts = {})
|
13
|
+
gfa = GFA.new(opts)
|
14
|
+
fh = File.open(file, 'r')
|
11
15
|
fh.each { |ln| gfa << ln }
|
12
|
-
fh.close
|
13
16
|
gfa
|
17
|
+
ensure
|
18
|
+
fh&.close
|
14
19
|
end
|
15
|
-
|
20
|
+
|
16
21
|
def self.supported_version?(v)
|
17
22
|
v.to_f >= MIN_VERSION.to_f and v.to_f <= MAX_VERSION.to_f
|
18
23
|
end
|
@@ -20,26 +25,30 @@ class GFA
|
|
20
25
|
# Instance-level
|
21
26
|
def <<(obj)
|
22
27
|
obj = parse_line(obj) unless obj.is_a? GFA::Record
|
23
|
-
return if obj.nil?
|
28
|
+
return if obj.nil? || obj.empty?
|
24
29
|
@records[obj.type] << obj
|
25
|
-
|
26
|
-
|
30
|
+
|
31
|
+
if obj.type == :Header && !obj.VN.nil?
|
32
|
+
set_gfa_version(obj.VN.value)
|
27
33
|
end
|
28
34
|
end
|
29
35
|
|
30
36
|
def set_gfa_version(v)
|
37
|
+
v = v.value if v.is_a? GFA::Field
|
38
|
+
unless GFA::supported_version? v
|
39
|
+
raise "GFA version currently unsupported: #{v}"
|
40
|
+
end
|
41
|
+
|
31
42
|
@gfa_version = v
|
32
|
-
raise "GFA version currently unsupported: #{v}." unless
|
33
|
-
GFA::supported_version? gfa_version
|
34
43
|
end
|
35
|
-
|
44
|
+
|
36
45
|
private
|
37
|
-
|
38
|
-
def parse_line(ln)
|
39
|
-
ln.chomp!
|
40
|
-
return nil if ln =~ /^\s*$/
|
41
|
-
cols = ln.split("\t")
|
42
|
-
GFA::Record.code_class(cols.shift).new(*cols)
|
43
|
-
end
|
44
46
|
|
47
|
+
def parse_line(string)
|
48
|
+
string = string.chomp
|
49
|
+
return nil if string =~ /^\s*$/
|
50
|
+
return nil if !opts[:comments] && string[0] == '#'
|
51
|
+
|
52
|
+
GFA::Record[string]
|
53
|
+
end
|
45
54
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
class GFA::Record::Comment < GFA::Record
|
2
|
+
CODE = :'#'
|
3
|
+
REQ_FIELDS = %i[comment]
|
4
|
+
OPT_FIELDS = {}
|
5
|
+
|
6
|
+
REQ_FIELDS.each_index do |i|
|
7
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
8
|
+
end
|
9
|
+
|
10
|
+
def initialize(comment, *opt_fields)
|
11
|
+
@fields = {}
|
12
|
+
add_field(2, :Z, comment, /.*/)
|
13
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
14
|
+
end
|
15
|
+
end
|
@@ -1,24 +1,30 @@
|
|
1
1
|
class GFA::Record::Containment < GFA::Record
|
2
2
|
CODE = :C
|
3
|
-
REQ_FIELDS = [
|
3
|
+
REQ_FIELDS = %i[from from_orient to to_orient pos overlap]
|
4
4
|
OPT_FIELDS = {
|
5
|
-
:
|
6
|
-
:
|
5
|
+
RC: :i, # Read coverage
|
6
|
+
NM: :i, # Number of mismatches/gaps
|
7
|
+
ID: :Z # Edge identifier
|
7
8
|
}
|
8
9
|
|
9
10
|
REQ_FIELDS.each_index do |i|
|
10
|
-
define_method(REQ_FIELDS[i]) { fields[i+2] }
|
11
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
11
12
|
end
|
12
|
-
|
13
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
14
|
+
|
15
|
+
alias container from
|
16
|
+
alias container_orient from_orient
|
17
|
+
alias contained to
|
18
|
+
alias contained_orient to_orient
|
19
|
+
|
13
20
|
def initialize(from, from_orient, to, to_orient, pos, overlap, *opt_fields)
|
14
21
|
@fields = {}
|
15
|
-
add_field(2, :Z, from,
|
16
|
-
add_field(3, :Z, from_orient,
|
17
|
-
add_field(4, :Z, to,
|
18
|
-
add_field(5, :Z, to_orient,
|
19
|
-
add_field(6, :i, pos,
|
20
|
-
add_field(7, :Z, overlap,
|
21
|
-
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
22
|
+
add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
|
23
|
+
add_field(3, :Z, from_orient, /[+-]/)
|
24
|
+
add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
|
25
|
+
add_field(5, :Z, to_orient, /[+-]/)
|
26
|
+
add_field(6, :i, pos, /[0-9]*/)
|
27
|
+
add_field(7, :Z, overlap, /\*|([0-9]+[MIDNSHPX=])+/)
|
28
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
22
29
|
end
|
23
|
-
|
24
30
|
end
|
data/lib/gfa/record/header.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
class GFA::Record::Header < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
def initialize(*opt_fields)
|
9
|
-
@fields = {}
|
10
|
-
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
11
|
-
end
|
2
|
+
CODE = :H
|
3
|
+
REQ_FIELDS = []
|
4
|
+
OPT_FIELDS = {
|
5
|
+
VN: :Z # Version number
|
6
|
+
}
|
12
7
|
|
8
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
9
|
+
|
10
|
+
def initialize(*opt_fields)
|
11
|
+
@fields = {}
|
12
|
+
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
13
|
+
end
|
13
14
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
class GFA::Record::Jump < GFA::Record
|
2
|
+
CODE = :J
|
3
|
+
REQ_FIELDS = %i[from from_orient to to_orient distance]
|
4
|
+
OPT_FIELDS = {
|
5
|
+
SC: :i # 1 indicates indirect shortcut connections. Only 0/1 allowed.
|
6
|
+
}
|
7
|
+
|
8
|
+
REQ_FIELDS.each_index do |i|
|
9
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
10
|
+
end
|
11
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
12
|
+
|
13
|
+
def initialize(from, from_orient, to, to_orient, distance, *opt_fields)
|
14
|
+
@fields = {}
|
15
|
+
add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
|
16
|
+
add_field(3, :Z, from_orient, /[+-]/)
|
17
|
+
add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
|
18
|
+
add_field(5, :Z, to_orient, /[+-]/)
|
19
|
+
add_field(6, :Z, distance, /\*|[-+]?[0-9]+/)
|
20
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
21
|
+
end
|
22
|
+
|
23
|
+
def from?(segment, orient = nil)
|
24
|
+
links_from_to?(segment, orient, true)
|
25
|
+
end
|
26
|
+
|
27
|
+
def to?(segment, orient = nil)
|
28
|
+
links_from_to?(segment, orient, false)
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def links_from_to?(segment, orient, from)
|
34
|
+
segment = segment_name(segment)
|
35
|
+
orient = orient.value if orient.is_a? GFA::Field
|
36
|
+
base_k = from ? 2 : 4
|
37
|
+
segment==fields[base_k].value &&
|
38
|
+
(orient.nil? || orient==fields[base_k + 1].value)
|
39
|
+
end
|
40
|
+
|
41
|
+
def segment_name(segment)
|
42
|
+
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
43
|
+
segment.is_a?(GFA::Field) ? segment.value : segment
|
44
|
+
end
|
45
|
+
end
|
data/lib/gfa/record/link.rb
CHANGED
@@ -1,50 +1,50 @@
|
|
1
1
|
class GFA::Record::Link < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
REQ_FIELDS.each_index do |i|
|
13
|
-
define_method(REQ_FIELDS[i]) { fields[i+2] }
|
14
|
-
end
|
2
|
+
CODE = :L
|
3
|
+
REQ_FIELDS = %i[from from_orient to to_orient overlap]
|
4
|
+
OPT_FIELDS = {
|
5
|
+
MQ: :i, # Mapping quality
|
6
|
+
NM: :i, # Number of mismatches/gaps
|
7
|
+
EC: :i, # Read count
|
8
|
+
FC: :i, # Fragment count
|
9
|
+
KC: :i, # k-mer count
|
10
|
+
ID: :Z # Edge identifier
|
11
|
+
}
|
15
12
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
add_field(4, :Z, to, /^[!-)+-<>-~][!-~]*$/)
|
21
|
-
add_field(5, :Z, to_orient, /^+|-$/)
|
22
|
-
add_field(6, :Z, overlap, /^\*|([0-9]+[MIDNSHPX=])+$/)
|
23
|
-
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
24
|
-
end
|
13
|
+
REQ_FIELDS.each_index do |i|
|
14
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
15
|
+
end
|
16
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
25
17
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
18
|
+
def initialize(from, from_orient, to, to_orient, overlap, *opt_fields)
|
19
|
+
@fields = {}
|
20
|
+
add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
|
21
|
+
add_field(3, :Z, from_orient, /[+-]/)
|
22
|
+
add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
|
23
|
+
add_field(5, :Z, to_orient, /[+-]/)
|
24
|
+
add_field(6, :Z, overlap, /\*|([0-9]+[MIDNSHPX=])+/)
|
25
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
26
|
+
end
|
34
27
|
|
35
|
-
|
28
|
+
def from?(segment, orient = nil)
|
29
|
+
links_from_to?(segment, orient, true)
|
30
|
+
end
|
36
31
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
base_k = from ? 2 : 4
|
41
|
-
segment==fields[base_k].value and
|
42
|
-
(orient.nil? or orient==fields[base_k + 1].value)
|
43
|
-
end
|
32
|
+
def to?(segment, orient = nil)
|
33
|
+
links_from_to?(segment, orient, false)
|
34
|
+
end
|
44
35
|
|
45
|
-
|
46
|
-
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
47
|
-
segment.is_a?(GFA::Field) ? segment.value : segment
|
48
|
-
end
|
36
|
+
private
|
49
37
|
|
38
|
+
def links_from_to?(segment, orient, from)
|
39
|
+
segment = segment_name(segment)
|
40
|
+
orient = orient.value if orient.is_a? GFA::Field
|
41
|
+
base_k = from ? 2 : 4
|
42
|
+
segment==fields[base_k].value &&
|
43
|
+
(orient.nil? || orient==fields[base_k + 1].value)
|
44
|
+
end
|
45
|
+
|
46
|
+
def segment_name(segment)
|
47
|
+
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
48
|
+
segment.is_a?(GFA::Field) ? segment.value : segment
|
49
|
+
end
|
50
50
|
end
|
data/lib/gfa/record/path.rb
CHANGED
@@ -1,18 +1,19 @@
|
|
1
1
|
class GFA::Record::Path < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
CODE = :P
|
3
|
+
REQ_FIELDS = %i[path_name segment_name overlaps]
|
4
|
+
OPT_FIELDS = {}
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
REQ_FIELDS.each_index do |i|
|
7
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
8
|
+
end
|
9
9
|
|
10
|
-
|
11
|
-
@fields = {}
|
12
|
-
add_field(2, :Z, path_name, /^[!-)+-<>-~][!-~]*$/)
|
13
|
-
add_field(3, :Z, segment_name, /^[!-)+-<>-~][!-~]*$/)
|
14
|
-
add_field(4, :Z, cigar, /^\*|([0-9]+[MIDNSHPX=])+$/)
|
15
|
-
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
16
|
-
end
|
10
|
+
alias cigar overlaps
|
17
11
|
|
12
|
+
def initialize(path_name, segment_name, overlaps, *opt_fields)
|
13
|
+
@fields = {}
|
14
|
+
add_field(2, :Z, path_name, /[!-)+-<>-~][!-~]*/)
|
15
|
+
add_field(3, :Z, segment_name, /[!-)+-<>-~][!-~]*/)
|
16
|
+
add_field(4, :Z, overlaps, /\*|([0-9]+[MIDNSHPX=]|[-+]?[0-9]+J|.)+/)
|
17
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
18
|
+
end
|
18
19
|
end
|
data/lib/gfa/record/segment.rb
CHANGED
@@ -1,22 +1,26 @@
|
|
1
1
|
class GFA::Record::Segment < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
2
|
+
CODE = :S
|
3
|
+
REQ_FIELDS = %i[name sequence]
|
4
|
+
OPT_FIELDS = {
|
5
|
+
LN: :i, # Segment length
|
6
|
+
RC: :i, # Read count
|
7
|
+
FC: :i, # Fragment count
|
8
|
+
KC: :i, # k-mer count
|
9
|
+
SH: :H, # SHA-256 checksum of the sequence
|
10
|
+
UR: :Z, # URI or local file-system path of the sequence
|
11
|
+
# Non-cannonical
|
12
|
+
DP: :f # (From SAM)
|
13
|
+
}
|
10
14
|
|
11
|
-
|
12
|
-
|
13
|
-
|
15
|
+
REQ_FIELDS.each_index do |i|
|
16
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
17
|
+
end
|
18
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
14
19
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
20
|
+
def initialize(name, sequence, *opt_fields)
|
21
|
+
@fields = {}
|
22
|
+
add_field(2, :Z, name, /[!-)+-<>-~][!-~]*/)
|
23
|
+
add_field(3, :Z, sequence, /\*|[A-Za-z=.]+/)
|
24
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
25
|
+
end
|
22
26
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class GFA::Record::Walk < GFA::Record
|
2
|
+
CODE = :W
|
3
|
+
REQ_FIELDS = %i[sample_id hap_index seq_id seq_start seq_end walk]
|
4
|
+
OPT_FIELDS = {}
|
5
|
+
|
6
|
+
REQ_FIELDS.each_index do |i|
|
7
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
8
|
+
end
|
9
|
+
|
10
|
+
def initialize(sample_id, hap_index, seq_id, seq_start, seq_end, walk, *opt_fields)
|
11
|
+
@fields = {}
|
12
|
+
add_field(2, :Z, sample_id, /[!-)+-<>-~][!-~]*/)
|
13
|
+
add_field(3, :i, hap_index, /[0-9]+/)
|
14
|
+
add_field(4, :Z, seq_id, /[!-)+-<>-~][!-~]*/)
|
15
|
+
add_field(5, :i, seq_start, /\*|[0-9]+/)
|
16
|
+
add_field(6, :i, seq_end, /\*|[0-9]+/)
|
17
|
+
add_field(7, :Z, walk, /([><][!-;=?-~]+)+/)
|
18
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
19
|
+
end
|
20
|
+
end
|
data/lib/gfa/record.rb
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
class GFA::Record
|
2
|
-
|
3
2
|
# Class-level
|
4
|
-
|
5
3
|
CODES = {
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
4
|
+
:'#' => :Comment,
|
5
|
+
H: :Header,
|
6
|
+
S: :Segment,
|
7
|
+
L: :Link,
|
8
|
+
J: :Jump, # Since 1.2
|
9
|
+
C: :Containment,
|
10
|
+
P: :Path,
|
11
|
+
W: :Walk # Since 1.1
|
11
12
|
}
|
12
13
|
REQ_FIELDS = []
|
13
14
|
OPT_FIELDS = {}
|
14
15
|
TYPES = CODES.values
|
15
|
-
|
16
16
|
TYPES.each { |t| require "gfa/record/#{t.downcase}" }
|
17
17
|
|
18
|
-
[
|
18
|
+
%i[CODES REQ_FIELDS OPT_FIELDS TYPES].each do |x|
|
19
19
|
define_singleton_method(x) { const_get(x) }
|
20
20
|
end
|
21
21
|
|
@@ -29,32 +29,46 @@ class GFA::Record
|
|
29
29
|
const_get(name)
|
30
30
|
end
|
31
31
|
|
32
|
+
def self.[](string)
|
33
|
+
split = string[0] == '#' ? ['', 2] : ["\t", 0]
|
34
|
+
code, *values = string.chomp.split(*split)
|
35
|
+
code_class(code).new(*values)
|
36
|
+
end
|
37
|
+
|
32
38
|
# Instance-level
|
33
39
|
|
34
40
|
attr :fields
|
35
41
|
|
36
|
-
def [](k)
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
def
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
def [](k)
|
43
|
+
fields[k]
|
44
|
+
end
|
45
|
+
|
46
|
+
def type
|
47
|
+
CODES[code]
|
48
|
+
end
|
49
|
+
|
50
|
+
def code
|
51
|
+
self.class.const_get(:CODE)
|
52
|
+
end
|
53
|
+
|
54
|
+
def empty?
|
55
|
+
fields.empty?
|
56
|
+
end
|
57
|
+
|
44
58
|
def to_s
|
45
59
|
o = [code.to_s]
|
46
60
|
self.class.REQ_FIELDS.each_index do |i|
|
47
|
-
o << fields[i+2].to_s(false)
|
61
|
+
o << fields[i + 2].to_s(false)
|
48
62
|
end
|
49
|
-
fields.each do |k,v|
|
63
|
+
fields.each do |k, v|
|
50
64
|
next if k.is_a? Integer
|
51
65
|
o << "#{k}:#{v}"
|
52
66
|
end
|
53
67
|
o.join("\t")
|
54
68
|
end
|
55
|
-
|
69
|
+
|
56
70
|
def hash
|
57
|
-
{code => fields}.hash
|
71
|
+
{ code => fields }.hash
|
58
72
|
end
|
59
73
|
|
60
74
|
def eql?(rec)
|
@@ -64,27 +78,33 @@ class GFA::Record
|
|
64
78
|
alias == eql?
|
65
79
|
|
66
80
|
private
|
67
|
-
|
68
|
-
def add_field(f_tag, f_type, f_value, format=nil)
|
81
|
+
|
82
|
+
def add_field(f_tag, f_type, f_value, format = nil)
|
69
83
|
unless format.nil?
|
70
84
|
msg = (f_tag.is_a?(Integer) ? "column #{f_tag}" : "#{f_tag} field")
|
71
85
|
GFA.assert_format(f_value, format, "Bad #{type} #{msg}")
|
72
86
|
end
|
73
|
-
|
87
|
+
|
88
|
+
@fields[f_tag] = GFA::Field.code_class(f_type).new(f_value)
|
74
89
|
end
|
75
|
-
|
90
|
+
|
76
91
|
def add_opt_field(f, known)
|
77
|
-
m = /^([A-Za-z]+):([A-Za-z]+):(.*)$/.match(f)
|
78
|
-
|
92
|
+
m = /^([A-Za-z]+):([A-Za-z]+):(.*)$/.match(f)
|
93
|
+
raise "Cannot parse field: '#{f}'" unless m
|
94
|
+
|
79
95
|
f_tag = m[1].to_sym
|
80
96
|
f_type = m[2].to_sym
|
81
97
|
f_value = m[3]
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
98
|
+
|
99
|
+
if known[f_tag].nil? && f_tag =~ /^[A-Z]+$/
|
100
|
+
raise "Unknown reserved tag #{f_tag} for a #{type} record."
|
101
|
+
end
|
102
|
+
|
103
|
+
unless known[f_tag].nil? || known[f_tag] == f_type
|
104
|
+
raise "Wrong field type #{f_type} for a #{f_tag} tag," \
|
105
|
+
" expected #{known[f_tag]}"
|
106
|
+
end
|
107
|
+
|
87
108
|
add_field(f_tag, f_type, f_value)
|
88
109
|
end
|
89
|
-
|
90
110
|
end
|