gfa 0.1.2 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/Gemfile +8 -2
- data/LICENSE +2 -2
- data/README.md +23 -15
- data/Rakefile +9 -8
- data/lib/gfa/common.rb +39 -42
- data/lib/gfa/field/char.rb +2 -2
- data/lib/gfa/field/float.rb +17 -1
- data/lib/gfa/field/hex.rb +17 -1
- data/lib/gfa/field/json.rb +18 -0
- data/lib/gfa/field/numarray.rb +38 -5
- data/lib/gfa/field/sigint.rb +14 -2
- data/lib/gfa/field/string.rb +11 -3
- data/lib/gfa/field.rb +89 -19
- data/lib/gfa/generator.rb +32 -28
- data/lib/gfa/graph.rb +14 -16
- data/lib/gfa/parser.rb +31 -22
- data/lib/gfa/record/comment.rb +15 -0
- data/lib/gfa/record/containment.rb +19 -13
- data/lib/gfa/record/header.rb +11 -10
- data/lib/gfa/record/jump.rb +45 -0
- data/lib/gfa/record/link.rb +42 -42
- data/lib/gfa/record/path.rb +14 -13
- data/lib/gfa/record/segment.rb +22 -18
- data/lib/gfa/record/walk.rb +20 -0
- data/lib/gfa/record.rb +53 -33
- data/lib/gfa/record_set/comment_set.rb +3 -0
- data/lib/gfa/record_set/containment_set.rb +4 -0
- data/lib/gfa/record_set/header_set.rb +3 -0
- data/lib/gfa/record_set/jump_set.rb +3 -0
- data/lib/gfa/record_set/link_set.rb +3 -0
- data/lib/gfa/record_set/path_set.rb +4 -0
- data/lib/gfa/record_set/segment_set.rb +4 -0
- data/lib/gfa/record_set/walk_set.rb +3 -0
- data/lib/gfa/record_set.rb +99 -0
- data/lib/gfa/version.rb +1 -1
- data/lib/gfa.rb +4 -4
- data/test/common_test.rb +5 -5
- data/test/field_test.rb +52 -26
- data/test/parser_test.rb +57 -20
- data/test/record_test.rb +7 -0
- data/test/test_helper.rb +10 -5
- metadata +33 -21
data/lib/gfa/graph.rb
CHANGED
@@ -1,8 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'rgl/adjacency'
|
2
|
+
require 'rgl/implicit'
|
3
3
|
|
4
4
|
class GFA
|
5
|
-
|
6
5
|
##
|
7
6
|
# Generates a RGL::ImplicitGraph object describing the links in the GFA.
|
8
7
|
# The +opts+ argument is a hash with any of the following key-value pairs:
|
@@ -11,7 +10,7 @@ class GFA
|
|
11
10
|
# true.
|
12
11
|
# * :directed => bool. If false, ignores direction of the links. By default
|
13
12
|
# the same value as :orient.
|
14
|
-
def implicit_graph(opts={})
|
13
|
+
def implicit_graph(opts = {})
|
15
14
|
rgl_implicit_graph(opts)
|
16
15
|
end
|
17
16
|
|
@@ -19,7 +18,7 @@ class GFA
|
|
19
18
|
# Generates a RGL::DirectedAdjacencyGraph or RGL::AdjacencyGraph object.
|
20
19
|
# The +opts+ argument is a hash with the same supported key-value pairs as
|
21
20
|
# in #implicit_graph.
|
22
|
-
def adjacency_graph(opts={})
|
21
|
+
def adjacency_graph(opts = {})
|
23
22
|
implicit_graph(opts).to_adjacency
|
24
23
|
end
|
25
24
|
|
@@ -27,7 +26,7 @@ class GFA
|
|
27
26
|
|
28
27
|
def segment_names_with_orient
|
29
28
|
segments.flat_map do |s|
|
30
|
-
%w[+ -].map{ |orient| GFA::GraphVertex.idx(s, orient) }
|
29
|
+
%w[+ -].map { |orient| GFA::GraphVertex.idx(s, orient) }
|
31
30
|
end.to_set
|
32
31
|
end
|
33
32
|
|
@@ -44,8 +43,8 @@ class GFA
|
|
44
43
|
(opts[:orient] ? segment_names_with_orient :
|
45
44
|
segment_names).each(&b)
|
46
45
|
end
|
47
|
-
g.adjacent_iterator do |x,b|
|
48
|
-
rgl_implicit_adjacent_iterator(x,b,opts)
|
46
|
+
g.adjacent_iterator do |x, b|
|
47
|
+
rgl_implicit_adjacent_iterator(x, b, opts)
|
49
48
|
end
|
50
49
|
g.directed = opts[:directed]
|
51
50
|
end
|
@@ -57,25 +56,25 @@ class GFA
|
|
57
56
|
opts
|
58
57
|
end
|
59
58
|
|
60
|
-
def rgl_implicit_adjacent_iterator(x,b,opts)
|
59
|
+
def rgl_implicit_adjacent_iterator(x, b, opts)
|
61
60
|
links.each do |l|
|
62
61
|
if l.from?(x.segment, x.orient)
|
63
62
|
orient = opts[:orient] ? l.to_orient : nil
|
64
63
|
b.call(GFA::GraphVertex.idx(l.to, orient))
|
65
|
-
elsif opts[:orient]
|
64
|
+
elsif opts[:orient] && l.to?(x.segment, orient_rc(x.orient))
|
66
65
|
orient = orient_rc(l.from_orient.value)
|
67
66
|
b.call(GFA::GraphVertex.idx(l.from, orient))
|
68
67
|
end
|
69
68
|
end
|
70
69
|
end
|
71
70
|
|
72
|
-
def orient_rc(o)
|
73
|
-
|
71
|
+
def orient_rc(o)
|
72
|
+
o == '+' ? '-' : '+'
|
73
|
+
end
|
74
74
|
end
|
75
75
|
|
76
76
|
|
77
77
|
class GFA::GraphVertex # :nodoc:
|
78
|
-
|
79
78
|
# Class-level
|
80
79
|
@@idx = {}
|
81
80
|
def self.idx(segment, orient)
|
@@ -83,10 +82,10 @@ class GFA::GraphVertex # :nodoc:
|
|
83
82
|
@@idx[n.to_s] ||= n
|
84
83
|
@@idx[n.to_s]
|
85
84
|
end
|
86
|
-
|
85
|
+
|
87
86
|
# Instance-level
|
88
87
|
attr :segment, :orient
|
89
|
-
|
88
|
+
|
90
89
|
def initialize(segment, orient)
|
91
90
|
@segment = segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
92
91
|
segment.is_a?(GFA::Field) ? segment.value : segment
|
@@ -96,5 +95,4 @@ class GFA::GraphVertex # :nodoc:
|
|
96
95
|
def to_s
|
97
96
|
"#{segment}#{orient}"
|
98
97
|
end
|
99
|
-
|
100
98
|
end
|
data/lib/gfa/parser.rb
CHANGED
@@ -1,18 +1,23 @@
|
|
1
|
-
require
|
1
|
+
require 'gfa/record'
|
2
2
|
|
3
3
|
class GFA
|
4
4
|
# Class-level
|
5
|
-
MIN_VERSION =
|
6
|
-
MAX_VERSION =
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
5
|
+
MIN_VERSION = '1.0'
|
6
|
+
MAX_VERSION = '1.2'
|
7
|
+
|
8
|
+
##
|
9
|
+
# Load a GFA object from a +gfa+ file with options +opts+:
|
10
|
+
# - index: If the records should be indexed as loaded (default: true)
|
11
|
+
# - comments: If the comment records should be saved (default: false)
|
12
|
+
def self.load(file, opts = {})
|
13
|
+
gfa = GFA.new(opts)
|
14
|
+
fh = File.open(file, 'r')
|
11
15
|
fh.each { |ln| gfa << ln }
|
12
|
-
fh.close
|
13
16
|
gfa
|
17
|
+
ensure
|
18
|
+
fh&.close
|
14
19
|
end
|
15
|
-
|
20
|
+
|
16
21
|
def self.supported_version?(v)
|
17
22
|
v.to_f >= MIN_VERSION.to_f and v.to_f <= MAX_VERSION.to_f
|
18
23
|
end
|
@@ -20,26 +25,30 @@ class GFA
|
|
20
25
|
# Instance-level
|
21
26
|
def <<(obj)
|
22
27
|
obj = parse_line(obj) unless obj.is_a? GFA::Record
|
23
|
-
return if obj.nil?
|
28
|
+
return if obj.nil? || obj.empty?
|
24
29
|
@records[obj.type] << obj
|
25
|
-
|
26
|
-
|
30
|
+
|
31
|
+
if obj.type == :Header && !obj.VN.nil?
|
32
|
+
set_gfa_version(obj.VN.value)
|
27
33
|
end
|
28
34
|
end
|
29
35
|
|
30
36
|
def set_gfa_version(v)
|
37
|
+
v = v.value if v.is_a? GFA::Field
|
38
|
+
unless GFA::supported_version? v
|
39
|
+
raise "GFA version currently unsupported: #{v}"
|
40
|
+
end
|
41
|
+
|
31
42
|
@gfa_version = v
|
32
|
-
raise "GFA version currently unsupported: #{v}." unless
|
33
|
-
GFA::supported_version? gfa_version
|
34
43
|
end
|
35
|
-
|
44
|
+
|
36
45
|
private
|
37
|
-
|
38
|
-
def parse_line(ln)
|
39
|
-
ln.chomp!
|
40
|
-
return nil if ln =~ /^\s*$/
|
41
|
-
cols = ln.split("\t")
|
42
|
-
GFA::Record.code_class(cols.shift).new(*cols)
|
43
|
-
end
|
44
46
|
|
47
|
+
def parse_line(string)
|
48
|
+
string = string.chomp
|
49
|
+
return nil if string =~ /^\s*$/
|
50
|
+
return nil if !opts[:comments] && string[0] == '#'
|
51
|
+
|
52
|
+
GFA::Record[string]
|
53
|
+
end
|
45
54
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
class GFA::Record::Comment < GFA::Record
|
2
|
+
CODE = :'#'
|
3
|
+
REQ_FIELDS = %i[comment]
|
4
|
+
OPT_FIELDS = {}
|
5
|
+
|
6
|
+
REQ_FIELDS.each_index do |i|
|
7
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
8
|
+
end
|
9
|
+
|
10
|
+
def initialize(comment, *opt_fields)
|
11
|
+
@fields = {}
|
12
|
+
add_field(2, :Z, comment, /.*/)
|
13
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
14
|
+
end
|
15
|
+
end
|
@@ -1,24 +1,30 @@
|
|
1
1
|
class GFA::Record::Containment < GFA::Record
|
2
2
|
CODE = :C
|
3
|
-
REQ_FIELDS = [
|
3
|
+
REQ_FIELDS = %i[from from_orient to to_orient pos overlap]
|
4
4
|
OPT_FIELDS = {
|
5
|
-
:
|
6
|
-
:
|
5
|
+
RC: :i, # Read coverage
|
6
|
+
NM: :i, # Number of mismatches/gaps
|
7
|
+
ID: :Z # Edge identifier
|
7
8
|
}
|
8
9
|
|
9
10
|
REQ_FIELDS.each_index do |i|
|
10
|
-
define_method(REQ_FIELDS[i]) { fields[i+2] }
|
11
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
11
12
|
end
|
12
|
-
|
13
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
14
|
+
|
15
|
+
alias container from
|
16
|
+
alias container_orient from_orient
|
17
|
+
alias contained to
|
18
|
+
alias contained_orient to_orient
|
19
|
+
|
13
20
|
def initialize(from, from_orient, to, to_orient, pos, overlap, *opt_fields)
|
14
21
|
@fields = {}
|
15
|
-
add_field(2, :Z, from,
|
16
|
-
add_field(3, :Z, from_orient,
|
17
|
-
add_field(4, :Z, to,
|
18
|
-
add_field(5, :Z, to_orient,
|
19
|
-
add_field(6, :i, pos,
|
20
|
-
add_field(7, :Z, overlap,
|
21
|
-
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
22
|
+
add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
|
23
|
+
add_field(3, :Z, from_orient, /[+-]/)
|
24
|
+
add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
|
25
|
+
add_field(5, :Z, to_orient, /[+-]/)
|
26
|
+
add_field(6, :i, pos, /[0-9]*/)
|
27
|
+
add_field(7, :Z, overlap, /\*|([0-9]+[MIDNSHPX=])+/)
|
28
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
22
29
|
end
|
23
|
-
|
24
30
|
end
|
data/lib/gfa/record/header.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
class GFA::Record::Header < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
def initialize(*opt_fields)
|
9
|
-
@fields = {}
|
10
|
-
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
11
|
-
end
|
2
|
+
CODE = :H
|
3
|
+
REQ_FIELDS = []
|
4
|
+
OPT_FIELDS = {
|
5
|
+
VN: :Z # Version number
|
6
|
+
}
|
12
7
|
|
8
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
9
|
+
|
10
|
+
def initialize(*opt_fields)
|
11
|
+
@fields = {}
|
12
|
+
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
13
|
+
end
|
13
14
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
class GFA::Record::Jump < GFA::Record
|
2
|
+
CODE = :J
|
3
|
+
REQ_FIELDS = %i[from from_orient to to_orient distance]
|
4
|
+
OPT_FIELDS = {
|
5
|
+
SC: :i # 1 indicates indirect shortcut connections. Only 0/1 allowed.
|
6
|
+
}
|
7
|
+
|
8
|
+
REQ_FIELDS.each_index do |i|
|
9
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
10
|
+
end
|
11
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
12
|
+
|
13
|
+
def initialize(from, from_orient, to, to_orient, distance, *opt_fields)
|
14
|
+
@fields = {}
|
15
|
+
add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
|
16
|
+
add_field(3, :Z, from_orient, /[+-]/)
|
17
|
+
add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
|
18
|
+
add_field(5, :Z, to_orient, /[+-]/)
|
19
|
+
add_field(6, :Z, distance, /\*|[-+]?[0-9]+/)
|
20
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
21
|
+
end
|
22
|
+
|
23
|
+
def from?(segment, orient = nil)
|
24
|
+
links_from_to?(segment, orient, true)
|
25
|
+
end
|
26
|
+
|
27
|
+
def to?(segment, orient = nil)
|
28
|
+
links_from_to?(segment, orient, false)
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def links_from_to?(segment, orient, from)
|
34
|
+
segment = segment_name(segment)
|
35
|
+
orient = orient.value if orient.is_a? GFA::Field
|
36
|
+
base_k = from ? 2 : 4
|
37
|
+
segment==fields[base_k].value &&
|
38
|
+
(orient.nil? || orient==fields[base_k + 1].value)
|
39
|
+
end
|
40
|
+
|
41
|
+
def segment_name(segment)
|
42
|
+
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
43
|
+
segment.is_a?(GFA::Field) ? segment.value : segment
|
44
|
+
end
|
45
|
+
end
|
data/lib/gfa/record/link.rb
CHANGED
@@ -1,50 +1,50 @@
|
|
1
1
|
class GFA::Record::Link < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
REQ_FIELDS.each_index do |i|
|
13
|
-
define_method(REQ_FIELDS[i]) { fields[i+2] }
|
14
|
-
end
|
2
|
+
CODE = :L
|
3
|
+
REQ_FIELDS = %i[from from_orient to to_orient overlap]
|
4
|
+
OPT_FIELDS = {
|
5
|
+
MQ: :i, # Mapping quality
|
6
|
+
NM: :i, # Number of mismatches/gaps
|
7
|
+
EC: :i, # Read count
|
8
|
+
FC: :i, # Fragment count
|
9
|
+
KC: :i, # k-mer count
|
10
|
+
ID: :Z # Edge identifier
|
11
|
+
}
|
15
12
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
add_field(4, :Z, to, /^[!-)+-<>-~][!-~]*$/)
|
21
|
-
add_field(5, :Z, to_orient, /^+|-$/)
|
22
|
-
add_field(6, :Z, overlap, /^\*|([0-9]+[MIDNSHPX=])+$/)
|
23
|
-
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
24
|
-
end
|
13
|
+
REQ_FIELDS.each_index do |i|
|
14
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
15
|
+
end
|
16
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
25
17
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
18
|
+
def initialize(from, from_orient, to, to_orient, overlap, *opt_fields)
|
19
|
+
@fields = {}
|
20
|
+
add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
|
21
|
+
add_field(3, :Z, from_orient, /[+-]/)
|
22
|
+
add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
|
23
|
+
add_field(5, :Z, to_orient, /[+-]/)
|
24
|
+
add_field(6, :Z, overlap, /\*|([0-9]+[MIDNSHPX=])+/)
|
25
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
26
|
+
end
|
34
27
|
|
35
|
-
|
28
|
+
def from?(segment, orient = nil)
|
29
|
+
links_from_to?(segment, orient, true)
|
30
|
+
end
|
36
31
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
base_k = from ? 2 : 4
|
41
|
-
segment==fields[base_k].value and
|
42
|
-
(orient.nil? or orient==fields[base_k + 1].value)
|
43
|
-
end
|
32
|
+
def to?(segment, orient = nil)
|
33
|
+
links_from_to?(segment, orient, false)
|
34
|
+
end
|
44
35
|
|
45
|
-
|
46
|
-
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
47
|
-
segment.is_a?(GFA::Field) ? segment.value : segment
|
48
|
-
end
|
36
|
+
private
|
49
37
|
|
38
|
+
def links_from_to?(segment, orient, from)
|
39
|
+
segment = segment_name(segment)
|
40
|
+
orient = orient.value if orient.is_a? GFA::Field
|
41
|
+
base_k = from ? 2 : 4
|
42
|
+
segment==fields[base_k].value &&
|
43
|
+
(orient.nil? || orient==fields[base_k + 1].value)
|
44
|
+
end
|
45
|
+
|
46
|
+
def segment_name(segment)
|
47
|
+
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
48
|
+
segment.is_a?(GFA::Field) ? segment.value : segment
|
49
|
+
end
|
50
50
|
end
|
data/lib/gfa/record/path.rb
CHANGED
@@ -1,18 +1,19 @@
|
|
1
1
|
class GFA::Record::Path < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
CODE = :P
|
3
|
+
REQ_FIELDS = %i[path_name segment_name overlaps]
|
4
|
+
OPT_FIELDS = {}
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
REQ_FIELDS.each_index do |i|
|
7
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
8
|
+
end
|
9
9
|
|
10
|
-
|
11
|
-
@fields = {}
|
12
|
-
add_field(2, :Z, path_name, /^[!-)+-<>-~][!-~]*$/)
|
13
|
-
add_field(3, :Z, segment_name, /^[!-)+-<>-~][!-~]*$/)
|
14
|
-
add_field(4, :Z, cigar, /^\*|([0-9]+[MIDNSHPX=])+$/)
|
15
|
-
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
16
|
-
end
|
10
|
+
alias cigar overlaps
|
17
11
|
|
12
|
+
def initialize(path_name, segment_name, overlaps, *opt_fields)
|
13
|
+
@fields = {}
|
14
|
+
add_field(2, :Z, path_name, /[!-)+-<>-~][!-~]*/)
|
15
|
+
add_field(3, :Z, segment_name, /[!-)+-<>-~][!-~]*/)
|
16
|
+
add_field(4, :Z, overlaps, /\*|([0-9]+[MIDNSHPX=]|[-+]?[0-9]+J|.)+/)
|
17
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
18
|
+
end
|
18
19
|
end
|
data/lib/gfa/record/segment.rb
CHANGED
@@ -1,22 +1,26 @@
|
|
1
1
|
class GFA::Record::Segment < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
2
|
+
CODE = :S
|
3
|
+
REQ_FIELDS = %i[name sequence]
|
4
|
+
OPT_FIELDS = {
|
5
|
+
LN: :i, # Segment length
|
6
|
+
RC: :i, # Read count
|
7
|
+
FC: :i, # Fragment count
|
8
|
+
KC: :i, # k-mer count
|
9
|
+
SH: :H, # SHA-256 checksum of the sequence
|
10
|
+
UR: :Z, # URI or local file-system path of the sequence
|
11
|
+
# Non-canonical
|
12
|
+
DP: :f # (From SAM)
|
13
|
+
}
|
10
14
|
|
11
|
-
|
12
|
-
|
13
|
-
|
15
|
+
REQ_FIELDS.each_index do |i|
|
16
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
17
|
+
end
|
18
|
+
OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
|
14
19
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
20
|
+
def initialize(name, sequence, *opt_fields)
|
21
|
+
@fields = {}
|
22
|
+
add_field(2, :Z, name, /[!-)+-<>-~][!-~]*/)
|
23
|
+
add_field(3, :Z, sequence, /\*|[A-Za-z=.]+/)
|
24
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
25
|
+
end
|
22
26
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class GFA::Record::Walk < GFA::Record
|
2
|
+
CODE = :W
|
3
|
+
REQ_FIELDS = %i[sample_id hap_index seq_id seq_start seq_end walk]
|
4
|
+
OPT_FIELDS = {}
|
5
|
+
|
6
|
+
REQ_FIELDS.each_index do |i|
|
7
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
8
|
+
end
|
9
|
+
|
10
|
+
def initialize(sample_id, hap_index, seq_id, seq_start, seq_end, walk, *opt_fields)
|
11
|
+
@fields = {}
|
12
|
+
add_field(2, :Z, sample_id, /[!-)+-<>-~][!-~]*/)
|
13
|
+
add_field(3, :i, hap_index, /[0-9]+/)
|
14
|
+
add_field(4, :Z, seq_id, /[!-)+-<>-~][!-~]*/)
|
15
|
+
add_field(5, :i, seq_start, /\*|[0-9]+/)
|
16
|
+
add_field(6, :i, seq_end, /\*|[0-9]+/)
|
17
|
+
add_field(7, :Z, walk, /([><][!-;=?-~]+)+/)
|
18
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
19
|
+
end
|
20
|
+
end
|
data/lib/gfa/record.rb
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
class GFA::Record
|
2
|
-
|
3
2
|
# Class-level
|
4
|
-
|
5
3
|
CODES = {
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
4
|
+
:'#' => :Comment,
|
5
|
+
H: :Header,
|
6
|
+
S: :Segment,
|
7
|
+
L: :Link,
|
8
|
+
J: :Jump, # Since 1.2
|
9
|
+
C: :Containment,
|
10
|
+
P: :Path,
|
11
|
+
W: :Walk # Since 1.1
|
11
12
|
}
|
12
13
|
REQ_FIELDS = []
|
13
14
|
OPT_FIELDS = {}
|
14
15
|
TYPES = CODES.values
|
15
|
-
|
16
16
|
TYPES.each { |t| require "gfa/record/#{t.downcase}" }
|
17
17
|
|
18
|
-
[
|
18
|
+
%i[CODES REQ_FIELDS OPT_FIELDS TYPES].each do |x|
|
19
19
|
define_singleton_method(x) { const_get(x) }
|
20
20
|
end
|
21
21
|
|
@@ -29,32 +29,46 @@ class GFA::Record
|
|
29
29
|
const_get(name)
|
30
30
|
end
|
31
31
|
|
32
|
+
def self.[](string)
|
33
|
+
split = string[0] == '#' ? ['', 2] : ["\t", 0]
|
34
|
+
code, *values = string.chomp.split(*split)
|
35
|
+
code_class(code).new(*values)
|
36
|
+
end
|
37
|
+
|
32
38
|
# Instance-level
|
33
39
|
|
34
40
|
attr :fields
|
35
41
|
|
36
|
-
def [](k)
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
def
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
def [](k)
|
43
|
+
fields[k]
|
44
|
+
end
|
45
|
+
|
46
|
+
def type
|
47
|
+
CODES[code]
|
48
|
+
end
|
49
|
+
|
50
|
+
def code
|
51
|
+
self.class.const_get(:CODE)
|
52
|
+
end
|
53
|
+
|
54
|
+
def empty?
|
55
|
+
fields.empty?
|
56
|
+
end
|
57
|
+
|
44
58
|
def to_s
|
45
59
|
o = [code.to_s]
|
46
60
|
self.class.REQ_FIELDS.each_index do |i|
|
47
|
-
o << fields[i+2].to_s(false)
|
61
|
+
o << fields[i + 2].to_s(false)
|
48
62
|
end
|
49
|
-
fields.each do |k,v|
|
63
|
+
fields.each do |k, v|
|
50
64
|
next if k.is_a? Integer
|
51
65
|
o << "#{k}:#{v}"
|
52
66
|
end
|
53
67
|
o.join("\t")
|
54
68
|
end
|
55
|
-
|
69
|
+
|
56
70
|
def hash
|
57
|
-
{code => fields}.hash
|
71
|
+
{ code => fields }.hash
|
58
72
|
end
|
59
73
|
|
60
74
|
def eql?(rec)
|
@@ -64,27 +78,33 @@ class GFA::Record
|
|
64
78
|
alias == eql?
|
65
79
|
|
66
80
|
private
|
67
|
-
|
68
|
-
def add_field(f_tag, f_type, f_value, format=nil)
|
81
|
+
|
82
|
+
def add_field(f_tag, f_type, f_value, format = nil)
|
69
83
|
unless format.nil?
|
70
84
|
msg = (f_tag.is_a?(Integer) ? "column #{f_tag}" : "#{f_tag} field")
|
71
85
|
GFA.assert_format(f_value, format, "Bad #{type} #{msg}")
|
72
86
|
end
|
73
|
-
|
87
|
+
|
88
|
+
@fields[f_tag] = GFA::Field.code_class(f_type).new(f_value)
|
74
89
|
end
|
75
|
-
|
90
|
+
|
76
91
|
def add_opt_field(f, known)
|
77
|
-
m = /^([A-Za-z]+):([A-Za-z]+):(.*)$/.match(f)
|
78
|
-
|
92
|
+
m = /^([A-Za-z]+):([A-Za-z]+):(.*)$/.match(f)
|
93
|
+
raise "Cannot parse field: '#{f}'" unless m
|
94
|
+
|
79
95
|
f_tag = m[1].to_sym
|
80
96
|
f_type = m[2].to_sym
|
81
97
|
f_value = m[3]
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
98
|
+
|
99
|
+
if known[f_tag].nil? && f_tag =~ /^[A-Z]+$/
|
100
|
+
raise "Unknown reserved tag #{f_tag} for a #{type} record."
|
101
|
+
end
|
102
|
+
|
103
|
+
unless known[f_tag].nil? || known[f_tag] == f_type
|
104
|
+
raise "Wrong field type #{f_type} for a #{f_tag} tag," \
|
105
|
+
" expected #{known[f_tag]}"
|
106
|
+
end
|
107
|
+
|
87
108
|
add_field(f_tag, f_type, f_value)
|
88
109
|
end
|
89
|
-
|
90
110
|
end
|