gfa 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile +8 -2
- data/LICENSE +2 -2
- data/README.md +17 -9
- data/Rakefile +9 -8
- data/lib/gfa/common.rb +36 -42
- data/lib/gfa/field/char.rb +0 -1
- data/lib/gfa/field/float.rb +0 -1
- data/lib/gfa/field/hex.rb +0 -1
- data/lib/gfa/field/json.rb +9 -0
- data/lib/gfa/field/numarray.rb +9 -1
- data/lib/gfa/field/sigint.rb +1 -2
- data/lib/gfa/field/string.rb +1 -2
- data/lib/gfa/field.rb +7 -9
- data/lib/gfa/generator.rb +32 -28
- data/lib/gfa/graph.rb +13 -14
- data/lib/gfa/parser.rb +10 -9
- data/lib/gfa/record/comment.rb +10 -0
- data/lib/gfa/record/containment.rb +11 -6
- data/lib/gfa/record/header.rb +9 -10
- data/lib/gfa/record/jump.rb +45 -0
- data/lib/gfa/record/link.rb +40 -41
- data/lib/gfa/record/path.rb +14 -13
- data/lib/gfa/record/segment.rb +21 -18
- data/lib/gfa/record/walk.rb +20 -0
- data/lib/gfa/record.rb +26 -20
- data/lib/gfa/version.rb +1 -1
- data/lib/gfa.rb +4 -4
- data/test/parser_test.rb +8 -8
- data/test/test_helper.rb +5 -5
- metadata +27 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cde4a3e432409c7a2967832dcebd502ddc54b1f6cb25856b6d9d21ce53f67b32
|
4
|
+
data.tar.gz: 91213d63365dd3608c28e30115cbfc8621e78bbe34936832bcee2ac7e6f460fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 321634c28ec8927bd38286a84a02783b9f915dcbabb7941233583dda7f41b66e952ff9611c9158bd7baca09d7d3d6c254a036f1c9f2169e5e24e6e964d292e71
|
7
|
+
data.tar.gz: 3698d16ab5953ffd70bf2c102d154bc1f61e5a13a752bc317a756df13762c3668c1bf6e8144821e53e395783f36c45185249c7fc206694be19145200310c3f48
|
data/Gemfile
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -13,20 +13,20 @@ This implementation follows the specifications of [GFA-spec][].
|
|
13
13
|
To parse a file in GFA format:
|
14
14
|
|
15
15
|
```ruby
|
16
|
-
require
|
16
|
+
require 'gfa'
|
17
17
|
|
18
|
-
my_gfa = GFA.load(
|
18
|
+
my_gfa = GFA.load('assembly.gfa')
|
19
19
|
```
|
20
20
|
|
21
21
|
To load GFA strings line-by-line:
|
22
22
|
|
23
23
|
```ruby
|
24
|
-
require
|
24
|
+
require 'gfa'
|
25
25
|
|
26
26
|
my_gfa = GFA.new
|
27
|
-
fh = File.open(
|
27
|
+
fh = File.open('assembly.gfa', 'r')
|
28
28
|
fh.each do |ln|
|
29
|
-
|
29
|
+
my_gfa << ln
|
30
30
|
end
|
31
31
|
fh.close
|
32
32
|
```
|
@@ -37,15 +37,15 @@ fh.close
|
|
37
37
|
After altering a GFA object, you can simply save it in a file as:
|
38
38
|
|
39
39
|
```ruby
|
40
|
-
my_gfa.save(
|
40
|
+
my_gfa.save('alt-assembly.gfa')
|
41
41
|
```
|
42
42
|
|
43
43
|
Or line-by-line as:
|
44
44
|
|
45
45
|
```ruby
|
46
|
-
fh = File.open(
|
46
|
+
fh = File.open('alt-assembly.gfa', 'w')
|
47
47
|
my_gfa.each_line do |ln|
|
48
|
-
|
48
|
+
fh.puts ln
|
49
49
|
end
|
50
50
|
fh.close
|
51
51
|
```
|
@@ -85,6 +85,14 @@ ug.write_to_graphic_file("jpg")
|
|
85
85
|
gem install gfa
|
86
86
|
```
|
87
87
|
|
88
|
+
Or add the following line to your Gemfile:
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
gem "gfa"
|
92
|
+
```
|
93
|
+
|
94
|
+
and run `bundle install` from your shell.
|
95
|
+
|
88
96
|
|
89
97
|
# Author
|
90
98
|
|
@@ -96,5 +104,5 @@ gem install gfa
|
|
96
104
|
[Artistic License 2.0](LICENSE).
|
97
105
|
|
98
106
|
[GFA-spec]: https://github.com/pmelsted/GFA-spec
|
99
|
-
[lrr]:
|
107
|
+
[lrr]: https://rodriguez-r.com/
|
100
108
|
[rgl]: https://github.com/monora/rgl
|
data/Rakefile
CHANGED
@@ -1,16 +1,17 @@
|
|
1
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rake/testtask'
|
2
3
|
|
3
|
-
$:.unshift File.join(File.dirname(__FILE__),
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), 'lib')
|
4
5
|
|
5
|
-
require
|
6
|
+
require 'gfa/version'
|
6
7
|
|
7
|
-
SOURCES = FileList[
|
8
|
+
SOURCES = FileList['lib/**/*.rb']
|
8
9
|
|
9
|
-
desc
|
10
|
+
desc 'Default Task'
|
10
11
|
task :default => :test
|
11
12
|
|
12
13
|
Rake::TestTask.new do |t|
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
t.libs << 'test'
|
15
|
+
t.pattern = 'test/*_test.rb'
|
16
|
+
t.verbose = true
|
16
17
|
end
|
data/lib/gfa/common.rb
CHANGED
@@ -1,45 +1,39 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require 'gfa/version'
|
2
|
+
require 'gfa/record'
|
3
|
+
require 'gfa/field'
|
4
4
|
|
5
5
|
class GFA
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
def eql?(gfa)
|
40
|
-
records == gfa.records
|
41
|
-
end
|
42
|
-
|
43
|
-
alias == eql?
|
44
|
-
|
6
|
+
# Class-level
|
7
|
+
def self.assert_format(value, regex, message)
|
8
|
+
unless value =~ regex
|
9
|
+
raise "#{message}: #{value}."
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
# Instance-level
|
14
|
+
attr :gfa_version, :records
|
15
|
+
|
16
|
+
GFA::Record.TYPES.each do |r_type|
|
17
|
+
plural = "#{r_type.downcase}s"
|
18
|
+
singular = "#{r_type.downcase}"
|
19
|
+
|
20
|
+
define_method(plural) { records[r_type] }
|
21
|
+
define_method(singular) { |k| records[r_type][k] }
|
22
|
+
define_method("add_#{singular}") { |v| @records[r_type] << v }
|
23
|
+
end
|
24
|
+
|
25
|
+
def initialize
|
26
|
+
@records = {}
|
27
|
+
GFA::Record.TYPES.each { |t| @records[t] = [] }
|
28
|
+
end
|
29
|
+
|
30
|
+
def empty?
|
31
|
+
records.empty? || records.values.all?(&:empty?)
|
32
|
+
end
|
33
|
+
|
34
|
+
def eql?(gfa)
|
35
|
+
records == gfa.records
|
36
|
+
end
|
37
|
+
|
38
|
+
alias == eql?
|
45
39
|
end
|
data/lib/gfa/field/char.rb
CHANGED
data/lib/gfa/field/float.rb
CHANGED
data/lib/gfa/field/hex.rb
CHANGED
data/lib/gfa/field/numarray.rb
CHANGED
@@ -10,7 +10,15 @@ class GFA::Field::NumArray < GFA::Field
|
|
10
10
|
def modifier ; value[0] ; end
|
11
11
|
|
12
12
|
def array ; value[2..-1].split(/,/) ; end
|
13
|
-
|
13
|
+
|
14
14
|
alias as_a array
|
15
15
|
|
16
|
+
def number_type
|
17
|
+
{
|
18
|
+
c: 'int8_t', C: 'uint8_t',
|
19
|
+
s: 'int16_t', S: 'uint16_t',
|
20
|
+
i: 'int32_t', I: 'uint32_t',
|
21
|
+
f: 'float'
|
22
|
+
}[modifier.to_sym]
|
23
|
+
end
|
16
24
|
end
|
data/lib/gfa/field/sigint.rb
CHANGED
data/lib/gfa/field/string.rb
CHANGED
data/lib/gfa/field.rb
CHANGED
@@ -1,17 +1,15 @@
|
|
1
1
|
class GFA::Field
|
2
|
-
|
3
2
|
# Class-level
|
4
|
-
|
5
3
|
CODES = {
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
4
|
+
A: :Char,
|
5
|
+
i: :SigInt,
|
6
|
+
f: :Float,
|
7
|
+
Z: :String,
|
8
|
+
J: :Json, # Excluding new-line and tab characters
|
9
|
+
H: :Hex,
|
10
|
+
B: :NumArray
|
12
11
|
}
|
13
12
|
TYPES = CODES.values
|
14
|
-
|
15
13
|
TYPES.each { |t| require "gfa/field/#{t.downcase}" }
|
16
14
|
|
17
15
|
[:CODES, :TYPES].each do |x|
|
data/lib/gfa/generator.rb
CHANGED
@@ -1,31 +1,35 @@
|
|
1
1
|
class GFA
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
def save(file)
|
3
|
+
fh = File.open(file, 'w')
|
4
|
+
each_line do |ln|
|
5
|
+
fh.puts ln
|
6
|
+
end
|
7
|
+
fh.close
|
8
|
+
end
|
9
|
+
|
10
|
+
def each_line(&blk)
|
11
|
+
set_version_header('1.1') if gfa_version.nil?
|
12
|
+
GFA::Record.TYPES.each do |r_type|
|
13
|
+
records[r_type].each do |record|
|
14
|
+
blk[record.to_s]
|
6
15
|
end
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
def to_s
|
27
|
-
o = ""
|
28
|
-
each_line{ |ln| o += ln + "\n" }
|
29
|
-
o
|
30
|
-
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def set_version_header(v)
|
20
|
+
unset_version
|
21
|
+
@records[:Header] << GFA::Record::Header.new("VN:Z:#{v}")
|
22
|
+
@gfa_version = v
|
23
|
+
end
|
24
|
+
|
25
|
+
def unset_version
|
26
|
+
@records[:Header].delete_if { |o| !o.fields[:VN].nil? }
|
27
|
+
@gfa_version = nil
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_s
|
31
|
+
o = ''
|
32
|
+
each_line { |ln| o += ln + "\n" }
|
33
|
+
o
|
34
|
+
end
|
31
35
|
end
|
data/lib/gfa/graph.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'rgl/adjacency'
|
2
|
+
require 'rgl/implicit'
|
3
3
|
|
4
4
|
class GFA
|
5
5
|
|
@@ -11,7 +11,7 @@ class GFA
|
|
11
11
|
# true.
|
12
12
|
# * :directed => bool. If false, ignores direction of the links. By default
|
13
13
|
# the same value as :orient.
|
14
|
-
def implicit_graph(opts={})
|
14
|
+
def implicit_graph(opts = {})
|
15
15
|
rgl_implicit_graph(opts)
|
16
16
|
end
|
17
17
|
|
@@ -19,7 +19,7 @@ class GFA
|
|
19
19
|
# Generates a RGL::DirectedAdjacencyGraph or RGL::AdjacencyGraph object.
|
20
20
|
# The +opts+ argument is a hash with the same supported key-value pairs as
|
21
21
|
# in #implicit_graph.
|
22
|
-
def adjacency_graph(opts={})
|
22
|
+
def adjacency_graph(opts = {})
|
23
23
|
implicit_graph(opts).to_adjacency
|
24
24
|
end
|
25
25
|
|
@@ -27,7 +27,7 @@ class GFA
|
|
27
27
|
|
28
28
|
def segment_names_with_orient
|
29
29
|
segments.flat_map do |s|
|
30
|
-
%w[+ -].map{ |orient| GFA::GraphVertex.idx(s, orient) }
|
30
|
+
%w[+ -].map { |orient| GFA::GraphVertex.idx(s, orient) }
|
31
31
|
end.to_set
|
32
32
|
end
|
33
33
|
|
@@ -44,8 +44,8 @@ class GFA
|
|
44
44
|
(opts[:orient] ? segment_names_with_orient :
|
45
45
|
segment_names).each(&b)
|
46
46
|
end
|
47
|
-
g.adjacent_iterator do |x,b|
|
48
|
-
rgl_implicit_adjacent_iterator(x,b,opts)
|
47
|
+
g.adjacent_iterator do |x, b|
|
48
|
+
rgl_implicit_adjacent_iterator(x, b, opts)
|
49
49
|
end
|
50
50
|
g.directed = opts[:directed]
|
51
51
|
end
|
@@ -62,20 +62,20 @@ class GFA
|
|
62
62
|
if l.from?(x.segment, x.orient)
|
63
63
|
orient = opts[:orient] ? l.to_orient : nil
|
64
64
|
b.call(GFA::GraphVertex.idx(l.to, orient))
|
65
|
-
elsif opts[:orient]
|
65
|
+
elsif opts[:orient] && l.to?(x.segment, orient_rc(x.orient))
|
66
66
|
orient = orient_rc(l.from_orient.value)
|
67
67
|
b.call(GFA::GraphVertex.idx(l.from, orient))
|
68
68
|
end
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
72
|
-
def orient_rc(o)
|
73
|
-
|
72
|
+
def orient_rc(o)
|
73
|
+
o == '+' ? '-' : '+'
|
74
|
+
end
|
74
75
|
end
|
75
76
|
|
76
77
|
|
77
78
|
class GFA::GraphVertex # :nodoc:
|
78
|
-
|
79
79
|
# Class-level
|
80
80
|
@@idx = {}
|
81
81
|
def self.idx(segment, orient)
|
@@ -83,10 +83,10 @@ class GFA::GraphVertex # :nodoc:
|
|
83
83
|
@@idx[n.to_s] ||= n
|
84
84
|
@@idx[n.to_s]
|
85
85
|
end
|
86
|
-
|
86
|
+
|
87
87
|
# Instance-level
|
88
88
|
attr :segment, :orient
|
89
|
-
|
89
|
+
|
90
90
|
def initialize(segment, orient)
|
91
91
|
@segment = segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
92
92
|
segment.is_a?(GFA::Field) ? segment.value : segment
|
@@ -96,5 +96,4 @@ class GFA::GraphVertex # :nodoc:
|
|
96
96
|
def to_s
|
97
97
|
"#{segment}#{orient}"
|
98
98
|
end
|
99
|
-
|
100
99
|
end
|
data/lib/gfa/parser.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
|
-
require
|
1
|
+
require 'gfa/record'
|
2
2
|
|
3
3
|
class GFA
|
4
4
|
# Class-level
|
5
|
-
MIN_VERSION =
|
6
|
-
MAX_VERSION =
|
5
|
+
MIN_VERSION = '1.0'
|
6
|
+
MAX_VERSION = '1.2'
|
7
7
|
|
8
8
|
def self.load(file)
|
9
9
|
gfa = GFA.new
|
10
|
-
fh = File.open(file,
|
10
|
+
fh = File.open(file, 'r')
|
11
11
|
fh.each { |ln| gfa << ln }
|
12
12
|
fh.close
|
13
13
|
gfa
|
@@ -20,17 +20,19 @@ class GFA
|
|
20
20
|
# Instance-level
|
21
21
|
def <<(obj)
|
22
22
|
obj = parse_line(obj) unless obj.is_a? GFA::Record
|
23
|
-
return if obj.nil?
|
23
|
+
return if obj.nil? || obj.empty?
|
24
24
|
@records[obj.type] << obj
|
25
|
-
|
25
|
+
|
26
|
+
if obj.type == :Header && !obj.fields[:VN].nil?
|
26
27
|
set_gfa_version(obj.fields[:VN].value)
|
27
28
|
end
|
28
29
|
end
|
29
30
|
|
30
31
|
def set_gfa_version(v)
|
31
32
|
@gfa_version = v
|
32
|
-
|
33
|
-
GFA
|
33
|
+
unless GFA::supported_version? gfa_version
|
34
|
+
raise "GFA version currently unsupported: #{v}."
|
35
|
+
end
|
34
36
|
end
|
35
37
|
|
36
38
|
private
|
@@ -41,5 +43,4 @@ class GFA
|
|
41
43
|
cols = ln.split("\t")
|
42
44
|
GFA::Record.code_class(cols.shift).new(*cols)
|
43
45
|
end
|
44
|
-
|
45
46
|
end
|
@@ -1,14 +1,20 @@
|
|
1
1
|
class GFA::Record::Containment < GFA::Record
|
2
2
|
CODE = :C
|
3
|
-
REQ_FIELDS = [
|
3
|
+
REQ_FIELDS = %i[from from_orient to to_orient pos overlap]
|
4
4
|
OPT_FIELDS = {
|
5
|
-
:
|
6
|
-
:
|
5
|
+
RC: :i, # Read coverage
|
6
|
+
NM: :i, # Number of mismatches/gaps
|
7
|
+
ID: :Z # Edge identifier
|
7
8
|
}
|
8
9
|
|
9
10
|
REQ_FIELDS.each_index do |i|
|
10
|
-
define_method(REQ_FIELDS[i]) { fields[i+2] }
|
11
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
11
12
|
end
|
13
|
+
|
14
|
+
alias container from
|
15
|
+
alias container_orient from_orient
|
16
|
+
alias contained to
|
17
|
+
alias contained_orient to_orient
|
12
18
|
|
13
19
|
def initialize(from, from_orient, to, to_orient, pos, overlap, *opt_fields)
|
14
20
|
@fields = {}
|
@@ -18,7 +24,6 @@ class GFA::Record::Containment < GFA::Record
|
|
18
24
|
add_field(5, :Z, to_orient, /^+|-$/)
|
19
25
|
add_field(6, :i, pos, /^[0-9]*$/)
|
20
26
|
add_field(7, :Z, overlap, /^\*|([0-9]+[MIDNSHPX=])+$/)
|
21
|
-
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
27
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
22
28
|
end
|
23
|
-
|
24
29
|
end
|
data/lib/gfa/record/header.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
class GFA::Record::Header < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
2
|
+
CODE = :H
|
3
|
+
REQ_FIELDS = []
|
4
|
+
OPT_FIELDS = {
|
5
|
+
VN: :Z # Version number
|
6
|
+
}
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
def initialize(*opt_fields)
|
9
|
+
@fields = {}
|
10
|
+
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
11
|
+
end
|
13
12
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
class GFA::Record::Jump < GFA::Record
|
2
|
+
CODE = :J
|
3
|
+
REQ_FIELDS = %i[from from_orient to to_orient distance]
|
4
|
+
OPT_FIELDS = {
|
5
|
+
SC: :i # 1 indicates indirect shortcut connections. Only 0/1 allowed.
|
6
|
+
}
|
7
|
+
|
8
|
+
REQ_FIELDS.each_index do |i|
|
9
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
10
|
+
end
|
11
|
+
|
12
|
+
def initialize(from, from_orient, to, to_orient, distance, *opt_fields)
|
13
|
+
@fields = {}
|
14
|
+
add_field(2, :Z, from, /^[!-)+-<>-~][!-~]*$/)
|
15
|
+
add_field(3, :Z, from_orient, /^+|-$/)
|
16
|
+
add_field(4, :Z, to, /^[!-)+-<>-~][!-~]*$/)
|
17
|
+
add_field(5, :Z, to_orient, /^+|-$/)
|
18
|
+
add_field(6, :Z, distance, /^\*|[-+]?[0-9]+$/)
|
19
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
def from?(segment, orient = nil)
|
24
|
+
links_from_to?(segment, orient, true)
|
25
|
+
end
|
26
|
+
|
27
|
+
def to?(segment, orient = nil)
|
28
|
+
links_from_to?(segment, orient, false)
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def links_from_to?(segment, orient, from)
|
34
|
+
segment = segment_name(segment)
|
35
|
+
orient = orient.value if orient.is_a? GFA::Field
|
36
|
+
base_k = from ? 2 : 4
|
37
|
+
segment==fields[base_k].value &&
|
38
|
+
(orient.nil? || orient==fields[base_k + 1].value)
|
39
|
+
end
|
40
|
+
|
41
|
+
def segment_name(segment)
|
42
|
+
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
43
|
+
segment.is_a?(GFA::Field) ? segment.value : segment
|
44
|
+
end
|
45
|
+
end
|
data/lib/gfa/record/link.rb
CHANGED
@@ -1,50 +1,49 @@
|
|
1
1
|
class GFA::Record::Link < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
2
|
+
CODE = :L
|
3
|
+
REQ_FIELDS = %i[from from_orient to to_orient overlap]
|
4
|
+
OPT_FIELDS = {
|
5
|
+
MQ: :i, # Mapping quality
|
6
|
+
NM: :i, # Number of mismatches/gaps
|
7
|
+
EC: :i, # Read count
|
8
|
+
FC: :i, # Fragment count
|
9
|
+
KC: :i, # k-mer count
|
10
|
+
ID: :Z # Edge identifier
|
11
|
+
}
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
|
13
|
+
REQ_FIELDS.each_index do |i|
|
14
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
15
|
+
end
|
15
16
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
17
|
+
def initialize(from, from_orient, to, to_orient, overlap, *opt_fields)
|
18
|
+
@fields = {}
|
19
|
+
add_field(2, :Z, from, /^[!-)+-<>-~][!-~]*$/)
|
20
|
+
add_field(3, :Z, from_orient, /^+|-$/)
|
21
|
+
add_field(4, :Z, to, /^[!-)+-<>-~][!-~]*$/)
|
22
|
+
add_field(5, :Z, to_orient, /^+|-$/)
|
23
|
+
add_field(6, :Z, overlap, /^\*|([0-9]+[MIDNSHPX=])+$/)
|
24
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
25
|
+
end
|
25
26
|
|
27
|
+
def from?(segment, orient = nil)
|
28
|
+
links_from_to?(segment, orient, true)
|
29
|
+
end
|
26
30
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
def to?(segment, orient=nil)
|
32
|
-
links_from_to?(segment, orient, false)
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
31
|
+
def to?(segment, orient = nil)
|
32
|
+
links_from_to?(segment, orient, false)
|
33
|
+
end
|
36
34
|
|
37
|
-
|
38
|
-
segment = segment_name(segment)
|
39
|
-
orient = orient.value if orient.is_a? GFA::Field
|
40
|
-
base_k = from ? 2 : 4
|
41
|
-
segment==fields[base_k].value and
|
42
|
-
(orient.nil? or orient==fields[base_k + 1].value)
|
43
|
-
end
|
35
|
+
private
|
44
36
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
37
|
+
def links_from_to?(segment, orient, from)
|
38
|
+
segment = segment_name(segment)
|
39
|
+
orient = orient.value if orient.is_a? GFA::Field
|
40
|
+
base_k = from ? 2 : 4
|
41
|
+
segment==fields[base_k].value &&
|
42
|
+
(orient.nil? || orient==fields[base_k + 1].value)
|
43
|
+
end
|
49
44
|
|
45
|
+
def segment_name(segment)
|
46
|
+
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
47
|
+
segment.is_a?(GFA::Field) ? segment.value : segment
|
48
|
+
end
|
50
49
|
end
|
data/lib/gfa/record/path.rb
CHANGED
@@ -1,18 +1,19 @@
|
|
1
1
|
class GFA::Record::Path < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
CODE = :P
|
3
|
+
REQ_FIELDS = %i[path_name segment_name cigar]
|
4
|
+
OPT_FIELDS = {}
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
REQ_FIELDS.each_index do |i|
|
7
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
8
|
+
end
|
9
9
|
|
10
|
-
|
11
|
-
@fields = {}
|
12
|
-
add_field(2, :Z, path_name, /^[!-)+-<>-~][!-~]*$/)
|
13
|
-
add_field(3, :Z, segment_name, /^[!-)+-<>-~][!-~]*$/)
|
14
|
-
add_field(4, :Z, cigar, /^\*|([0-9]+[MIDNSHPX=])+$/)
|
15
|
-
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
16
|
-
end
|
10
|
+
alias overlaps cigar
|
17
11
|
|
12
|
+
def initialize(path_name, segment_name, cigar, *opt_fields)
|
13
|
+
@fields = {}
|
14
|
+
add_field(2, :Z, path_name, /^[!-)+-<>-~][!-~]*$/)
|
15
|
+
add_field(3, :Z, segment_name, /^[!-)+-<>-~][!-~]*$/)
|
16
|
+
add_field(4, :Z, cigar, /^\*|([0-9]+[MIDNSHPX=])+$/)
|
17
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
18
|
+
end
|
18
19
|
end
|
data/lib/gfa/record/segment.rb
CHANGED
@@ -1,22 +1,25 @@
|
|
1
1
|
class GFA::Record::Segment < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
2
|
+
CODE = :S
|
3
|
+
REQ_FIELDS = %i[name sequence]
|
4
|
+
OPT_FIELDS = {
|
5
|
+
LN: :i, # Segment length
|
6
|
+
RC: :i, # Read count
|
7
|
+
FC: :i, # Fragment count
|
8
|
+
KC: :i, # k-mer count
|
9
|
+
SH: :H, # SHA-256 checksum of the sequence
|
10
|
+
UR: :Z, # URI or local file-system path of the sequence
|
11
|
+
# Non-canonical
|
12
|
+
DP: :f # (From SAM)
|
13
|
+
}
|
10
14
|
|
11
|
-
|
12
|
-
|
13
|
-
|
15
|
+
REQ_FIELDS.each_index do |i|
|
16
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
17
|
+
end
|
14
18
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
def initialize(name, sequence, *opt_fields)
|
20
|
+
@fields = {}
|
21
|
+
add_field(2, :Z, name, /^[!-)+-<>-~][!-~]*$/)
|
22
|
+
add_field(3, :Z, sequence, /^\*|[A-Za-z=.]+$/)
|
23
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
24
|
+
end
|
22
25
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class GFA::Record::Walk < GFA::Record
|
2
|
+
CODE = :W
|
3
|
+
REQ_FIELDS = %i[sample_id hap_index seq_id seq_start seq_end walk]
|
4
|
+
OPT_FIELDS = {}
|
5
|
+
|
6
|
+
REQ_FIELDS.each_index do |i|
|
7
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
8
|
+
end
|
9
|
+
|
10
|
+
def initialize(sample_id, hap_index, seq_id, seq_start, seq_end, walk, *opt_fields)
|
11
|
+
@fields = {}
|
12
|
+
add_field(2, :Z, sample_id, /^[!-)+-<>-~][!-~]*$/)
|
13
|
+
add_field(3, :i, hap_index, /^[0-9]+$/)
|
14
|
+
add_field(4, :Z, seq_id, /^[!-)+-<>-~][!-~]*$/)
|
15
|
+
add_field(5, :i, seq_start, /^\*|[0-9]+$/)
|
16
|
+
add_field(6, :i, seq_end, /^\*|[0-9]+$/)
|
17
|
+
add_field(7, :Z, walk, /^([><][!-;=?-~]+)+$/)
|
18
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
19
|
+
end
|
20
|
+
end
|
data/lib/gfa/record.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
class GFA::Record
|
2
|
-
|
3
2
|
# Class-level
|
4
|
-
|
5
3
|
CODES = {
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
4
|
+
:'#' => :Comment,
|
5
|
+
H: :Header,
|
6
|
+
S: :Segment,
|
7
|
+
L: :Link,
|
8
|
+
J: :Jump, # Since 1.2
|
9
|
+
C: :Containment,
|
10
|
+
P: :Path,
|
11
|
+
W: :Walk # Since 1.1
|
11
12
|
}
|
12
13
|
REQ_FIELDS = []
|
13
14
|
OPT_FIELDS = {}
|
14
15
|
TYPES = CODES.values
|
15
|
-
|
16
16
|
TYPES.each { |t| require "gfa/record/#{t.downcase}" }
|
17
17
|
|
18
18
|
[:CODES, :REQ_FIELDS, :OPT_FIELDS, :TYPES].each do |x|
|
@@ -44,9 +44,9 @@ class GFA::Record
|
|
44
44
|
def to_s
|
45
45
|
o = [code.to_s]
|
46
46
|
self.class.REQ_FIELDS.each_index do |i|
|
47
|
-
o << fields[i+2].to_s(false)
|
47
|
+
o << fields[i + 2].to_s(false)
|
48
48
|
end
|
49
|
-
fields.each do |k,v|
|
49
|
+
fields.each do |k, v|
|
50
50
|
next if k.is_a? Integer
|
51
51
|
o << "#{k}:#{v}"
|
52
52
|
end
|
@@ -54,7 +54,7 @@ class GFA::Record
|
|
54
54
|
end
|
55
55
|
|
56
56
|
def hash
|
57
|
-
{code => fields}.hash
|
57
|
+
{ code => fields }.hash
|
58
58
|
end
|
59
59
|
|
60
60
|
def eql?(rec)
|
@@ -65,26 +65,32 @@ class GFA::Record
|
|
65
65
|
|
66
66
|
private
|
67
67
|
|
68
|
-
def add_field(f_tag, f_type, f_value, format=nil)
|
68
|
+
def add_field(f_tag, f_type, f_value, format = nil)
|
69
69
|
unless format.nil?
|
70
70
|
msg = (f_tag.is_a?(Integer) ? "column #{f_tag}" : "#{f_tag} field")
|
71
71
|
GFA.assert_format(f_value, format, "Bad #{type} #{msg}")
|
72
72
|
end
|
73
|
+
|
73
74
|
@fields[ f_tag ] = GFA::Field.code_class(f_type).new(f_value)
|
74
75
|
end
|
75
76
|
|
76
77
|
def add_opt_field(f, known)
|
77
|
-
m = /^([A-Za-z]+):([A-Za-z]+):(.*)$/.match(f)
|
78
|
-
|
78
|
+
m = /^([A-Za-z]+):([A-Za-z]+):(.*)$/.match(f)
|
79
|
+
raise "Cannot parse field: '#{f}'." unless m
|
80
|
+
|
79
81
|
f_tag = m[1].to_sym
|
80
82
|
f_type = m[2].to_sym
|
81
83
|
f_value = m[3]
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
84
|
+
|
85
|
+
if known[f_tag].nil? && f_tag =~ /^[A-Z]+$/
|
86
|
+
raise "Unknown reserved tag #{f_tag} for a #{type} record."
|
87
|
+
end
|
88
|
+
|
89
|
+
unless known[f_tag].nil? || known[f_tag] == f_type
|
90
|
+
raise "Wrong field type #{f_type} for a #{f_tag} tag," \
|
91
|
+
" expected #{known[f_tag]}"
|
92
|
+
end
|
93
|
+
|
87
94
|
add_field(f_tag, f_type, f_value)
|
88
95
|
end
|
89
|
-
|
90
96
|
end
|
data/lib/gfa/version.rb
CHANGED
data/lib/gfa.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
1
|
+
require 'gfa/common'
|
2
|
+
require 'gfa/parser'
|
3
|
+
require 'gfa/generator'
|
4
|
+
require 'gfa/graph'
|
data/test/parser_test.rb
CHANGED
@@ -4,7 +4,7 @@ require "gfa/parser"
|
|
4
4
|
class ParserTest < Test::Unit::TestCase
|
5
5
|
|
6
6
|
def test_load
|
7
|
-
sample_f = File.expand_path(
|
7
|
+
sample_f = File.expand_path('../fixtures/sample.gfa', __FILE__)
|
8
8
|
assert_respond_to(GFA, :load)
|
9
9
|
pre_fhs = ObjectSpace.each_object(IO).count{ |i| not i.closed? }
|
10
10
|
sample = GFA.load(sample_f)
|
@@ -20,18 +20,18 @@ class ParserTest < Test::Unit::TestCase
|
|
20
20
|
|
21
21
|
def test_version_suppport
|
22
22
|
gfa = GFA.new
|
23
|
-
assert_raise { gfa.set_gfa_version(
|
24
|
-
assert_raise { gfa.set_gfa_version(
|
25
|
-
assert_nothing_raised { gfa.set_gfa_version(
|
23
|
+
assert_raise { gfa.set_gfa_version('0.9') }
|
24
|
+
assert_raise { gfa.set_gfa_version('2.1') }
|
25
|
+
assert_nothing_raised { gfa.set_gfa_version('1.0') }
|
26
26
|
end
|
27
27
|
|
28
28
|
def test_line_by_line
|
29
29
|
gfa = GFA.new
|
30
30
|
assert_respond_to(gfa, :<<)
|
31
31
|
# Empty
|
32
|
-
gfa <<
|
32
|
+
gfa << ' '
|
33
33
|
assert(gfa.empty?)
|
34
|
-
gfa <<
|
34
|
+
gfa << 'H'
|
35
35
|
assert(gfa.empty?)
|
36
36
|
# Segment
|
37
37
|
assert_equal(0, gfa.segments.size)
|
@@ -40,8 +40,8 @@ class ParserTest < Test::Unit::TestCase
|
|
40
40
|
assert_equal(1, gfa.segments.size)
|
41
41
|
# Version
|
42
42
|
assert_nil(gfa.gfa_version)
|
43
|
-
gfa << GFA::Record::Header.new(
|
44
|
-
assert_equal(
|
43
|
+
gfa << GFA::Record::Header.new('VN:Z:1.0')
|
44
|
+
assert_equal('1.0', gfa.gfa_version)
|
45
45
|
end
|
46
46
|
|
47
47
|
end
|
data/test/test_helper.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
require
|
2
|
-
|
1
|
+
require 'simplecov'
|
2
|
+
SimpleCov.start
|
3
3
|
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
4
|
+
require 'rubygems'
|
5
|
+
require 'test/unit'
|
6
|
+
require 'gfa/common'
|
metadata
CHANGED
@@ -1,55 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gfa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rgl
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0.5'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0.5'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: test-unit
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
description: GFA is a graph representation of fragment assemblies
|
@@ -59,61 +59,64 @@ extensions: []
|
|
59
59
|
extra_rdoc_files:
|
60
60
|
- README.md
|
61
61
|
files:
|
62
|
+
- Gemfile
|
63
|
+
- LICENSE
|
64
|
+
- README.md
|
65
|
+
- Rakefile
|
66
|
+
- lib/gfa.rb
|
62
67
|
- lib/gfa/common.rb
|
68
|
+
- lib/gfa/field.rb
|
63
69
|
- lib/gfa/field/char.rb
|
64
70
|
- lib/gfa/field/float.rb
|
65
71
|
- lib/gfa/field/hex.rb
|
72
|
+
- lib/gfa/field/json.rb
|
66
73
|
- lib/gfa/field/numarray.rb
|
67
74
|
- lib/gfa/field/sigint.rb
|
68
75
|
- lib/gfa/field/string.rb
|
69
|
-
- lib/gfa/field.rb
|
70
76
|
- lib/gfa/generator.rb
|
71
77
|
- lib/gfa/graph.rb
|
72
78
|
- lib/gfa/parser.rb
|
79
|
+
- lib/gfa/record.rb
|
80
|
+
- lib/gfa/record/comment.rb
|
73
81
|
- lib/gfa/record/containment.rb
|
74
82
|
- lib/gfa/record/header.rb
|
83
|
+
- lib/gfa/record/jump.rb
|
75
84
|
- lib/gfa/record/link.rb
|
76
85
|
- lib/gfa/record/path.rb
|
77
86
|
- lib/gfa/record/segment.rb
|
78
|
-
- lib/gfa/record.rb
|
87
|
+
- lib/gfa/record/walk.rb
|
79
88
|
- lib/gfa/version.rb
|
80
|
-
- lib/gfa.rb
|
81
89
|
- test/common_test.rb
|
82
90
|
- test/field_test.rb
|
83
91
|
- test/parser_test.rb
|
84
92
|
- test/record_test.rb
|
85
93
|
- test/test_helper.rb
|
86
|
-
- Gemfile
|
87
|
-
- Rakefile
|
88
|
-
- README.md
|
89
|
-
- LICENSE
|
90
94
|
homepage: https://github.com/lmrodriguezr/gfa
|
91
95
|
licenses: []
|
92
96
|
metadata: {}
|
93
|
-
post_install_message:
|
97
|
+
post_install_message:
|
94
98
|
rdoc_options:
|
95
99
|
- lib
|
96
100
|
- README.md
|
97
|
-
- --main
|
101
|
+
- "--main"
|
98
102
|
- README.md
|
99
|
-
- --title
|
103
|
+
- "--title"
|
100
104
|
- Graphical Fragment Assembly (GFA) for Ruby
|
101
105
|
require_paths:
|
102
106
|
- lib
|
103
107
|
required_ruby_version: !ruby/object:Gem::Requirement
|
104
108
|
requirements:
|
105
|
-
- -
|
109
|
+
- - ">="
|
106
110
|
- !ruby/object:Gem::Version
|
107
111
|
version: '0'
|
108
112
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
109
113
|
requirements:
|
110
|
-
- -
|
114
|
+
- - ">="
|
111
115
|
- !ruby/object:Gem::Version
|
112
116
|
version: '0'
|
113
117
|
requirements: []
|
114
|
-
|
115
|
-
|
116
|
-
signing_key:
|
118
|
+
rubygems_version: 3.2.3
|
119
|
+
signing_key:
|
117
120
|
specification_version: 4
|
118
121
|
summary: Graphical Fragment Assembly (GFA) for Ruby
|
119
122
|
test_files: []
|