gfa 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/Gemfile +8 -2
- data/LICENSE +2 -2
- data/README.md +17 -9
- data/Rakefile +9 -8
- data/lib/gfa/common.rb +36 -42
- data/lib/gfa/field/char.rb +0 -1
- data/lib/gfa/field/float.rb +0 -1
- data/lib/gfa/field/hex.rb +0 -1
- data/lib/gfa/field/json.rb +9 -0
- data/lib/gfa/field/numarray.rb +9 -1
- data/lib/gfa/field/sigint.rb +1 -2
- data/lib/gfa/field/string.rb +1 -2
- data/lib/gfa/field.rb +7 -9
- data/lib/gfa/generator.rb +32 -28
- data/lib/gfa/graph.rb +13 -14
- data/lib/gfa/parser.rb +10 -9
- data/lib/gfa/record/comment.rb +10 -0
- data/lib/gfa/record/containment.rb +11 -6
- data/lib/gfa/record/header.rb +9 -10
- data/lib/gfa/record/jump.rb +45 -0
- data/lib/gfa/record/link.rb +40 -41
- data/lib/gfa/record/path.rb +14 -13
- data/lib/gfa/record/segment.rb +21 -18
- data/lib/gfa/record/walk.rb +20 -0
- data/lib/gfa/record.rb +26 -20
- data/lib/gfa/version.rb +1 -1
- data/lib/gfa.rb +4 -4
- data/test/parser_test.rb +8 -8
- data/test/test_helper.rb +5 -5
- metadata +27 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cde4a3e432409c7a2967832dcebd502ddc54b1f6cb25856b6d9d21ce53f67b32
|
4
|
+
data.tar.gz: 91213d63365dd3608c28e30115cbfc8621e78bbe34936832bcee2ac7e6f460fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 321634c28ec8927bd38286a84a02783b9f915dcbabb7941233583dda7f41b66e952ff9611c9158bd7baca09d7d3d6c254a036f1c9f2169e5e24e6e964d292e71
|
7
|
+
data.tar.gz: 3698d16ab5953ffd70bf2c102d154bc1f61e5a13a752bc317a756df13762c3668c1bf6e8144821e53e395783f36c45185249c7fc206694be19145200310c3f48
|
data/Gemfile
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -13,20 +13,20 @@ This implementation follows the specifications of [GFA-spec][].
|
|
13
13
|
To parse a file in GFA format:
|
14
14
|
|
15
15
|
```ruby
|
16
|
-
require
|
16
|
+
require 'gfa'
|
17
17
|
|
18
|
-
my_gfa = GFA.load(
|
18
|
+
my_gfa = GFA.load('assembly.gfa')
|
19
19
|
```
|
20
20
|
|
21
21
|
To load GFA strings line-by-line:
|
22
22
|
|
23
23
|
```ruby
|
24
|
-
require
|
24
|
+
require 'gfa'
|
25
25
|
|
26
26
|
my_gfa = GFA.new
|
27
|
-
fh = File.open(
|
27
|
+
fh = File.open('assembly.gfa', 'r')
|
28
28
|
fh.each do |ln|
|
29
|
-
|
29
|
+
my_gfa << ln
|
30
30
|
end
|
31
31
|
fh.close
|
32
32
|
```
|
@@ -37,15 +37,15 @@ fh.close
|
|
37
37
|
After altering a GFA object, you can simply save it in a file as:
|
38
38
|
|
39
39
|
```ruby
|
40
|
-
my_gfa.save(
|
40
|
+
my_gfa.save('alt-assembly.gfa')
|
41
41
|
```
|
42
42
|
|
43
43
|
Or line-by-line as:
|
44
44
|
|
45
45
|
```ruby
|
46
|
-
fh = File.open(
|
46
|
+
fh = File.open('alt-assembly.gfa', 'w')
|
47
47
|
my_gfa.each_line do |ln|
|
48
|
-
|
48
|
+
fh.puts ln
|
49
49
|
end
|
50
50
|
fh.close
|
51
51
|
```
|
@@ -85,6 +85,14 @@ ug.write_to_graphic_file("jpg")
|
|
85
85
|
gem install gfa
|
86
86
|
```
|
87
87
|
|
88
|
+
Or add the following line to your Gemfile:
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
gem "gfa"
|
92
|
+
```
|
93
|
+
|
94
|
+
and run `bundle install` from your shell.
|
95
|
+
|
88
96
|
|
89
97
|
# Author
|
90
98
|
|
@@ -96,5 +104,5 @@ gem install gfa
|
|
96
104
|
[Artistic License 2.0](LICENSE).
|
97
105
|
|
98
106
|
[GFA-spec]: https://github.com/pmelsted/GFA-spec
|
99
|
-
[lrr]:
|
107
|
+
[lrr]: https://rodriguez-r.com/
|
100
108
|
[rgl]: https://github.com/monora/rgl
|
data/Rakefile
CHANGED
@@ -1,16 +1,17 @@
|
|
1
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rake/testtask'
|
2
3
|
|
3
|
-
$:.unshift File.join(File.dirname(__FILE__),
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), 'lib')
|
4
5
|
|
5
|
-
require
|
6
|
+
require 'gfa/version'
|
6
7
|
|
7
|
-
SOURCES = FileList[
|
8
|
+
SOURCES = FileList['lib/**/*.rb']
|
8
9
|
|
9
|
-
desc
|
10
|
+
desc 'Default Task'
|
10
11
|
task :default => :test
|
11
12
|
|
12
13
|
Rake::TestTask.new do |t|
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
t.libs << 'test'
|
15
|
+
t.pattern = 'test/*_test.rb'
|
16
|
+
t.verbose = true
|
16
17
|
end
|
data/lib/gfa/common.rb
CHANGED
@@ -1,45 +1,39 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require 'gfa/version'
|
2
|
+
require 'gfa/record'
|
3
|
+
require 'gfa/field'
|
4
4
|
|
5
5
|
class GFA
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
def eql?(gfa)
|
40
|
-
records == gfa.records
|
41
|
-
end
|
42
|
-
|
43
|
-
alias == eql?
|
44
|
-
|
6
|
+
# Class-level
|
7
|
+
def self.assert_format(value, regex, message)
|
8
|
+
unless value =~ regex
|
9
|
+
raise "#{message}: #{value}."
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
# Instance-level
|
14
|
+
attr :gfa_version, :records
|
15
|
+
|
16
|
+
GFA::Record.TYPES.each do |r_type|
|
17
|
+
plural = "#{r_type.downcase}s"
|
18
|
+
singular = "#{r_type.downcase}"
|
19
|
+
|
20
|
+
define_method(plural) { records[r_type] }
|
21
|
+
define_method(singular) { |k| records[r_type][k] }
|
22
|
+
define_method("add_#{singular}") { |v| @records[r_type] << v }
|
23
|
+
end
|
24
|
+
|
25
|
+
def initialize
|
26
|
+
@records = {}
|
27
|
+
GFA::Record.TYPES.each { |t| @records[t] = [] }
|
28
|
+
end
|
29
|
+
|
30
|
+
def empty?
|
31
|
+
records.empty? || records.values.all?(&:empty?)
|
32
|
+
end
|
33
|
+
|
34
|
+
def eql?(gfa)
|
35
|
+
records == gfa.records
|
36
|
+
end
|
37
|
+
|
38
|
+
alias == eql?
|
45
39
|
end
|
data/lib/gfa/field/char.rb
CHANGED
data/lib/gfa/field/float.rb
CHANGED
data/lib/gfa/field/hex.rb
CHANGED
data/lib/gfa/field/numarray.rb
CHANGED
@@ -10,7 +10,15 @@ class GFA::Field::NumArray < GFA::Field
|
|
10
10
|
def modifier ; value[0] ; end
|
11
11
|
|
12
12
|
def array ; value[2..-1].split(/,/) ; end
|
13
|
-
|
13
|
+
|
14
14
|
alias as_a array
|
15
15
|
|
16
|
+
def number_type
|
17
|
+
{
|
18
|
+
c: 'int8_t', C: 'uint8_t',
|
19
|
+
s: 'int16_t', S: 'uint16_t',
|
20
|
+
i: 'int32_t', I: 'uint32_t',
|
21
|
+
f: 'float'
|
22
|
+
}[modifier.to_sym]
|
23
|
+
end
|
16
24
|
end
|
data/lib/gfa/field/sigint.rb
CHANGED
data/lib/gfa/field/string.rb
CHANGED
data/lib/gfa/field.rb
CHANGED
@@ -1,17 +1,15 @@
|
|
1
1
|
class GFA::Field
|
2
|
-
|
3
2
|
# Class-level
|
4
|
-
|
5
3
|
CODES = {
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
4
|
+
A: :Char,
|
5
|
+
i: :SigInt,
|
6
|
+
f: :Float,
|
7
|
+
Z: :String,
|
8
|
+
J: :Json, # Excluding new-line and tab characters
|
9
|
+
H: :Hex,
|
10
|
+
B: :NumArray
|
12
11
|
}
|
13
12
|
TYPES = CODES.values
|
14
|
-
|
15
13
|
TYPES.each { |t| require "gfa/field/#{t.downcase}" }
|
16
14
|
|
17
15
|
[:CODES, :TYPES].each do |x|
|
data/lib/gfa/generator.rb
CHANGED
@@ -1,31 +1,35 @@
|
|
1
1
|
class GFA
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
def save(file)
|
3
|
+
fh = File.open(file, 'w')
|
4
|
+
each_line do |ln|
|
5
|
+
fh.puts ln
|
6
|
+
end
|
7
|
+
fh.close
|
8
|
+
end
|
9
|
+
|
10
|
+
def each_line(&blk)
|
11
|
+
set_version_header('1.1') if gfa_version.nil?
|
12
|
+
GFA::Record.TYPES.each do |r_type|
|
13
|
+
records[r_type].each do |record|
|
14
|
+
blk[record.to_s]
|
6
15
|
end
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
def to_s
|
27
|
-
o = ""
|
28
|
-
each_line{ |ln| o += ln + "\n" }
|
29
|
-
o
|
30
|
-
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def set_version_header(v)
|
20
|
+
unset_version
|
21
|
+
@records[:Header] << GFA::Record::Header.new("VN:Z:#{v}")
|
22
|
+
@gfa_version = v
|
23
|
+
end
|
24
|
+
|
25
|
+
def unset_version
|
26
|
+
@records[:Header].delete_if { |o| !o.fields[:VN].nil? }
|
27
|
+
@gfa_version = nil
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_s
|
31
|
+
o = ''
|
32
|
+
each_line { |ln| o += ln + "\n" }
|
33
|
+
o
|
34
|
+
end
|
31
35
|
end
|
data/lib/gfa/graph.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'rgl/adjacency'
|
2
|
+
require 'rgl/implicit'
|
3
3
|
|
4
4
|
class GFA
|
5
5
|
|
@@ -11,7 +11,7 @@ class GFA
|
|
11
11
|
# true.
|
12
12
|
# * :directed => bool. If false, ignores direction of the links. By defaut
|
13
13
|
# the same value as :orient.
|
14
|
-
def implicit_graph(opts={})
|
14
|
+
def implicit_graph(opts = {})
|
15
15
|
rgl_implicit_graph(opts)
|
16
16
|
end
|
17
17
|
|
@@ -19,7 +19,7 @@ class GFA
|
|
19
19
|
# Generates a RGL::DirectedAdjacencyGraph or RGL::AdjacencyGraph object.
|
20
20
|
# The +opts+ argument is a hash with the same supported key-value pairs as
|
21
21
|
# in #implicit_graph.
|
22
|
-
def adjacency_graph(opts={})
|
22
|
+
def adjacency_graph(opts = {})
|
23
23
|
implicit_graph(opts).to_adjacency
|
24
24
|
end
|
25
25
|
|
@@ -27,7 +27,7 @@ class GFA
|
|
27
27
|
|
28
28
|
def segment_names_with_orient
|
29
29
|
segments.flat_map do |s|
|
30
|
-
%w[+ -].map{ |orient| GFA::GraphVertex.idx(s, orient) }
|
30
|
+
%w[+ -].map { |orient| GFA::GraphVertex.idx(s, orient) }
|
31
31
|
end.to_set
|
32
32
|
end
|
33
33
|
|
@@ -44,8 +44,8 @@ class GFA
|
|
44
44
|
(opts[:orient] ? segment_names_with_orient :
|
45
45
|
segment_names).each(&b)
|
46
46
|
end
|
47
|
-
g.adjacent_iterator do |x,b|
|
48
|
-
rgl_implicit_adjacent_iterator(x,b,opts)
|
47
|
+
g.adjacent_iterator do |x, b|
|
48
|
+
rgl_implicit_adjacent_iterator(x, b, opts)
|
49
49
|
end
|
50
50
|
g.directed = opts[:directed]
|
51
51
|
end
|
@@ -62,20 +62,20 @@ class GFA
|
|
62
62
|
if l.from?(x.segment, x.orient)
|
63
63
|
orient = opts[:orient] ? l.to_orient : nil
|
64
64
|
b.call(GFA::GraphVertex.idx(l.to, orient))
|
65
|
-
elsif opts[:orient]
|
65
|
+
elsif opts[:orient] && l.to?(x.segment, orient_rc(x.orient))
|
66
66
|
orient = orient_rc(l.from_orient.value)
|
67
67
|
b.call(GFA::GraphVertex.idx(l.from, orient))
|
68
68
|
end
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
72
|
-
def orient_rc(o)
|
73
|
-
|
72
|
+
def orient_rc(o)
|
73
|
+
o == '+' ? '-' : '+'
|
74
|
+
end
|
74
75
|
end
|
75
76
|
|
76
77
|
|
77
78
|
class GFA::GraphVertex # :nodoc:
|
78
|
-
|
79
79
|
# Class-level
|
80
80
|
@@idx = {}
|
81
81
|
def self.idx(segment, orient)
|
@@ -83,10 +83,10 @@ class GFA::GraphVertex # :nodoc:
|
|
83
83
|
@@idx[n.to_s] ||= n
|
84
84
|
@@idx[n.to_s]
|
85
85
|
end
|
86
|
-
|
86
|
+
|
87
87
|
# Instance-level
|
88
88
|
attr :segment, :orient
|
89
|
-
|
89
|
+
|
90
90
|
def initialize(segment, orient)
|
91
91
|
@segment = segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
92
92
|
segment.is_a?(GFA::Field) ? segment.value : segment
|
@@ -96,5 +96,4 @@ class GFA::GraphVertex # :nodoc:
|
|
96
96
|
def to_s
|
97
97
|
"#{segment}#{orient}"
|
98
98
|
end
|
99
|
-
|
100
99
|
end
|
data/lib/gfa/parser.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
|
-
require
|
1
|
+
require 'gfa/record'
|
2
2
|
|
3
3
|
class GFA
|
4
4
|
# Class-level
|
5
|
-
MIN_VERSION =
|
6
|
-
MAX_VERSION =
|
5
|
+
MIN_VERSION = '1.0'
|
6
|
+
MAX_VERSION = '1.2'
|
7
7
|
|
8
8
|
def self.load(file)
|
9
9
|
gfa = GFA.new
|
10
|
-
fh = File.open(file,
|
10
|
+
fh = File.open(file, 'r')
|
11
11
|
fh.each { |ln| gfa << ln }
|
12
12
|
fh.close
|
13
13
|
gfa
|
@@ -20,17 +20,19 @@ class GFA
|
|
20
20
|
# Instance-level
|
21
21
|
def <<(obj)
|
22
22
|
obj = parse_line(obj) unless obj.is_a? GFA::Record
|
23
|
-
return if obj.nil?
|
23
|
+
return if obj.nil? || obj.empty?
|
24
24
|
@records[obj.type] << obj
|
25
|
-
|
25
|
+
|
26
|
+
if obj.type == :Header && !obj.fields[:VN].nil?
|
26
27
|
set_gfa_version(obj.fields[:VN].value)
|
27
28
|
end
|
28
29
|
end
|
29
30
|
|
30
31
|
def set_gfa_version(v)
|
31
32
|
@gfa_version = v
|
32
|
-
|
33
|
-
GFA
|
33
|
+
unless GFA::supported_version? gfa_version
|
34
|
+
raise "GFA version currently unsupported: #{v}."
|
35
|
+
end
|
34
36
|
end
|
35
37
|
|
36
38
|
private
|
@@ -41,5 +43,4 @@ class GFA
|
|
41
43
|
cols = ln.split("\t")
|
42
44
|
GFA::Record.code_class(cols.shift).new(*cols)
|
43
45
|
end
|
44
|
-
|
45
46
|
end
|
@@ -1,14 +1,20 @@
|
|
1
1
|
class GFA::Record::Containment < GFA::Record
|
2
2
|
CODE = :C
|
3
|
-
REQ_FIELDS = [
|
3
|
+
REQ_FIELDS = %i[from from_orient to to_orient pos overlap]
|
4
4
|
OPT_FIELDS = {
|
5
|
-
:
|
6
|
-
:
|
5
|
+
RC: :i, # Read coverage
|
6
|
+
NM: :i, # Number of mismatches/gaps
|
7
|
+
ID: :Z # Edge identifier
|
7
8
|
}
|
8
9
|
|
9
10
|
REQ_FIELDS.each_index do |i|
|
10
|
-
define_method(REQ_FIELDS[i]) { fields[i+2] }
|
11
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
11
12
|
end
|
13
|
+
|
14
|
+
alias container from
|
15
|
+
alias container_orient from_orient
|
16
|
+
alias contained to
|
17
|
+
alias contained_orient to_orient
|
12
18
|
|
13
19
|
def initialize(from, from_orient, to, to_orient, pos, overlap, *opt_fields)
|
14
20
|
@fields = {}
|
@@ -18,7 +24,6 @@ class GFA::Record::Containment < GFA::Record
|
|
18
24
|
add_field(5, :Z, to_orient, /^+|-$/)
|
19
25
|
add_field(6, :i, pos, /^[0-9]*$/)
|
20
26
|
add_field(7, :Z, overlap, /^\*|([0-9]+[MIDNSHPX=])+$/)
|
21
|
-
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
27
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
22
28
|
end
|
23
|
-
|
24
29
|
end
|
data/lib/gfa/record/header.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
class GFA::Record::Header < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
2
|
+
CODE = :H
|
3
|
+
REQ_FIELDS = []
|
4
|
+
OPT_FIELDS = {
|
5
|
+
VN: :Z # Version number
|
6
|
+
}
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
def initialize(*opt_fields)
|
9
|
+
@fields = {}
|
10
|
+
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
11
|
+
end
|
13
12
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
class GFA::Record::Jump < GFA::Record
|
2
|
+
CODE = :J
|
3
|
+
REQ_FIELDS = %i[from from_orient to to_orient distance]
|
4
|
+
OPT_FIELDS = {
|
5
|
+
SC: :i # 1 indicates indirect shortcut connections. Only 0/1 allowed.
|
6
|
+
}
|
7
|
+
|
8
|
+
REQ_FIELDS.each_index do |i|
|
9
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
10
|
+
end
|
11
|
+
|
12
|
+
def initialize(from, from_orient, to, to_orient, distance, *opt_fields)
|
13
|
+
@fields = {}
|
14
|
+
add_field(2, :Z, from, /^[!-)+-<>-~][!-~]*$/)
|
15
|
+
add_field(3, :Z, from_orient, /^+|-$/)
|
16
|
+
add_field(4, :Z, to, /^[!-)+-<>-~][!-~]*$/)
|
17
|
+
add_field(5, :Z, to_orient, /^+|-$/)
|
18
|
+
add_field(6, :Z, distance, /^\*|[-+]?[0-9]+$/)
|
19
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
def from?(segment, orient = nil)
|
24
|
+
links_from_to?(segment, orient, true)
|
25
|
+
end
|
26
|
+
|
27
|
+
def to?(segment, orient = nil)
|
28
|
+
links_from_to?(segment, orient, false)
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def links_from_to?(segment, orient, from)
|
34
|
+
segment = segment_name(segment)
|
35
|
+
orient = orient.value if orient.is_a? GFA::Field
|
36
|
+
base_k = from ? 2 : 4
|
37
|
+
segment==fields[base_k].value &&
|
38
|
+
(orient.nil? || orient==fields[base_k + 1].value)
|
39
|
+
end
|
40
|
+
|
41
|
+
def segment_name(segment)
|
42
|
+
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
43
|
+
segment.is_a?(GFA::Field) ? segment.value : segment
|
44
|
+
end
|
45
|
+
end
|
data/lib/gfa/record/link.rb
CHANGED
@@ -1,50 +1,49 @@
|
|
1
1
|
class GFA::Record::Link < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
2
|
+
CODE = :L
|
3
|
+
REQ_FIELDS = %i[from from_orient to to_orient overlap]
|
4
|
+
OPT_FIELDS = {
|
5
|
+
MQ: :i, # Mapping quality
|
6
|
+
NM: :i, # Number of mismatches/gaps
|
7
|
+
EC: :i, # Read count
|
8
|
+
FC: :i, # Fragment count
|
9
|
+
KC: :i, # k-mer count
|
10
|
+
ID: :Z # Edge identifier
|
11
|
+
}
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
|
13
|
+
REQ_FIELDS.each_index do |i|
|
14
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
15
|
+
end
|
15
16
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
17
|
+
def initialize(from, from_orient, to, to_orient, overlap, *opt_fields)
|
18
|
+
@fields = {}
|
19
|
+
add_field(2, :Z, from, /^[!-)+-<>-~][!-~]*$/)
|
20
|
+
add_field(3, :Z, from_orient, /^+|-$/)
|
21
|
+
add_field(4, :Z, to, /^[!-)+-<>-~][!-~]*$/)
|
22
|
+
add_field(5, :Z, to_orient, /^+|-$/)
|
23
|
+
add_field(6, :Z, overlap, /^\*|([0-9]+[MIDNSHPX=])+$/)
|
24
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
25
|
+
end
|
25
26
|
|
27
|
+
def from?(segment, orient = nil)
|
28
|
+
links_from_to?(segment, orient, true)
|
29
|
+
end
|
26
30
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
def to?(segment, orient=nil)
|
32
|
-
links_from_to?(segment, orient, false)
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
31
|
+
def to?(segment, orient = nil)
|
32
|
+
links_from_to?(segment, orient, false)
|
33
|
+
end
|
36
34
|
|
37
|
-
|
38
|
-
segment = segment_name(segment)
|
39
|
-
orient = orient.value if orient.is_a? GFA::Field
|
40
|
-
base_k = from ? 2 : 4
|
41
|
-
segment==fields[base_k].value and
|
42
|
-
(orient.nil? or orient==fields[base_k + 1].value)
|
43
|
-
end
|
35
|
+
private
|
44
36
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
37
|
+
def links_from_to?(segment, orient, from)
|
38
|
+
segment = segment_name(segment)
|
39
|
+
orient = orient.value if orient.is_a? GFA::Field
|
40
|
+
base_k = from ? 2 : 4
|
41
|
+
segment==fields[base_k].value &&
|
42
|
+
(orient.nil? || orient==fields[base_k + 1].value)
|
43
|
+
end
|
49
44
|
|
45
|
+
def segment_name(segment)
|
46
|
+
segment.is_a?(GFA::Record::Segment) ? segment.name.value :
|
47
|
+
segment.is_a?(GFA::Field) ? segment.value : segment
|
48
|
+
end
|
50
49
|
end
|
data/lib/gfa/record/path.rb
CHANGED
@@ -1,18 +1,19 @@
|
|
1
1
|
class GFA::Record::Path < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
CODE = :P
|
3
|
+
REQ_FIELDS = %i[path_name segment_name cigar]
|
4
|
+
OPT_FIELDS = {}
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
REQ_FIELDS.each_index do |i|
|
7
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
8
|
+
end
|
9
9
|
|
10
|
-
|
11
|
-
@fields = {}
|
12
|
-
add_field(2, :Z, path_name, /^[!-)+-<>-~][!-~]*$/)
|
13
|
-
add_field(3, :Z, segment_name, /^[!-)+-<>-~][!-~]*$/)
|
14
|
-
add_field(4, :Z, cigar, /^\*|([0-9]+[MIDNSHPX=])+$/)
|
15
|
-
opt_fields.each{ |f| add_opt_field(f, OPT_FIELDS) }
|
16
|
-
end
|
10
|
+
alias overlaps cigar
|
17
11
|
|
12
|
+
def initialize(path_name, segment_name, cigar, *opt_fields)
|
13
|
+
@fields = {}
|
14
|
+
add_field(2, :Z, path_name, /^[!-)+-<>-~][!-~]*$/)
|
15
|
+
add_field(3, :Z, segment_name, /^[!-)+-<>-~][!-~]*$/)
|
16
|
+
add_field(4, :Z, cigar, /^\*|([0-9]+[MIDNSHPX=])+$/)
|
17
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
18
|
+
end
|
18
19
|
end
|
data/lib/gfa/record/segment.rb
CHANGED
@@ -1,22 +1,25 @@
|
|
1
1
|
class GFA::Record::Segment < GFA::Record
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
2
|
+
CODE = :S
|
3
|
+
REQ_FIELDS = %i[name sequence]
|
4
|
+
OPT_FIELDS = {
|
5
|
+
LN: :i, # Segment length
|
6
|
+
RC: :i, # Read count
|
7
|
+
FC: :i, # Fragment count
|
8
|
+
KC: :i, # k-mer count
|
9
|
+
SH: :H, # SHA-256 checksum of the sequence
|
10
|
+
UR: :Z, # URI or local file-system path of the sequence
|
11
|
+
# Non-cannonical
|
12
|
+
DP: :f # (From SAM)
|
13
|
+
}
|
10
14
|
|
11
|
-
|
12
|
-
|
13
|
-
|
15
|
+
REQ_FIELDS.each_index do |i|
|
16
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
17
|
+
end
|
14
18
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
def initialize(name, sequence, *opt_fields)
|
20
|
+
@fields = {}
|
21
|
+
add_field(2, :Z, name, /^[!-)+-<>-~][!-~]*$/)
|
22
|
+
add_field(3, :Z, sequence, /^\*|[A-Za-z=.]+$/)
|
23
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
24
|
+
end
|
22
25
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class GFA::Record::Walk < GFA::Record
|
2
|
+
CODE = :W
|
3
|
+
REQ_FIELDS = %i[sample_id hap_index seq_id seq_start seq_end walk]
|
4
|
+
OPT_FIELDS = {}
|
5
|
+
|
6
|
+
REQ_FIELDS.each_index do |i|
|
7
|
+
define_method(REQ_FIELDS[i]) { fields[i + 2] }
|
8
|
+
end
|
9
|
+
|
10
|
+
def initialize(sample_id, hap_index, seq_id, seq_start, seq_end, walk, *opt_fields)
|
11
|
+
@fields = {}
|
12
|
+
add_field(2, :Z, sample_id, /^[!-)+-<>-~][!-~]*$/)
|
13
|
+
add_field(3, :i, hap_index, /^[0-9]+$/)
|
14
|
+
add_field(4, :Z, seq_id, /^[!-)+-<>-~][!-~]*$/)
|
15
|
+
add_field(5, :i, seq_start, /^\*|[0-9]+$/)
|
16
|
+
add_field(6, :i, seq_end, /^\*|[0-9]+$/)
|
17
|
+
add_field(7, :Z, walk, /^([><][!-;=?-~]+)+$/)
|
18
|
+
opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
|
19
|
+
end
|
20
|
+
end
|
data/lib/gfa/record.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
class GFA::Record
|
2
|
-
|
3
2
|
# Class-level
|
4
|
-
|
5
3
|
CODES = {
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
4
|
+
:'#' => :Comment,
|
5
|
+
H: :Header,
|
6
|
+
S: :Segment,
|
7
|
+
L: :Link,
|
8
|
+
J: :Jump, # Since 1.2
|
9
|
+
C: :Containment,
|
10
|
+
P: :Path,
|
11
|
+
W: :Walk # Since 1.1
|
11
12
|
}
|
12
13
|
REQ_FIELDS = []
|
13
14
|
OPT_FIELDS = {}
|
14
15
|
TYPES = CODES.values
|
15
|
-
|
16
16
|
TYPES.each { |t| require "gfa/record/#{t.downcase}" }
|
17
17
|
|
18
18
|
[:CODES, :REQ_FIELDS, :OPT_FIELDS, :TYPES].each do |x|
|
@@ -44,9 +44,9 @@ class GFA::Record
|
|
44
44
|
def to_s
|
45
45
|
o = [code.to_s]
|
46
46
|
self.class.REQ_FIELDS.each_index do |i|
|
47
|
-
o << fields[i+2].to_s(false)
|
47
|
+
o << fields[i + 2].to_s(false)
|
48
48
|
end
|
49
|
-
fields.each do |k,v|
|
49
|
+
fields.each do |k, v|
|
50
50
|
next if k.is_a? Integer
|
51
51
|
o << "#{k}:#{v}"
|
52
52
|
end
|
@@ -54,7 +54,7 @@ class GFA::Record
|
|
54
54
|
end
|
55
55
|
|
56
56
|
def hash
|
57
|
-
{code => fields}.hash
|
57
|
+
{ code => fields }.hash
|
58
58
|
end
|
59
59
|
|
60
60
|
def eql?(rec)
|
@@ -65,26 +65,32 @@ class GFA::Record
|
|
65
65
|
|
66
66
|
private
|
67
67
|
|
68
|
-
def add_field(f_tag, f_type, f_value, format=nil)
|
68
|
+
def add_field(f_tag, f_type, f_value, format = nil)
|
69
69
|
unless format.nil?
|
70
70
|
msg = (f_tag.is_a?(Integer) ? "column #{f_tag}" : "#{f_tag} field")
|
71
71
|
GFA.assert_format(f_value, format, "Bad #{type} #{msg}")
|
72
72
|
end
|
73
|
+
|
73
74
|
@fields[ f_tag ] = GFA::Field.code_class(f_type).new(f_value)
|
74
75
|
end
|
75
76
|
|
76
77
|
def add_opt_field(f, known)
|
77
|
-
m = /^([A-Za-z]+):([A-Za-z]+):(.*)$/.match(f)
|
78
|
-
|
78
|
+
m = /^([A-Za-z]+):([A-Za-z]+):(.*)$/.match(f)
|
79
|
+
raise "Cannot parse field: '#{f}'." unless m
|
80
|
+
|
79
81
|
f_tag = m[1].to_sym
|
80
82
|
f_type = m[2].to_sym
|
81
83
|
f_value = m[3]
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
84
|
+
|
85
|
+
if known[f_tag].nil? && f_tag =~ /^[A-Z]+$/
|
86
|
+
raise "Unknown reserved tag #{f_tag} for a #{type} record."
|
87
|
+
end
|
88
|
+
|
89
|
+
unless known[f_tag].nil? || known[f_tag] == f_type
|
90
|
+
raise "Wrong field type #{f_type} for a #{f_tag} tag," \
|
91
|
+
" expected #{known[f_tag]}"
|
92
|
+
end
|
93
|
+
|
87
94
|
add_field(f_tag, f_type, f_value)
|
88
95
|
end
|
89
|
-
|
90
96
|
end
|
data/lib/gfa/version.rb
CHANGED
data/lib/gfa.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
1
|
+
require 'gfa/common'
|
2
|
+
require 'gfa/parser'
|
3
|
+
require 'gfa/generator'
|
4
|
+
require 'gfa/graph'
|
data/test/parser_test.rb
CHANGED
@@ -4,7 +4,7 @@ require "gfa/parser"
|
|
4
4
|
class ParserTest < Test::Unit::TestCase
|
5
5
|
|
6
6
|
def test_load
|
7
|
-
sample_f = File.expand_path(
|
7
|
+
sample_f = File.expand_path('../fixtures/sample.gfa', __FILE__)
|
8
8
|
assert_respond_to(GFA, :load)
|
9
9
|
pre_fhs = ObjectSpace.each_object(IO).count{ |i| not i.closed? }
|
10
10
|
sample = GFA.load(sample_f)
|
@@ -20,18 +20,18 @@ class ParserTest < Test::Unit::TestCase
|
|
20
20
|
|
21
21
|
def test_version_suppport
|
22
22
|
gfa = GFA.new
|
23
|
-
assert_raise { gfa.set_gfa_version(
|
24
|
-
assert_raise { gfa.set_gfa_version(
|
25
|
-
assert_nothing_raised { gfa.set_gfa_version(
|
23
|
+
assert_raise { gfa.set_gfa_version('0.9') }
|
24
|
+
assert_raise { gfa.set_gfa_version('2.1') }
|
25
|
+
assert_nothing_raised { gfa.set_gfa_version('1.0') }
|
26
26
|
end
|
27
27
|
|
28
28
|
def test_line_by_line
|
29
29
|
gfa = GFA.new
|
30
30
|
assert_respond_to(gfa, :<<)
|
31
31
|
# Empty
|
32
|
-
gfa <<
|
32
|
+
gfa << ' '
|
33
33
|
assert(gfa.empty?)
|
34
|
-
gfa <<
|
34
|
+
gfa << 'H'
|
35
35
|
assert(gfa.empty?)
|
36
36
|
# Segment
|
37
37
|
assert_equal(0, gfa.segments.size)
|
@@ -40,8 +40,8 @@ class ParserTest < Test::Unit::TestCase
|
|
40
40
|
assert_equal(1, gfa.segments.size)
|
41
41
|
# Version
|
42
42
|
assert_nil(gfa.gfa_version)
|
43
|
-
gfa << GFA::Record::Header.new(
|
44
|
-
assert_equal(
|
43
|
+
gfa << GFA::Record::Header.new('VN:Z:1.0')
|
44
|
+
assert_equal('1.0', gfa.gfa_version)
|
45
45
|
end
|
46
46
|
|
47
47
|
end
|
data/test/test_helper.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
require
|
2
|
-
|
1
|
+
require 'simplecov'
|
2
|
+
SimpleCov.start
|
3
3
|
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
4
|
+
require 'rubygems'
|
5
|
+
require 'test/unit'
|
6
|
+
require 'gfa/common'
|
metadata
CHANGED
@@ -1,55 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gfa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rgl
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0.5'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0.5'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: test-unit
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
description: GFA is a graph representation of fragment assemblies
|
@@ -59,61 +59,64 @@ extensions: []
|
|
59
59
|
extra_rdoc_files:
|
60
60
|
- README.md
|
61
61
|
files:
|
62
|
+
- Gemfile
|
63
|
+
- LICENSE
|
64
|
+
- README.md
|
65
|
+
- Rakefile
|
66
|
+
- lib/gfa.rb
|
62
67
|
- lib/gfa/common.rb
|
68
|
+
- lib/gfa/field.rb
|
63
69
|
- lib/gfa/field/char.rb
|
64
70
|
- lib/gfa/field/float.rb
|
65
71
|
- lib/gfa/field/hex.rb
|
72
|
+
- lib/gfa/field/json.rb
|
66
73
|
- lib/gfa/field/numarray.rb
|
67
74
|
- lib/gfa/field/sigint.rb
|
68
75
|
- lib/gfa/field/string.rb
|
69
|
-
- lib/gfa/field.rb
|
70
76
|
- lib/gfa/generator.rb
|
71
77
|
- lib/gfa/graph.rb
|
72
78
|
- lib/gfa/parser.rb
|
79
|
+
- lib/gfa/record.rb
|
80
|
+
- lib/gfa/record/comment.rb
|
73
81
|
- lib/gfa/record/containment.rb
|
74
82
|
- lib/gfa/record/header.rb
|
83
|
+
- lib/gfa/record/jump.rb
|
75
84
|
- lib/gfa/record/link.rb
|
76
85
|
- lib/gfa/record/path.rb
|
77
86
|
- lib/gfa/record/segment.rb
|
78
|
-
- lib/gfa/record.rb
|
87
|
+
- lib/gfa/record/walk.rb
|
79
88
|
- lib/gfa/version.rb
|
80
|
-
- lib/gfa.rb
|
81
89
|
- test/common_test.rb
|
82
90
|
- test/field_test.rb
|
83
91
|
- test/parser_test.rb
|
84
92
|
- test/record_test.rb
|
85
93
|
- test/test_helper.rb
|
86
|
-
- Gemfile
|
87
|
-
- Rakefile
|
88
|
-
- README.md
|
89
|
-
- LICENSE
|
90
94
|
homepage: https://github.com/lmrodriguezr/gfa
|
91
95
|
licenses: []
|
92
96
|
metadata: {}
|
93
|
-
post_install_message:
|
97
|
+
post_install_message:
|
94
98
|
rdoc_options:
|
95
99
|
- lib
|
96
100
|
- README.md
|
97
|
-
- --main
|
101
|
+
- "--main"
|
98
102
|
- README.md
|
99
|
-
- --title
|
103
|
+
- "--title"
|
100
104
|
- Graphical Fragment Assembly (GFA) for Ruby
|
101
105
|
require_paths:
|
102
106
|
- lib
|
103
107
|
required_ruby_version: !ruby/object:Gem::Requirement
|
104
108
|
requirements:
|
105
|
-
- -
|
109
|
+
- - ">="
|
106
110
|
- !ruby/object:Gem::Version
|
107
111
|
version: '0'
|
108
112
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
109
113
|
requirements:
|
110
|
-
- -
|
114
|
+
- - ">="
|
111
115
|
- !ruby/object:Gem::Version
|
112
116
|
version: '0'
|
113
117
|
requirements: []
|
114
|
-
|
115
|
-
|
116
|
-
signing_key:
|
118
|
+
rubygems_version: 3.2.3
|
119
|
+
signing_key:
|
117
120
|
specification_version: 4
|
118
121
|
summary: Graphical Fragment Assembly (GFA) for Ruby
|
119
122
|
test_files: []
|