gfa 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cde4a3e432409c7a2967832dcebd502ddc54b1f6cb25856b6d9d21ce53f67b32
4
- data.tar.gz: 91213d63365dd3608c28e30115cbfc8621e78bbe34936832bcee2ac7e6f460fb
3
+ metadata.gz: 0e8e61ff97b34654b7a660b011826ad5549f66933a91a09658facf58c3fd56b1
4
+ data.tar.gz: 8f85f07955e71cd38a9dfa28011c70433473ad6a1137ed3e6e217d63eeba20a8
5
5
  SHA512:
6
- metadata.gz: 321634c28ec8927bd38286a84a02783b9f915dcbabb7941233583dda7f41b66e952ff9611c9158bd7baca09d7d3d6c254a036f1c9f2169e5e24e6e964d292e71
7
- data.tar.gz: 3698d16ab5953ffd70bf2c102d154bc1f61e5a13a752bc317a756df13762c3668c1bf6e8144821e53e395783f36c45185249c7fc206694be19145200310c3f48
6
+ metadata.gz: 5b9f8fd92cd30d9e4e5c0263e938169141749c7be43011b574f937c9645608d8e72189bfe9072d7321e3acdf7e8288cecf42c90abe3ceef2bf001780bdb3e472
7
+ data.tar.gz: ee33c0b9c0dc9adb2df96d95b792b060b248e2f991ed5a0e4b4c136d66f04b9be6cf5ca58462bce046c6cfd9e7fed6c4c6949cb5e8923eb71e3d5f53f0da2703
data/README.md CHANGED
@@ -58,12 +58,12 @@ Any `GFA` object can be exported as an [`RGL`][rgl] graph using the methods
58
58
  [tiny.gfa](https://github.com/lmrodriguezr/gfa/raw/master/data/tiny.gfa):
59
59
 
60
60
  ```ruby
61
- require "gfa"
62
- require "rgl/dot"
61
+ require 'gfa'
62
+ require 'rgl/dot'
63
63
 
64
- my_gfa = GFA.load("data/tiny.gfa")
64
+ my_gfa = GFA.load('data/tiny.gfa')
65
65
  dg = my_gfa.implicit_graph
66
- dg.write_to_graphic_file("jpg")
66
+ dg.write_to_graphic_file('jpg')
67
67
  ```
68
68
 
69
69
  ![tiny_dg](https://github.com/lmrodriguezr/gfa/raw/master/data/tiny.jpg)
@@ -72,8 +72,8 @@ If you don't care about orientation, you can also build an undirected graph
72
72
  without orientation:
73
73
 
74
74
  ```ruby
75
- ug = my_gfa.implicit_graph(orient:false)
76
- ug.write_to_graphic_file("jpg")
75
+ ug = my_gfa.implicit_graph(orient: false)
76
+ ug.write_to_graphic_file('jpg')
77
77
  ```
78
78
 
79
79
  ![tiny_ug](https://github.com/lmrodriguezr/gfa/raw/master/data/tiny_undirected.jpg)
@@ -88,7 +88,7 @@ gem install gfa
88
88
  Or add the following line to your Gemfile:
89
89
 
90
90
  ```ruby
91
- gem "gfa"
91
+ gem 'gfa'
92
92
  ```
93
93
 
94
94
  and run `bundle install` from your shell.
data/lib/gfa/common.rb CHANGED
@@ -1,17 +1,17 @@
1
1
  require 'gfa/version'
2
- require 'gfa/record'
2
+ require 'gfa/record_set'
3
3
  require 'gfa/field'
4
4
 
5
5
  class GFA
6
6
  # Class-level
7
7
  def self.assert_format(value, regex, message)
8
- unless value =~ regex
9
- raise "#{message}: #{value}."
8
+ unless value =~ /^(?:#{regex})$/
9
+ raise "#{message}: #{value}"
10
10
  end
11
11
  end
12
12
 
13
13
  # Instance-level
14
- attr :gfa_version, :records
14
+ attr :gfa_version, :records, :opts
15
15
 
16
16
  GFA::Record.TYPES.each do |r_type|
17
17
  plural = "#{r_type.downcase}s"
@@ -22,9 +22,12 @@ class GFA
22
22
  define_method("add_#{singular}") { |v| @records[r_type] << v }
23
23
  end
24
24
 
25
- def initialize
25
+ def initialize(opts = {})
26
26
  @records = {}
27
- GFA::Record.TYPES.each { |t| @records[t] = [] }
27
+ @opts = { index: true, comments: false }.merge(opts)
28
+ GFA::Record.TYPES.each do |t|
29
+ @records[t] = GFA::RecordSet.name_class(t).new(self)
30
+ end
28
31
  end
29
32
 
30
33
  def empty?
@@ -1,6 +1,7 @@
1
1
  class GFA::Field::Char < GFA::Field
2
2
  CODE = :A
3
- REGEX = /^[!-~]$/
3
+ REGEX = /[!-~]/
4
+ NATIVE_FUN = :to_s
4
5
 
5
6
  def initialize(f)
6
7
  GFA.assert_format(f, regex, "Bad #{type}")
@@ -1,9 +1,26 @@
1
1
  class GFA::Field::Float < GFA::Field
2
2
  CODE = :f
3
- REGEX = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/
3
+ REGEX = /[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?/
4
+ NATIVE_FUN = :to_f
5
+
6
+ def to_f
7
+ value
8
+ end
9
+
10
+ def to_i
11
+ value.to_i
12
+ end
4
13
 
5
14
  def initialize(f)
6
15
  GFA.assert_format(f, regex, "Bad #{type}")
7
16
  @value = f.to_f
8
17
  end
18
+
19
+ def equivalent?(field)
20
+ if field.is_a?(GFA::Field::NumArray)
21
+ return field.size == 1 && field.first.to_f == value
22
+ end
23
+
24
+ super
25
+ end
9
26
  end
data/lib/gfa/field/hex.rb CHANGED
@@ -1,9 +1,26 @@
1
1
  class GFA::Field::Hex < GFA::Field
2
2
  CODE = :H
3
- REGEX = /^[0-9A-F]+$/
3
+ REGEX = /[0-9A-F]+/
4
+ NATIVE_FUN = :to_i
4
5
 
5
6
  def initialize(f)
6
7
  GFA.assert_format(f, regex, "Bad #{type}")
7
8
  @value = f
8
9
  end
10
+
11
+ def to_i
12
+ value.to_i(16)
13
+ end
14
+
15
+ def to_f
16
+ to_i.to_f
17
+ end
18
+
19
+ def equivalent?(field)
20
+ if field.is_a? GFA::Field::NumArray
21
+ return field.size == 1 && field.first.to_i == value
22
+ end
23
+
24
+ super
25
+ end
9
26
  end
@@ -1,9 +1,18 @@
1
1
  class GFA::Field::Json < GFA::Field
2
2
  CODE = :J
3
- REGEX = /^[ !-~]+$/
3
+ REGEX = /[ !-~]+/
4
+ NATIVE_FUN = :to_s
4
5
 
5
6
  def initialize(f)
6
7
  GFA.assert_format(f, regex, "Bad #{type}")
7
8
  @value = f
8
9
  end
10
+
11
+ def equivalent?(field)
12
+ # TODO
13
+ # We should parse the contents when comparing two GFA::Field::Json to
14
+ # evaluate equivalencies such as 'J:{ "a" : 1 }' ~ 'J:{"a":1}' (spaces)
15
+ # or 'J:{"a":1,"b":2}' ~ 'J:{"b":2,"a":1}' (element order)
16
+ super
17
+ end
9
18
  end
@@ -1,17 +1,30 @@
1
1
  class GFA::Field::NumArray < GFA::Field
2
2
  CODE = :B
3
- REGEX = /^[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+$/
3
+ REGEX = /[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+/
4
+ NATIVE_FUN = :to_a
4
5
 
5
6
  def initialize(f)
6
7
  GFA.assert_format(f, regex, "Bad #{type}")
7
8
  @value = f
8
9
  end
9
10
 
10
- def modifier ; value[0] ; end
11
+ def modifier
12
+ value[0]
13
+ end
11
14
 
12
- def array ; value[2..-1].split(/,/) ; end
15
+ def modifier_fun
16
+ modifier == 'f' ? :to_f : :to_i
17
+ end
13
18
 
14
- alias as_a array
19
+ def array
20
+ @array ||= value[2..-1].split(',').map(&modifier_fun)
21
+ end
22
+
23
+ alias to_a array
24
+
25
+ %i[empty? size count length first last].each do |i|
26
+ define_method(i) { array.send(i) }
27
+ end
15
28
 
16
29
  def number_type
17
30
  {
@@ -21,4 +34,16 @@ class GFA::Field::NumArray < GFA::Field
21
34
  f: 'float'
22
35
  }[modifier.to_sym]
23
36
  end
37
+
38
+ def equivalent?(field)
39
+ return true if eql?(field)
40
+
41
+ if field.respond_to?(:to_a)
42
+ field.to_a.map(&modifier_fun) == array
43
+ elsif size == 1 && field.respond_to?(modifier_fun)
44
+ field.send(modifier_fun) == first
45
+ else
46
+ false
47
+ end
48
+ end
24
49
  end
@@ -1,9 +1,22 @@
1
1
  class GFA::Field::SigInt < GFA::Field
2
2
  CODE = :i
3
- REGEX = /^[-+]?[0-9]+$/
3
+ REGEX = /[-+]?[0-9]+/
4
+ NATIVE_FUN = :to_i
4
5
 
5
6
  def initialize(f)
6
7
  GFA.assert_format(f, regex, "Bad #{type}")
7
8
  @value = f.to_i
8
9
  end
10
+
11
+ def to_i
12
+ value
13
+ end
14
+
15
+ def equivalent?(field)
16
+ if field.is_a?(GFA::Field::NumArray)
17
+ return field.size == 1 && field.first.to_i == value
18
+ end
19
+
20
+ super
21
+ end
9
22
  end
@@ -1,6 +1,15 @@
1
1
  class GFA::Field::String < GFA::Field
2
2
  CODE = :Z
3
- REGEX = /^[ !-~]+$/
3
+ REGEX = /[ !-~]+/
4
+ NATIVE_FUN = :to_s
5
+
6
+ def to_f
7
+ value.to_f
8
+ end
9
+
10
+ def to_i(base = 10)
11
+ value.to_i(base)
12
+ end
4
13
 
5
14
  def initialize(f)
6
15
  GFA.assert_format(f, regex, "Bad #{type}")
data/lib/gfa/field.rb CHANGED
@@ -12,7 +12,7 @@ class GFA::Field
12
12
  TYPES = CODES.values
13
13
  TYPES.each { |t| require "gfa/field/#{t.downcase}" }
14
14
 
15
- [:CODES, :TYPES].each do |x|
15
+ %i[CODES TYPES].each do |x|
16
16
  define_singleton_method(x) { const_get(x) }
17
17
  end
18
18
 
@@ -25,23 +25,95 @@ class GFA::Field
25
25
  def self.name_class(name)
26
26
  const_get(name)
27
27
  end
28
-
28
+
29
+ def self.[](string)
30
+ code, value = string.split(':', 2)
31
+ code_class(code).new(value)
32
+ end
33
+
29
34
  # Instance-level
30
35
 
31
36
  attr :value
32
37
 
33
- def type ; CODES[code] ; end
34
-
35
- def code ; self.class::CODE ; end
36
-
37
- def regex ; self.class::REGEX ; end
38
-
39
- def to_s(with_type=true)
38
+ def type
39
+ CODES[code]
40
+ end
41
+
42
+ def code
43
+ self.class::CODE
44
+ end
45
+
46
+ def regex
47
+ self.class::REGEX
48
+ end
49
+
50
+ def native_fun
51
+ self.class::NATIVE_FUN
52
+ end
53
+
54
+ def to_native
55
+ native_fun == :to_s ? to_s(false) : send(native_fun)
56
+ end
57
+
58
+ def to_s(with_type = true)
40
59
  "#{"#{code}:" if with_type}#{value}"
41
60
  end
42
-
61
+
43
62
  def hash
44
63
  value.hash
45
64
  end
46
65
 
66
+ ##
67
+ # Evaluate equivalency of contents. All the following fields are distinct but
68
+ # contain the same information, and are therefore considered equivalent:
69
+ # Z:123, i:123, f:123.0, B:i,123, H:7b
70
+ #
71
+ # Note that the information content is determined by the class of the first
72
+ # operator. For example:
73
+ # - 'i:123' ~ 'f:123.4' is true because values are compared as integers
74
+ # - 'f:123.4' ~ 'i:123' is false because values are compared as floats
75
+ def equivalent?(field)
76
+ return true if eql?(field) # Might be faster, so testing this first
77
+
78
+ if field.respond_to?(native_fun)
79
+ if field.is_a?(GFA::Field) && native_fun == :to_s
80
+ field.to_s(false) == to_native
81
+ else
82
+ field.send(native_fun) == to_native
83
+ end
84
+ else
85
+ field == value
86
+ end
87
+ end
88
+
89
+ ##
90
+ # Non-equivalent to +field+, same as +!equivalent?+
91
+ def !~(field)
92
+ !self.~(field)
93
+ end
94
+
95
+ ##
96
+ # Same as +equivalent?+
97
+ def ~(field)
98
+ equivalent?(field)
99
+ end
100
+
101
+ ##
102
+ # Evaluate equality. Note that fields with equivalent values evaluate as
103
+ # different. For example, the following fields have equivalent information,
104
+ # but they all evaluate as different: Z:123, i:123, f:123.0, B:i,123, H:7b.
105
+ # To test equivalency of contents instead, use +equivalent?+
106
+ def eql?(field)
107
+ if field.is_a?(GFA::Field)
108
+ type == field.type && value == field.value
109
+ else
110
+ field.is_a?(value.class) && value == field
111
+ end
112
+ end
113
+
114
+ ##
115
+ # Same as +eql?+
116
+ def ==(field)
117
+ eql?(field)
118
+ end
47
119
  end
data/lib/gfa/graph.rb CHANGED
@@ -2,7 +2,6 @@ require 'rgl/adjacency'
2
2
  require 'rgl/implicit'
3
3
 
4
4
  class GFA
5
-
6
5
  ##
7
6
  # Generates a RGL::ImplicitGraph object describing the links in the GFA.
8
7
  # The +opts+ argument is a hash with any of the following key-value pairs:
@@ -57,7 +56,7 @@ class GFA
57
56
  opts
58
57
  end
59
58
 
60
- def rgl_implicit_adjacent_iterator(x,b,opts)
59
+ def rgl_implicit_adjacent_iterator(x, b, opts)
61
60
  links.each do |l|
62
61
  if l.from?(x.segment, x.orient)
63
62
  orient = opts[:orient] ? l.to_orient : nil
data/lib/gfa/parser.rb CHANGED
@@ -4,15 +4,20 @@ class GFA
4
4
  # Class-level
5
5
  MIN_VERSION = '1.0'
6
6
  MAX_VERSION = '1.2'
7
-
8
- def self.load(file)
9
- gfa = GFA.new
7
+
8
+ ##
9
+ # Load a GFA object from a +gfa+ file with options +opts+:
10
+ # - index: If the records should be indexed as loaded (default: true)
11
+ # - comments: If the comment records should be saved (default: false)
12
+ def self.load(file, opts = {})
13
+ gfa = GFA.new(opts)
10
14
  fh = File.open(file, 'r')
11
15
  fh.each { |ln| gfa << ln }
12
- fh.close
13
16
  gfa
17
+ ensure
18
+ fh&.close
14
19
  end
15
-
20
+
16
21
  def self.supported_version?(v)
17
22
  v.to_f >= MIN_VERSION.to_f and v.to_f <= MAX_VERSION.to_f
18
23
  end
@@ -23,24 +28,27 @@ class GFA
23
28
  return if obj.nil? || obj.empty?
24
29
  @records[obj.type] << obj
25
30
 
26
- if obj.type == :Header && !obj.fields[:VN].nil?
27
- set_gfa_version(obj.fields[:VN].value)
31
+ if obj.type == :Header && !obj.VN.nil?
32
+ set_gfa_version(obj.VN.value)
28
33
  end
29
34
  end
30
35
 
31
36
  def set_gfa_version(v)
32
- @gfa_version = v
33
- unless GFA::supported_version? gfa_version
34
- raise "GFA version currently unsupported: #{v}."
37
+ v = v.value if v.is_a? GFA::Field
38
+ unless GFA::supported_version? v
39
+ raise "GFA version currently unsupported: #{v}"
35
40
  end
41
+
42
+ @gfa_version = v
36
43
  end
37
-
44
+
38
45
  private
39
-
40
- def parse_line(ln)
41
- ln.chomp!
42
- return nil if ln =~ /^\s*$/
43
- cols = ln.split("\t")
44
- GFA::Record.code_class(cols.shift).new(*cols)
46
+
47
+ def parse_line(string)
48
+ string = string.chomp
49
+ return nil if string =~ /^\s*$/
50
+ return nil if !opts[:comments] && string[0] == '#'
51
+
52
+ GFA::Record[string]
45
53
  end
46
54
  end
@@ -1,10 +1,15 @@
1
1
  class GFA::Record::Comment < GFA::Record
2
2
  CODE = :'#'
3
- REQ_FIELDS = []
3
+ REQ_FIELDS = %i[comment]
4
4
  OPT_FIELDS = {}
5
+
6
+ REQ_FIELDS.each_index do |i|
7
+ define_method(REQ_FIELDS[i]) { fields[i + 2] }
8
+ end
5
9
 
6
- def initialize(*opt_fields)
10
+ def initialize(comment, *opt_fields)
7
11
  @fields = {}
12
+ add_field(2, :Z, comment, /.*/)
8
13
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
9
14
  end
10
15
  end
@@ -10,20 +10,21 @@ class GFA::Record::Containment < GFA::Record
10
10
  REQ_FIELDS.each_index do |i|
11
11
  define_method(REQ_FIELDS[i]) { fields[i + 2] }
12
12
  end
13
+ OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
13
14
 
14
15
  alias container from
15
16
  alias container_orient from_orient
16
17
  alias contained to
17
18
  alias contained_orient to_orient
18
-
19
+
19
20
  def initialize(from, from_orient, to, to_orient, pos, overlap, *opt_fields)
20
21
  @fields = {}
21
- add_field(2, :Z, from, /^[!-)+-<>-~][!-~]*$/)
22
- add_field(3, :Z, from_orient, /^+|-$/)
23
- add_field(4, :Z, to, /^[!-)+-<>-~][!-~]*$/)
24
- add_field(5, :Z, to_orient, /^+|-$/)
25
- add_field(6, :i, pos, /^[0-9]*$/)
26
- add_field(7, :Z, overlap, /^\*|([0-9]+[MIDNSHPX=])+$/)
22
+ add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
23
+ add_field(3, :Z, from_orient, /[+-]/)
24
+ add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
25
+ add_field(5, :Z, to_orient, /[+-]/)
26
+ add_field(6, :i, pos, /[0-9]*/)
27
+ add_field(7, :Z, overlap, /\*|([0-9]+[MIDNSHPX=])+/)
27
28
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
28
29
  end
29
30
  end
@@ -4,6 +4,8 @@ class GFA::Record::Header < GFA::Record
4
4
  OPT_FIELDS = {
5
5
  VN: :Z # Version number
6
6
  }
7
+
8
+ OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
7
9
 
8
10
  def initialize(*opt_fields)
9
11
  @fields = {}
@@ -4,26 +4,26 @@ class GFA::Record::Jump < GFA::Record
4
4
  OPT_FIELDS = {
5
5
  SC: :i # 1 indicates indirect shortcut connections. Only 0/1 allowed.
6
6
  }
7
-
7
+
8
8
  REQ_FIELDS.each_index do |i|
9
9
  define_method(REQ_FIELDS[i]) { fields[i + 2] }
10
10
  end
11
+ OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
11
12
 
12
13
  def initialize(from, from_orient, to, to_orient, distance, *opt_fields)
13
14
  @fields = {}
14
- add_field(2, :Z, from, /^[!-)+-<>-~][!-~]*$/)
15
- add_field(3, :Z, from_orient, /^+|-$/)
16
- add_field(4, :Z, to, /^[!-)+-<>-~][!-~]*$/)
17
- add_field(5, :Z, to_orient, /^+|-$/)
18
- add_field(6, :Z, distance, /^\*|[-+]?[0-9]+$/)
15
+ add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
16
+ add_field(3, :Z, from_orient, /[+-]/)
17
+ add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
18
+ add_field(5, :Z, to_orient, /[+-]/)
19
+ add_field(6, :Z, distance, /\*|[-+]?[0-9]+/)
19
20
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
20
21
  end
21
22
 
22
-
23
23
  def from?(segment, orient = nil)
24
24
  links_from_to?(segment, orient, true)
25
25
  end
26
-
26
+
27
27
  def to?(segment, orient = nil)
28
28
  links_from_to?(segment, orient, false)
29
29
  end
@@ -9,25 +9,26 @@ class GFA::Record::Link < GFA::Record
9
9
  KC: :i, # k-mer count
10
10
  ID: :Z # Edge identifier
11
11
  }
12
-
12
+
13
13
  REQ_FIELDS.each_index do |i|
14
14
  define_method(REQ_FIELDS[i]) { fields[i + 2] }
15
15
  end
16
+ OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
16
17
 
17
18
  def initialize(from, from_orient, to, to_orient, overlap, *opt_fields)
18
19
  @fields = {}
19
- add_field(2, :Z, from, /^[!-)+-<>-~][!-~]*$/)
20
- add_field(3, :Z, from_orient, /^+|-$/)
21
- add_field(4, :Z, to, /^[!-)+-<>-~][!-~]*$/)
22
- add_field(5, :Z, to_orient, /^+|-$/)
23
- add_field(6, :Z, overlap, /^\*|([0-9]+[MIDNSHPX=])+$/)
20
+ add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
21
+ add_field(3, :Z, from_orient, /[+-]/)
22
+ add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
23
+ add_field(5, :Z, to_orient, /[+-]/)
24
+ add_field(6, :Z, overlap, /\*|([0-9]+[MIDNSHPX=])+/)
24
25
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
25
26
  end
26
27
 
27
28
  def from?(segment, orient = nil)
28
29
  links_from_to?(segment, orient, true)
29
30
  end
30
-
31
+
31
32
  def to?(segment, orient = nil)
32
33
  links_from_to?(segment, orient, false)
33
34
  end
@@ -1,19 +1,19 @@
1
1
  class GFA::Record::Path < GFA::Record
2
2
  CODE = :P
3
- REQ_FIELDS = %i[path_name segment_name cigar]
3
+ REQ_FIELDS = %i[path_name segment_name overlaps]
4
4
  OPT_FIELDS = {}
5
5
 
6
6
  REQ_FIELDS.each_index do |i|
7
7
  define_method(REQ_FIELDS[i]) { fields[i + 2] }
8
8
  end
9
9
 
10
- alias overlaps cigar
10
+ alias cigar overlaps
11
11
 
12
- def initialize(path_name, segment_name, cigar, *opt_fields)
12
+ def initialize(path_name, segment_name, overlaps, *opt_fields)
13
13
  @fields = {}
14
- add_field(2, :Z, path_name, /^[!-)+-<>-~][!-~]*$/)
15
- add_field(3, :Z, segment_name, /^[!-)+-<>-~][!-~]*$/)
16
- add_field(4, :Z, cigar, /^\*|([0-9]+[MIDNSHPX=])+$/)
14
+ add_field(2, :Z, path_name, /[!-)+-<>-~][!-~]*/)
15
+ add_field(3, :Z, segment_name, /[!-)+-<>-~][!-~]*/)
16
+ add_field(4, :Z, overlaps, /\*|([0-9]+[MIDNSHPX=]|[-+]?[0-9]+J|.)+/)
17
17
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
18
18
  end
19
19
  end
@@ -15,11 +15,12 @@ class GFA::Record::Segment < GFA::Record
15
15
  REQ_FIELDS.each_index do |i|
16
16
  define_method(REQ_FIELDS[i]) { fields[i + 2] }
17
17
  end
18
+ OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
18
19
 
19
20
  def initialize(name, sequence, *opt_fields)
20
21
  @fields = {}
21
- add_field(2, :Z, name, /^[!-)+-<>-~][!-~]*$/)
22
- add_field(3, :Z, sequence, /^\*|[A-Za-z=.]+$/)
22
+ add_field(2, :Z, name, /[!-)+-<>-~][!-~]*/)
23
+ add_field(3, :Z, sequence, /\*|[A-Za-z=.]+/)
23
24
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
24
25
  end
25
26
  end
@@ -9,12 +9,12 @@ class GFA::Record::Walk < GFA::Record
9
9
 
10
10
  def initialize(sample_id, hap_index, seq_id, seq_start, seq_end, walk, *opt_fields)
11
11
  @fields = {}
12
- add_field(2, :Z, sample_id, /^[!-)+-<>-~][!-~]*$/)
13
- add_field(3, :i, hap_index, /^[0-9]+$/)
14
- add_field(4, :Z, seq_id, /^[!-)+-<>-~][!-~]*$/)
15
- add_field(5, :i, seq_start, /^\*|[0-9]+$/)
16
- add_field(6, :i, seq_end, /^\*|[0-9]+$/)
17
- add_field(7, :Z, walk, /^([><][!-;=?-~]+)+$/)
12
+ add_field(2, :Z, sample_id, /[!-)+-<>-~][!-~]*/)
13
+ add_field(3, :i, hap_index, /[0-9]+/)
14
+ add_field(4, :Z, seq_id, /[!-)+-<>-~][!-~]*/)
15
+ add_field(5, :i, seq_start, /\*|[0-9]+/)
16
+ add_field(6, :i, seq_end, /\*|[0-9]+/)
17
+ add_field(7, :Z, walk, /([><][!-;=?-~]+)+/)
18
18
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
19
19
  end
20
20
  end
data/lib/gfa/record.rb CHANGED
@@ -15,7 +15,7 @@ class GFA::Record
15
15
  TYPES = CODES.values
16
16
  TYPES.each { |t| require "gfa/record/#{t.downcase}" }
17
17
 
18
- [:CODES, :REQ_FIELDS, :OPT_FIELDS, :TYPES].each do |x|
18
+ %i[CODES REQ_FIELDS OPT_FIELDS TYPES].each do |x|
19
19
  define_singleton_method(x) { const_get(x) }
20
20
  end
21
21
 
@@ -29,18 +29,32 @@ class GFA::Record
29
29
  const_get(name)
30
30
  end
31
31
 
32
+ def self.[](string)
33
+ split = string[0] == '#' ? ['', 2] : ["\t", 0]
34
+ code, *values = string.chomp.split(*split)
35
+ code_class(code).new(*values)
36
+ end
37
+
32
38
  # Instance-level
33
39
 
34
40
  attr :fields
35
41
 
36
- def [](k) fields[k] ; end
37
-
38
- def type ; CODES[code] ; end
39
-
40
- def code ; self.class.const_get(:CODE) ; end
41
-
42
- def empty? ; fields.empty? ; end
43
-
42
+ def [](k)
43
+ fields[k]
44
+ end
45
+
46
+ def type
47
+ CODES[code]
48
+ end
49
+
50
+ def code
51
+ self.class.const_get(:CODE)
52
+ end
53
+
54
+ def empty?
55
+ fields.empty?
56
+ end
57
+
44
58
  def to_s
45
59
  o = [code.to_s]
46
60
  self.class.REQ_FIELDS.each_index do |i|
@@ -52,7 +66,7 @@ class GFA::Record
52
66
  end
53
67
  o.join("\t")
54
68
  end
55
-
69
+
56
70
  def hash
57
71
  { code => fields }.hash
58
72
  end
@@ -64,19 +78,19 @@ class GFA::Record
64
78
  alias == eql?
65
79
 
66
80
  private
67
-
81
+
68
82
  def add_field(f_tag, f_type, f_value, format = nil)
69
83
  unless format.nil?
70
84
  msg = (f_tag.is_a?(Integer) ? "column #{f_tag}" : "#{f_tag} field")
71
85
  GFA.assert_format(f_value, format, "Bad #{type} #{msg}")
72
86
  end
73
87
 
74
- @fields[ f_tag ] = GFA::Field.code_class(f_type).new(f_value)
88
+ @fields[f_tag] = GFA::Field.code_class(f_type).new(f_value)
75
89
  end
76
-
90
+
77
91
  def add_opt_field(f, known)
78
92
  m = /^([A-Za-z]+):([A-Za-z]+):(.*)$/.match(f)
79
- raise "Cannot parse field: '#{f}'." unless m
93
+ raise "Cannot parse field: '#{f}'" unless m
80
94
 
81
95
  f_tag = m[1].to_sym
82
96
  f_type = m[2].to_sym
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::CommentSet < GFA::RecordSet
2
+ CODE = :'#'
3
+ end
@@ -0,0 +1,4 @@
1
+ class GFA::RecordSet::ContainmentSet < GFA::RecordSet
2
+ CODE = :C
3
+ INDEX_FIELD = 2 # Container: Name of container segment
4
+ end
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::HeaderSet < GFA::RecordSet
2
+ CODE = :H
3
+ end
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::JumpSet < GFA::RecordSet
2
+ CODE = :J
3
+ end
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::LinkSet < GFA::RecordSet
2
+ CODE = :L
3
+ end
@@ -0,0 +1,4 @@
1
+ class GFA::RecordSet::PathSet < GFA::RecordSet
2
+ CODE = :P
3
+ INDEX_FIELD = 2 # PathName: Path name
4
+ end
@@ -0,0 +1,4 @@
1
+ class GFA::RecordSet::SegmentSet < GFA::RecordSet
2
+ CODE = :S
3
+ INDEX_FIELD = 2 # Name: Segment name
4
+ end
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::WalkSet < GFA::RecordSet
2
+ CODE = :W
3
+ end
@@ -0,0 +1,99 @@
1
+
2
+ require 'gfa/record'
3
+
4
+ class GFA::RecordSet
5
+ INDEX_FIELD = nil
6
+ TYPES = GFA::Record.TYPES.map { |i| :"#{i}Set" }
7
+ GFA::Record.TYPES.each { |t| require "gfa/record_set/#{t.downcase}_set" }
8
+
9
+ %i[TYPES].each do |x|
10
+ define_singleton_method(x) { const_get(x) }
11
+ end
12
+
13
+ def self.code_class(code)
14
+ name = GFA::Record.CODES[code.to_sym]
15
+ raise "Unknown record type: #{code}." if name.nil?
16
+ name_class(name)
17
+ end
18
+
19
+ def self.name_class(name)
20
+ name = "#{name}Set" unless name =~ /Set$/
21
+ const_get(name)
22
+ end
23
+
24
+ # Instance-level
25
+
26
+ attr_reader :set, :gfa
27
+
28
+ def initialize(gfa)
29
+ @set = []
30
+ @index = {}
31
+ @gfa = gfa
32
+ end
33
+
34
+ def [](k)
35
+ return set[k] if k.is_a?(Integer)
36
+ find_index(k)
37
+ end
38
+
39
+ def type
40
+ GFA::Record.CODES[code]
41
+ end
42
+
43
+ def code
44
+ self.class.const_get(:CODE)
45
+ end
46
+
47
+ def index_field
48
+ self.class.const_get(:INDEX_FIELD)
49
+ end
50
+
51
+ %i[empty? hash size count length first last].each do |i|
52
+ define_method(i) { set.send(i) }
53
+ end
54
+
55
+ def to_s
56
+ set.map(&:to_s).join("\n")
57
+ end
58
+
59
+ def eql?(rec)
60
+ hash == rec.hash
61
+ end
62
+
63
+ alias == eql?
64
+
65
+ def <<(v)
66
+ v = v.split("\t") if v.is_a? String
67
+ v = GFA::Record.code_class(code).new(*v) if v.is_a? Array
68
+ raise "Not a GFA Record: #{v}" unless v.is_a? GFA::Record
69
+ raise "Wrong type of record: #{v.type}" if v.type != type
70
+
71
+ @set << v
72
+ index(v)
73
+ end
74
+
75
+ def index_id(v)
76
+ v[index_field]&.value
77
+ end
78
+
79
+ def index(v)
80
+ save_index(index_id(v), v) if index_field
81
+
82
+ # Whenever present, index also by ID
83
+ save_index(v[:ID].value, v) if v[:ID] && v[:ID].value =~ index_id(v)
84
+ end
85
+
86
+ def save_index(k, v)
87
+ return unless gfa.opts[:index] && k
88
+
89
+ if @index[k]
90
+ warn "#{type} already registered with field #{index_field}: #{k}"
91
+ end
92
+ @index[k] = v
93
+ end
94
+
95
+ def find_index(k)
96
+ k = k.value if k.is_a? GFA::Field
97
+ @index[k]
98
+ end
99
+ end
data/lib/gfa/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  class GFA
2
- VERSION = '0.2.0'
2
+ VERSION = '0.3.1'
3
3
  VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
4
4
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
5
5
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
data/test/common_test.rb CHANGED
@@ -4,10 +4,10 @@ class CommonTest < Test::Unit::TestCase
4
4
 
5
5
  def test_assert_format
6
6
  assert_raise do
7
- GFA.assert_format("tsooq", /^.$/, "Not a char")
7
+ GFA.assert_format('tsooq', /^.$/, 'Not a char')
8
8
  end
9
9
  assert_nothing_raised do
10
- GFA.assert_format("z", /^.$/, "Not a char")
10
+ GFA.assert_format('z', /^.$/, 'Not a char')
11
11
  end
12
12
  end
13
13
 
@@ -20,15 +20,15 @@ class CommonTest < Test::Unit::TestCase
20
20
  def test_record_getters
21
21
  gfa = GFA.new
22
22
  assert_respond_to(gfa, :headers)
23
- assert_equal([], gfa.links)
23
+ assert_equal([], gfa.links.set)
24
24
  assert_nil( gfa.segment(0) )
25
25
  end
26
26
 
27
27
  def test_record_setters
28
28
  gfa = GFA.new
29
29
  assert_respond_to(gfa, :add_path)
30
- gfa.add_containment("zooq")
31
- assert_equal("zooq", gfa.records[:Containment].first)
30
+ gfa.add_containment("zooq\t+\ttsuk\t-\t1\t*")
31
+ assert_equal('zooq', gfa.records[:Containment].first.from.value)
32
32
  end
33
33
 
34
34
  end
data/test/field_test.rb CHANGED
@@ -3,48 +3,74 @@ require "test_helper"
3
3
  class FieldTest < Test::Unit::TestCase
4
4
 
5
5
  def test_char
6
- f = GFA::Field::Char.new("%")
7
- assert_equal("%", f.value)
8
- assert_raise do
9
- GFA::Field::Char.new(" ")
10
- end
11
- assert_raise do
12
- GFA::Field::Char.new("")
13
- end
14
- assert_raise do
15
- GFA::Field::Char.new("^.^")
16
- end
6
+ f = GFA::Field::Char.new('%')
7
+ assert_equal('%', f.value)
8
+ assert_raise { GFA::Field::Char.new(' ') }
9
+ assert_raise { GFA::Field::Char.new('') }
10
+ assert_raise { GFA::Field::Char.new('^.^') }
17
11
  end
18
12
 
19
13
  def test_sigint
20
14
  end
21
15
 
22
16
  def test_float
23
- f = GFA::Field::Float.new("1.3e-5")
17
+ f = GFA::Field::Float.new('1.3e-5')
24
18
  assert_equal(1.3e-5, f.value)
25
- assert_raise do
26
- GFA::Field::Float.new("e-5")
27
- end
19
+ assert_raise { GFA::Field::Float.new('e-5') }
28
20
  end
29
21
 
30
22
  def test_string
31
23
  end
32
24
 
33
25
  def test_hex
34
- f = GFA::Field::Hex.new("C3F0")
35
- assert_equal("C3F0", f.value)
36
- assert_raise do
37
- GFA::Field::Hex.new("C3PO")
38
- end
26
+ f = GFA::Field::Hex.new('C3F0')
27
+ assert_equal('C3F0', f.value)
28
+ assert_raise { GFA::Field::Hex.new('C3PO') }
39
29
  end
40
30
 
41
31
  def test_numarray
42
- f = GFA::Field::NumArray.new("i,1,2,3")
43
- assert_equal(%w[1 2 3], f.array)
44
- assert_equal("i", f.modifier)
45
- assert_raise do
46
- GFA::Field::NumArray.new("c,1,e,3")
47
- end
32
+ f = GFA::Field::NumArray.new('i,1,2,3')
33
+ assert_equal([1, 2, 3], f.array)
34
+ assert_equal('i', f.modifier)
35
+ assert_raise { GFA::Field::NumArray.new('c,1,e,3') }
48
36
  end
49
37
 
38
+ def test_equal
39
+ f = GFA::Field::SigInt.new('123')
40
+ j = GFA::Field::String.new('123')
41
+ k = GFA::Field::Float.new('123')
42
+ assert(f == 123)
43
+ assert(123 == f)
44
+ assert(f != 123.0)
45
+ assert(f != '123')
46
+ assert(f.eql?(123))
47
+ assert(f != j)
48
+ assert(f != k)
49
+ assert(f != k.value)
50
+ assert(f.value == k.value)
51
+ end
52
+
53
+ def test_equivalent
54
+ # String comparisons
55
+ assert(GFA::Field['Z:a'].~ GFA::Field['A:a'])
56
+ assert(GFA::Field['Z:ab'] !~ GFA::Field['A:a'])
57
+ assert(GFA::Field['Z:{"a":1}'].~ GFA::Field['J:{"a":1}'])
58
+ assert(GFA::Field['J:{"a":1}'].~ GFA::Field['Z:{"a":1}'])
59
+
60
+ # Numeric comparisons
61
+ assert(GFA::Field['Z:123'].~ GFA::Field['i:123'])
62
+ assert(GFA::Field['Z:123'].~ GFA::Field['i:123'])
63
+ assert(GFA::Field['i:123'].~ GFA::Field['f:123'])
64
+ assert(GFA::Field['f:123'].~ GFA::Field['B:i,123'])
65
+ assert(GFA::Field['B:i,123'].~ GFA::Field['H:7B'])
66
+ assert(GFA::Field['H:7B'].~ GFA::Field['f:123.0'])
67
+ assert(GFA::Field['Z:123'] !~ GFA::Field['H:7B']) # In hex-space!
68
+ assert(GFA::Field['f:1e3'].~ GFA::Field['f:1000'])
69
+ assert(GFA::Field['f:1e3'].~ 1e3)
70
+ assert(GFA::Field['B:i,123,456'].~ [123, 456.0])
71
+
72
+ # Non-commutative
73
+ assert(GFA::Field['i:123'].~ GFA::Field['f:123.4'])
74
+ assert(GFA::Field['f:123.4'] !~ GFA::Field['i:123'])
75
+ end
50
76
  end
data/test/parser_test.rb CHANGED
@@ -2,20 +2,55 @@ require "test_helper"
2
2
  require "gfa/parser"
3
3
 
4
4
  class ParserTest < Test::Unit::TestCase
5
-
5
+
6
6
  def test_load
7
- sample_f = File.expand_path('../fixtures/sample.gfa', __FILE__)
8
7
  assert_respond_to(GFA, :load)
9
- pre_fhs = ObjectSpace.each_object(IO).count{ |i| not i.closed? }
10
- sample = GFA.load(sample_f)
11
- post_fhs = ObjectSpace.each_object(IO).count{ |i| not i.closed? }
8
+
9
+ # Can load files and close pointers properly
10
+ pre_fhs = ObjectSpace.each_object(IO).count { |i| not i.closed? }
11
+ assert_nothing_raised do
12
+ GFA.load(fixture_path('sample1.gfa'))
13
+ end
14
+ assert_nothing_raised do
15
+ GFA.load(fixture_path('sample2.gfa'))
16
+ end
17
+ assert_nothing_raised do
18
+ GFA.load(fixture_path('sample3.gfa'))
19
+ end
20
+ assert_raise do
21
+ GFA.load(fixture_path('sample4.gfa'))
22
+ end
23
+ post_fhs = ObjectSpace.each_object(IO).count { |i| not i.closed? }
12
24
  assert_equal(pre_fhs, post_fhs)
13
- assert_equal(1, sample.headers.size)
14
- assert_equal(6, sample.segments.size)
15
- assert_equal(4, sample.links.size)
16
- assert(sample.containments.empty?)
17
- assert(sample.paths.empty?)
18
- assert_respond_to(sample, :records)
25
+ end
26
+
27
+ def test_records
28
+ # Samples are properly parsed
29
+ sample1 = GFA.load(fixture_path('sample1.gfa'))
30
+ assert_equal(1, sample1.headers.size)
31
+ assert_equal(6, sample1.segments.size)
32
+ assert_equal(4, sample1.links.size)
33
+ assert(sample1.containments.empty?)
34
+ assert(sample1.paths.empty?)
35
+ assert_respond_to(sample1, :records)
36
+ end
37
+
38
+ def test_comments
39
+ path = fixture_path('sample2.gfa')
40
+ sample = GFA.load(path)
41
+ assert(sample.comments.empty?)
42
+ sample = GFA.load(path, comments: true)
43
+ assert(!sample.comments.empty?)
44
+ end
45
+
46
+ def test_index
47
+ path = fixture_path('sample3.gfa')
48
+ sample = GFA.load(path)
49
+ assert(sample.path('first').is_a?(GFA::Record))
50
+ assert(sample.paths['first'].is_a?(GFA::Record))
51
+ assert_equal('first', sample.path('first')[2]&.value)
52
+ sample = GFA.load(path, index: false)
53
+ assert_nil(sample.path('first'))
19
54
  end
20
55
 
21
56
  def test_version_suppport
@@ -28,20 +63,22 @@ class ParserTest < Test::Unit::TestCase
28
63
  def test_line_by_line
29
64
  gfa = GFA.new
30
65
  assert_respond_to(gfa, :<<)
66
+
31
67
  # Empty
32
68
  gfa << ' '
33
69
  assert(gfa.empty?)
34
70
  gfa << 'H'
35
71
  assert(gfa.empty?)
72
+
36
73
  # Segment
37
74
  assert_equal(0, gfa.segments.size)
38
- gfa << "S\t1\tACTG"
75
+ gfa << "S\t1\tACTG\n"
39
76
  assert(!gfa.empty?)
40
77
  assert_equal(1, gfa.segments.size)
78
+
41
79
  # Version
42
80
  assert_nil(gfa.gfa_version)
43
81
  gfa << GFA::Record::Header.new('VN:Z:1.0')
44
82
  assert_equal('1.0', gfa.gfa_version)
45
83
  end
46
-
47
84
  end
data/test/record_test.rb CHANGED
@@ -17,6 +17,13 @@ class RecordTest < Test::Unit::TestCase
17
17
  assert_equal("P\ta\tb\t*", $rec_p.to_s)
18
18
  end
19
19
 
20
+ def test_init_by_string
21
+ p = GFA::Record["P\ta\tb\t*"]
22
+ assert_equal('a', p.path_name&.value)
23
+ c = GFA::Record["# doink!\n"]
24
+ assert_equal(' doink!', c.comment&.value)
25
+ end
26
+
20
27
  def test_hash
21
28
  other_h = GFA::Record::Header.new("VN:Z:1.0")
22
29
  assert_equal($rec_h.hash, other_h.hash)
data/test/test_helper.rb CHANGED
@@ -4,3 +4,8 @@ SimpleCov.start
4
4
  require 'rubygems'
5
5
  require 'test/unit'
6
6
  require 'gfa/common'
7
+
8
+ def fixture_path(file)
9
+ File.expand_path("../fixtures/#{file}", __FILE__)
10
+ end
11
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gfa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-22 00:00:00.000000000 Z
11
+ date: 2023-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rgl
@@ -85,6 +85,15 @@ files:
85
85
  - lib/gfa/record/path.rb
86
86
  - lib/gfa/record/segment.rb
87
87
  - lib/gfa/record/walk.rb
88
+ - lib/gfa/record_set.rb
89
+ - lib/gfa/record_set/comment_set.rb
90
+ - lib/gfa/record_set/containment_set.rb
91
+ - lib/gfa/record_set/header_set.rb
92
+ - lib/gfa/record_set/jump_set.rb
93
+ - lib/gfa/record_set/link_set.rb
94
+ - lib/gfa/record_set/path_set.rb
95
+ - lib/gfa/record_set/segment_set.rb
96
+ - lib/gfa/record_set/walk_set.rb
88
97
  - lib/gfa/version.rb
89
98
  - test/common_test.rb
90
99
  - test/field_test.rb
@@ -94,7 +103,7 @@ files:
94
103
  homepage: https://github.com/lmrodriguezr/gfa
95
104
  licenses: []
96
105
  metadata: {}
97
- post_install_message:
106
+ post_install_message:
98
107
  rdoc_options:
99
108
  - lib
100
109
  - README.md
@@ -115,8 +124,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
115
124
  - !ruby/object:Gem::Version
116
125
  version: '0'
117
126
  requirements: []
118
- rubygems_version: 3.2.3
119
- signing_key:
127
+ rubygems_version: 3.1.6
128
+ signing_key:
120
129
  specification_version: 4
121
130
  summary: Graphical Fragment Assembly (GFA) for Ruby
122
131
  test_files: []