gfa 0.2.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cde4a3e432409c7a2967832dcebd502ddc54b1f6cb25856b6d9d21ce53f67b32
4
- data.tar.gz: 91213d63365dd3608c28e30115cbfc8621e78bbe34936832bcee2ac7e6f460fb
3
+ metadata.gz: 0e8e61ff97b34654b7a660b011826ad5549f66933a91a09658facf58c3fd56b1
4
+ data.tar.gz: 8f85f07955e71cd38a9dfa28011c70433473ad6a1137ed3e6e217d63eeba20a8
5
5
  SHA512:
6
- metadata.gz: 321634c28ec8927bd38286a84a02783b9f915dcbabb7941233583dda7f41b66e952ff9611c9158bd7baca09d7d3d6c254a036f1c9f2169e5e24e6e964d292e71
7
- data.tar.gz: 3698d16ab5953ffd70bf2c102d154bc1f61e5a13a752bc317a756df13762c3668c1bf6e8144821e53e395783f36c45185249c7fc206694be19145200310c3f48
6
+ metadata.gz: 5b9f8fd92cd30d9e4e5c0263e938169141749c7be43011b574f937c9645608d8e72189bfe9072d7321e3acdf7e8288cecf42c90abe3ceef2bf001780bdb3e472
7
+ data.tar.gz: ee33c0b9c0dc9adb2df96d95b792b060b248e2f991ed5a0e4b4c136d66f04b9be6cf5ca58462bce046c6cfd9e7fed6c4c6949cb5e8923eb71e3d5f53f0da2703
data/README.md CHANGED
@@ -58,12 +58,12 @@ Any `GFA` object can be exported as an [`RGL`][rgl] graph using the methods
58
58
  [tiny.gfa](https://github.com/lmrodriguezr/gfa/raw/master/data/tiny.gfa):
59
59
 
60
60
  ```ruby
61
- require "gfa"
62
- require "rgl/dot"
61
+ require 'gfa'
62
+ require 'rgl/dot'
63
63
 
64
- my_gfa = GFA.load("data/tiny.gfa")
64
+ my_gfa = GFA.load('data/tiny.gfa')
65
65
  dg = my_gfa.implicit_graph
66
- dg.write_to_graphic_file("jpg")
66
+ dg.write_to_graphic_file('jpg')
67
67
  ```
68
68
 
69
69
  ![tiny_dg](https://github.com/lmrodriguezr/gfa/raw/master/data/tiny.jpg)
@@ -72,8 +72,8 @@ If you don't care about orientation, you can also build an undirected graph
72
72
  without orientation:
73
73
 
74
74
  ```ruby
75
- ug = my_gfa.implicit_graph(orient:false)
76
- ug.write_to_graphic_file("jpg")
75
+ ug = my_gfa.implicit_graph(orient: false)
76
+ ug.write_to_graphic_file('jpg')
77
77
  ```
78
78
 
79
79
  ![tiny_ug](https://github.com/lmrodriguezr/gfa/raw/master/data/tiny_undirected.jpg)
@@ -88,7 +88,7 @@ gem install gfa
88
88
  Or add the following line to your Gemfile:
89
89
 
90
90
  ```ruby
91
- gem "gfa"
91
+ gem 'gfa'
92
92
  ```
93
93
 
94
94
  and run `bundle install` from your shell.
data/lib/gfa/common.rb CHANGED
@@ -1,17 +1,17 @@
1
1
  require 'gfa/version'
2
- require 'gfa/record'
2
+ require 'gfa/record_set'
3
3
  require 'gfa/field'
4
4
 
5
5
  class GFA
6
6
  # Class-level
7
7
  def self.assert_format(value, regex, message)
8
- unless value =~ regex
9
- raise "#{message}: #{value}."
8
+ unless value =~ /^(?:#{regex})$/
9
+ raise "#{message}: #{value}"
10
10
  end
11
11
  end
12
12
 
13
13
  # Instance-level
14
- attr :gfa_version, :records
14
+ attr :gfa_version, :records, :opts
15
15
 
16
16
  GFA::Record.TYPES.each do |r_type|
17
17
  plural = "#{r_type.downcase}s"
@@ -22,9 +22,12 @@ class GFA
22
22
  define_method("add_#{singular}") { |v| @records[r_type] << v }
23
23
  end
24
24
 
25
- def initialize
25
+ def initialize(opts = {})
26
26
  @records = {}
27
- GFA::Record.TYPES.each { |t| @records[t] = [] }
27
+ @opts = { index: true, comments: false }.merge(opts)
28
+ GFA::Record.TYPES.each do |t|
29
+ @records[t] = GFA::RecordSet.name_class(t).new(self)
30
+ end
28
31
  end
29
32
 
30
33
  def empty?
@@ -1,6 +1,7 @@
1
1
  class GFA::Field::Char < GFA::Field
2
2
  CODE = :A
3
- REGEX = /^[!-~]$/
3
+ REGEX = /[!-~]/
4
+ NATIVE_FUN = :to_s
4
5
 
5
6
  def initialize(f)
6
7
  GFA.assert_format(f, regex, "Bad #{type}")
@@ -1,9 +1,26 @@
1
1
  class GFA::Field::Float < GFA::Field
2
2
  CODE = :f
3
- REGEX = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/
3
+ REGEX = /[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?/
4
+ NATIVE_FUN = :to_f
5
+
6
+ def to_f
7
+ value
8
+ end
9
+
10
+ def to_i
11
+ value.to_i
12
+ end
4
13
 
5
14
  def initialize(f)
6
15
  GFA.assert_format(f, regex, "Bad #{type}")
7
16
  @value = f.to_f
8
17
  end
18
+
19
+ def equivalent?(field)
20
+ if field.is_a?(GFA::Field::NumArray)
21
+ return field.size == 1 && field.first.to_f == value
22
+ end
23
+
24
+ super
25
+ end
9
26
  end
data/lib/gfa/field/hex.rb CHANGED
@@ -1,9 +1,26 @@
1
1
  class GFA::Field::Hex < GFA::Field
2
2
  CODE = :H
3
- REGEX = /^[0-9A-F]+$/
3
+ REGEX = /[0-9A-F]+/
4
+ NATIVE_FUN = :to_i
4
5
 
5
6
  def initialize(f)
6
7
  GFA.assert_format(f, regex, "Bad #{type}")
7
8
  @value = f
8
9
  end
10
+
11
+ def to_i
12
+ value.to_i(16)
13
+ end
14
+
15
+ def to_f
16
+ to_i.to_f
17
+ end
18
+
19
+ def equivalent?(field)
20
+ if field.is_a? GFA::Field::NumArray
21
+ return field.size == 1 && field.first.to_i == value
22
+ end
23
+
24
+ super
25
+ end
9
26
  end
@@ -1,9 +1,18 @@
1
1
  class GFA::Field::Json < GFA::Field
2
2
  CODE = :J
3
- REGEX = /^[ !-~]+$/
3
+ REGEX = /[ !-~]+/
4
+ NATIVE_FUN = :to_s
4
5
 
5
6
  def initialize(f)
6
7
  GFA.assert_format(f, regex, "Bad #{type}")
7
8
  @value = f
8
9
  end
10
+
11
+ def equivalent?(field)
12
+ # TODO
13
+ # We should parse the contents when comparing two GFA::Field::Json to
14
+ # evaluate equivalencies such as 'J:{ "a" : 1 }' ~ 'J:{"a":1}' (spaces)
15
+ # or 'J:{"a":1,"b":2}' ~ 'J:{"b":2,"a":1}' (element order)
16
+ super
17
+ end
9
18
  end
@@ -1,17 +1,30 @@
1
1
  class GFA::Field::NumArray < GFA::Field
2
2
  CODE = :B
3
- REGEX = /^[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+$/
3
+ REGEX = /[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+/
4
+ NATIVE_FUN = :to_a
4
5
 
5
6
  def initialize(f)
6
7
  GFA.assert_format(f, regex, "Bad #{type}")
7
8
  @value = f
8
9
  end
9
10
 
10
- def modifier ; value[0] ; end
11
+ def modifier
12
+ value[0]
13
+ end
11
14
 
12
- def array ; value[2..-1].split(/,/) ; end
15
+ def modifier_fun
16
+ modifier == 'f' ? :to_f : :to_i
17
+ end
13
18
 
14
- alias as_a array
19
+ def array
20
+ @array ||= value[2..-1].split(',').map(&modifier_fun)
21
+ end
22
+
23
+ alias to_a array
24
+
25
+ %i[empty? size count length first last].each do |i|
26
+ define_method(i) { array.send(i) }
27
+ end
15
28
 
16
29
  def number_type
17
30
  {
@@ -21,4 +34,16 @@ class GFA::Field::NumArray < GFA::Field
21
34
  f: 'float'
22
35
  }[modifier.to_sym]
23
36
  end
37
+
38
+ def equivalent?(field)
39
+ return true if eql?(field)
40
+
41
+ if field.respond_to?(:to_a)
42
+ field.to_a.map(&modifier_fun) == array
43
+ elsif size == 1 && field.respond_to?(modifier_fun)
44
+ field.send(modifier_fun) == first
45
+ else
46
+ false
47
+ end
48
+ end
24
49
  end
@@ -1,9 +1,22 @@
1
1
  class GFA::Field::SigInt < GFA::Field
2
2
  CODE = :i
3
- REGEX = /^[-+]?[0-9]+$/
3
+ REGEX = /[-+]?[0-9]+/
4
+ NATIVE_FUN = :to_i
4
5
 
5
6
  def initialize(f)
6
7
  GFA.assert_format(f, regex, "Bad #{type}")
7
8
  @value = f.to_i
8
9
  end
10
+
11
+ def to_i
12
+ value
13
+ end
14
+
15
+ def equivalent?(field)
16
+ if field.is_a?(GFA::Field::NumArray)
17
+ return field.size == 1 && field.first.to_i == value
18
+ end
19
+
20
+ super
21
+ end
9
22
  end
@@ -1,6 +1,15 @@
1
1
  class GFA::Field::String < GFA::Field
2
2
  CODE = :Z
3
- REGEX = /^[ !-~]+$/
3
+ REGEX = /[ !-~]+/
4
+ NATIVE_FUN = :to_s
5
+
6
+ def to_f
7
+ value.to_f
8
+ end
9
+
10
+ def to_i(base = 10)
11
+ value.to_i(base)
12
+ end
4
13
 
5
14
  def initialize(f)
6
15
  GFA.assert_format(f, regex, "Bad #{type}")
data/lib/gfa/field.rb CHANGED
@@ -12,7 +12,7 @@ class GFA::Field
12
12
  TYPES = CODES.values
13
13
  TYPES.each { |t| require "gfa/field/#{t.downcase}" }
14
14
 
15
- [:CODES, :TYPES].each do |x|
15
+ %i[CODES TYPES].each do |x|
16
16
  define_singleton_method(x) { const_get(x) }
17
17
  end
18
18
 
@@ -25,23 +25,95 @@ class GFA::Field
25
25
  def self.name_class(name)
26
26
  const_get(name)
27
27
  end
28
-
28
+
29
+ def self.[](string)
30
+ code, value = string.split(':', 2)
31
+ code_class(code).new(value)
32
+ end
33
+
29
34
  # Instance-level
30
35
 
31
36
  attr :value
32
37
 
33
- def type ; CODES[code] ; end
34
-
35
- def code ; self.class::CODE ; end
36
-
37
- def regex ; self.class::REGEX ; end
38
-
39
- def to_s(with_type=true)
38
+ def type
39
+ CODES[code]
40
+ end
41
+
42
+ def code
43
+ self.class::CODE
44
+ end
45
+
46
+ def regex
47
+ self.class::REGEX
48
+ end
49
+
50
+ def native_fun
51
+ self.class::NATIVE_FUN
52
+ end
53
+
54
+ def to_native
55
+ native_fun == :to_s ? to_s(false) : send(native_fun)
56
+ end
57
+
58
+ def to_s(with_type = true)
40
59
  "#{"#{code}:" if with_type}#{value}"
41
60
  end
42
-
61
+
43
62
  def hash
44
63
  value.hash
45
64
  end
46
65
 
66
+ ##
67
+ # Evaluate equivalency of contents. All the following fields are distinct but
68
+ # contain the same information, and are therefore considered equivalent:
69
+ # Z:123, i:123, f:123.0, B:i,123, H:7B
70
+ #
71
+ # Note that the information content is determined by the class of the first
72
+ # operator. For example:
73
+ # - 'i:123' ~ 'f:123.4' is true because values are compared as integers
74
+ # - 'f:123.4' ~ 'i:123' if false because values are compared as floats
75
+ def equivalent?(field)
76
+ return true if eql?(field) # Might be faster, so testing this first
77
+
78
+ if field.respond_to?(native_fun)
79
+ if field.is_a?(GFA::Field) && native_fun == :to_s
80
+ field.to_s(false) == to_native
81
+ else
82
+ field.send(native_fun) == to_native
83
+ end
84
+ else
85
+ field == value
86
+ end
87
+ end
88
+
89
+ ##
90
+ # Non-equivalent to +field+, same as +!equivalent?+
91
+ def !~(field)
92
+ !self.~(field)
93
+ end
94
+
95
+ ##
96
+ # Same as +equivalent?+
97
+ def ~(field)
98
+ equivalent?(field)
99
+ end
100
+
101
+ ##
102
+ # Evaluate equality. Note that fields with equivalent values evaluate as
103
+ # different. For example, the following fields have equivalent information,
104
+ # but they all evaluate as different: Z:123, i:123, f:123.0, B:i,123, H:7B.
105
+ # To test equivalency of contents instead, use +equivalent?+
106
+ def eql?(field)
107
+ if field.is_a?(GFA::Field)
108
+ type == field.type && value == field.value
109
+ else
110
+ field.is_a?(value.class) && value == field
111
+ end
112
+ end
113
+
114
+ ##
115
+ # Same as +eql?+
116
+ def ==(field)
117
+ eql?(field)
118
+ end
47
119
  end
data/lib/gfa/graph.rb CHANGED
@@ -2,7 +2,6 @@ require 'rgl/adjacency'
2
2
  require 'rgl/implicit'
3
3
 
4
4
  class GFA
5
-
6
5
  ##
7
6
  # Generates a RGL::ImplicitGraph object describing the links in the GFA.
8
7
  # The +opts+ argument is a hash with any of the following key-value pairs:
@@ -57,7 +56,7 @@ class GFA
57
56
  opts
58
57
  end
59
58
 
60
- def rgl_implicit_adjacent_iterator(x,b,opts)
59
+ def rgl_implicit_adjacent_iterator(x, b, opts)
61
60
  links.each do |l|
62
61
  if l.from?(x.segment, x.orient)
63
62
  orient = opts[:orient] ? l.to_orient : nil
data/lib/gfa/parser.rb CHANGED
@@ -4,15 +4,20 @@ class GFA
4
4
  # Class-level
5
5
  MIN_VERSION = '1.0'
6
6
  MAX_VERSION = '1.2'
7
-
8
- def self.load(file)
9
- gfa = GFA.new
7
+
8
+ ##
9
+ # Load a GFA object from the GFA file at path +file+ with options +opts+:
10
+ # - index: If the records should be indexed as loaded (default: true)
11
+ # - comments: If the comment records should be saved (default: false)
12
+ def self.load(file, opts = {})
13
+ gfa = GFA.new(opts)
10
14
  fh = File.open(file, 'r')
11
15
  fh.each { |ln| gfa << ln }
12
- fh.close
13
16
  gfa
17
+ ensure
18
+ fh&.close
14
19
  end
15
-
20
+
16
21
  def self.supported_version?(v)
17
22
  v.to_f >= MIN_VERSION.to_f and v.to_f <= MAX_VERSION.to_f
18
23
  end
@@ -23,24 +28,27 @@ class GFA
23
28
  return if obj.nil? || obj.empty?
24
29
  @records[obj.type] << obj
25
30
 
26
- if obj.type == :Header && !obj.fields[:VN].nil?
27
- set_gfa_version(obj.fields[:VN].value)
31
+ if obj.type == :Header && !obj.VN.nil?
32
+ set_gfa_version(obj.VN.value)
28
33
  end
29
34
  end
30
35
 
31
36
  def set_gfa_version(v)
32
- @gfa_version = v
33
- unless GFA::supported_version? gfa_version
34
- raise "GFA version currently unsupported: #{v}."
37
+ v = v.value if v.is_a? GFA::Field
38
+ unless GFA::supported_version? v
39
+ raise "GFA version currently unsupported: #{v}"
35
40
  end
41
+
42
+ @gfa_version = v
36
43
  end
37
-
44
+
38
45
  private
39
-
40
- def parse_line(ln)
41
- ln.chomp!
42
- return nil if ln =~ /^\s*$/
43
- cols = ln.split("\t")
44
- GFA::Record.code_class(cols.shift).new(*cols)
46
+
47
+ def parse_line(string)
48
+ string = string.chomp
49
+ return nil if string =~ /^\s*$/
50
+ return nil if !opts[:comments] && string[0] == '#'
51
+
52
+ GFA::Record[string]
45
53
  end
46
54
  end
@@ -1,10 +1,15 @@
1
1
  class GFA::Record::Comment < GFA::Record
2
2
  CODE = :'#'
3
- REQ_FIELDS = []
3
+ REQ_FIELDS = %i[comment]
4
4
  OPT_FIELDS = {}
5
+
6
+ REQ_FIELDS.each_index do |i|
7
+ define_method(REQ_FIELDS[i]) { fields[i + 2] }
8
+ end
5
9
 
6
- def initialize(*opt_fields)
10
+ def initialize(comment, *opt_fields)
7
11
  @fields = {}
12
+ add_field(2, :Z, comment, /.*/)
8
13
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
9
14
  end
10
15
  end
@@ -10,20 +10,21 @@ class GFA::Record::Containment < GFA::Record
10
10
  REQ_FIELDS.each_index do |i|
11
11
  define_method(REQ_FIELDS[i]) { fields[i + 2] }
12
12
  end
13
+ OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
13
14
 
14
15
  alias container from
15
16
  alias container_orient from_orient
16
17
  alias contained to
17
18
  alias contained_orient to_orient
18
-
19
+
19
20
  def initialize(from, from_orient, to, to_orient, pos, overlap, *opt_fields)
20
21
  @fields = {}
21
- add_field(2, :Z, from, /^[!-)+-<>-~][!-~]*$/)
22
- add_field(3, :Z, from_orient, /^+|-$/)
23
- add_field(4, :Z, to, /^[!-)+-<>-~][!-~]*$/)
24
- add_field(5, :Z, to_orient, /^+|-$/)
25
- add_field(6, :i, pos, /^[0-9]*$/)
26
- add_field(7, :Z, overlap, /^\*|([0-9]+[MIDNSHPX=])+$/)
22
+ add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
23
+ add_field(3, :Z, from_orient, /[+-]/)
24
+ add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
25
+ add_field(5, :Z, to_orient, /[+-]/)
26
+ add_field(6, :i, pos, /[0-9]*/)
27
+ add_field(7, :Z, overlap, /\*|([0-9]+[MIDNSHPX=])+/)
27
28
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
28
29
  end
29
30
  end
@@ -4,6 +4,8 @@ class GFA::Record::Header < GFA::Record
4
4
  OPT_FIELDS = {
5
5
  VN: :Z # Version number
6
6
  }
7
+
8
+ OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
7
9
 
8
10
  def initialize(*opt_fields)
9
11
  @fields = {}
@@ -4,26 +4,26 @@ class GFA::Record::Jump < GFA::Record
4
4
  OPT_FIELDS = {
5
5
  SC: :i # 1 indicates indirect shortcut connections. Only 0/1 allowed.
6
6
  }
7
-
7
+
8
8
  REQ_FIELDS.each_index do |i|
9
9
  define_method(REQ_FIELDS[i]) { fields[i + 2] }
10
10
  end
11
+ OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
11
12
 
12
13
  def initialize(from, from_orient, to, to_orient, distance, *opt_fields)
13
14
  @fields = {}
14
- add_field(2, :Z, from, /^[!-)+-<>-~][!-~]*$/)
15
- add_field(3, :Z, from_orient, /^+|-$/)
16
- add_field(4, :Z, to, /^[!-)+-<>-~][!-~]*$/)
17
- add_field(5, :Z, to_orient, /^+|-$/)
18
- add_field(6, :Z, distance, /^\*|[-+]?[0-9]+$/)
15
+ add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
16
+ add_field(3, :Z, from_orient, /[+-]/)
17
+ add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
18
+ add_field(5, :Z, to_orient, /[+-]/)
19
+ add_field(6, :Z, distance, /\*|[-+]?[0-9]+/)
19
20
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
20
21
  end
21
22
 
22
-
23
23
  def from?(segment, orient = nil)
24
24
  links_from_to?(segment, orient, true)
25
25
  end
26
-
26
+
27
27
  def to?(segment, orient = nil)
28
28
  links_from_to?(segment, orient, false)
29
29
  end
@@ -9,25 +9,26 @@ class GFA::Record::Link < GFA::Record
9
9
  KC: :i, # k-mer count
10
10
  ID: :Z # Edge identifier
11
11
  }
12
-
12
+
13
13
  REQ_FIELDS.each_index do |i|
14
14
  define_method(REQ_FIELDS[i]) { fields[i + 2] }
15
15
  end
16
+ OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
16
17
 
17
18
  def initialize(from, from_orient, to, to_orient, overlap, *opt_fields)
18
19
  @fields = {}
19
- add_field(2, :Z, from, /^[!-)+-<>-~][!-~]*$/)
20
- add_field(3, :Z, from_orient, /^+|-$/)
21
- add_field(4, :Z, to, /^[!-)+-<>-~][!-~]*$/)
22
- add_field(5, :Z, to_orient, /^+|-$/)
23
- add_field(6, :Z, overlap, /^\*|([0-9]+[MIDNSHPX=])+$/)
20
+ add_field(2, :Z, from, /[!-)+-<>-~][!-~]*/)
21
+ add_field(3, :Z, from_orient, /[+-]/)
22
+ add_field(4, :Z, to, /[!-)+-<>-~][!-~]*/)
23
+ add_field(5, :Z, to_orient, /[+-]/)
24
+ add_field(6, :Z, overlap, /\*|([0-9]+[MIDNSHPX=])+/)
24
25
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
25
26
  end
26
27
 
27
28
  def from?(segment, orient = nil)
28
29
  links_from_to?(segment, orient, true)
29
30
  end
30
-
31
+
31
32
  def to?(segment, orient = nil)
32
33
  links_from_to?(segment, orient, false)
33
34
  end
@@ -1,19 +1,19 @@
1
1
  class GFA::Record::Path < GFA::Record
2
2
  CODE = :P
3
- REQ_FIELDS = %i[path_name segment_name cigar]
3
+ REQ_FIELDS = %i[path_name segment_name overlaps]
4
4
  OPT_FIELDS = {}
5
5
 
6
6
  REQ_FIELDS.each_index do |i|
7
7
  define_method(REQ_FIELDS[i]) { fields[i + 2] }
8
8
  end
9
9
 
10
- alias overlaps cigar
10
+ alias cigar overlaps
11
11
 
12
- def initialize(path_name, segment_name, cigar, *opt_fields)
12
+ def initialize(path_name, segment_name, overlaps, *opt_fields)
13
13
  @fields = {}
14
- add_field(2, :Z, path_name, /^[!-)+-<>-~][!-~]*$/)
15
- add_field(3, :Z, segment_name, /^[!-)+-<>-~][!-~]*$/)
16
- add_field(4, :Z, cigar, /^\*|([0-9]+[MIDNSHPX=])+$/)
14
+ add_field(2, :Z, path_name, /[!-)+-<>-~][!-~]*/)
15
+ add_field(3, :Z, segment_name, /[!-)+-<>-~][!-~]*/)
16
+ add_field(4, :Z, overlaps, /\*|([0-9]+[MIDNSHPX=]|[-+]?[0-9]+J|.)+/)
17
17
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
18
18
  end
19
19
  end
@@ -15,11 +15,12 @@ class GFA::Record::Segment < GFA::Record
15
15
  REQ_FIELDS.each_index do |i|
16
16
  define_method(REQ_FIELDS[i]) { fields[i + 2] }
17
17
  end
18
+ OPT_FIELDS.each_key { |i| define_method(i) { fields[i] } }
18
19
 
19
20
  def initialize(name, sequence, *opt_fields)
20
21
  @fields = {}
21
- add_field(2, :Z, name, /^[!-)+-<>-~][!-~]*$/)
22
- add_field(3, :Z, sequence, /^\*|[A-Za-z=.]+$/)
22
+ add_field(2, :Z, name, /[!-)+-<>-~][!-~]*/)
23
+ add_field(3, :Z, sequence, /\*|[A-Za-z=.]+/)
23
24
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
24
25
  end
25
26
  end
@@ -9,12 +9,12 @@ class GFA::Record::Walk < GFA::Record
9
9
 
10
10
  def initialize(sample_id, hap_index, seq_id, seq_start, seq_end, walk, *opt_fields)
11
11
  @fields = {}
12
- add_field(2, :Z, sample_id, /^[!-)+-<>-~][!-~]*$/)
13
- add_field(3, :i, hap_index, /^[0-9]+$/)
14
- add_field(4, :Z, seq_id, /^[!-)+-<>-~][!-~]*$/)
15
- add_field(5, :i, seq_start, /^\*|[0-9]+$/)
16
- add_field(6, :i, seq_end, /^\*|[0-9]+$/)
17
- add_field(7, :Z, walk, /^([><][!-;=?-~]+)+$/)
12
+ add_field(2, :Z, sample_id, /[!-)+-<>-~][!-~]*/)
13
+ add_field(3, :i, hap_index, /[0-9]+/)
14
+ add_field(4, :Z, seq_id, /[!-)+-<>-~][!-~]*/)
15
+ add_field(5, :i, seq_start, /\*|[0-9]+/)
16
+ add_field(6, :i, seq_end, /\*|[0-9]+/)
17
+ add_field(7, :Z, walk, /([><][!-;=?-~]+)+/)
18
18
  opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
19
19
  end
20
20
  end
data/lib/gfa/record.rb CHANGED
@@ -15,7 +15,7 @@ class GFA::Record
15
15
  TYPES = CODES.values
16
16
  TYPES.each { |t| require "gfa/record/#{t.downcase}" }
17
17
 
18
- [:CODES, :REQ_FIELDS, :OPT_FIELDS, :TYPES].each do |x|
18
+ %i[CODES REQ_FIELDS OPT_FIELDS TYPES].each do |x|
19
19
  define_singleton_method(x) { const_get(x) }
20
20
  end
21
21
 
@@ -29,18 +29,32 @@ class GFA::Record
29
29
  const_get(name)
30
30
  end
31
31
 
32
+ def self.[](string)
33
+ split = string[0] == '#' ? ['', 2] : ["\t", 0]
34
+ code, *values = string.chomp.split(*split)
35
+ code_class(code).new(*values)
36
+ end
37
+
32
38
  # Instance-level
33
39
 
34
40
  attr :fields
35
41
 
36
- def [](k) fields[k] ; end
37
-
38
- def type ; CODES[code] ; end
39
-
40
- def code ; self.class.const_get(:CODE) ; end
41
-
42
- def empty? ; fields.empty? ; end
43
-
42
+ def [](k)
43
+ fields[k]
44
+ end
45
+
46
+ def type
47
+ CODES[code]
48
+ end
49
+
50
+ def code
51
+ self.class.const_get(:CODE)
52
+ end
53
+
54
+ def empty?
55
+ fields.empty?
56
+ end
57
+
44
58
  def to_s
45
59
  o = [code.to_s]
46
60
  self.class.REQ_FIELDS.each_index do |i|
@@ -52,7 +66,7 @@ class GFA::Record
52
66
  end
53
67
  o.join("\t")
54
68
  end
55
-
69
+
56
70
  def hash
57
71
  { code => fields }.hash
58
72
  end
@@ -64,19 +78,19 @@ class GFA::Record
64
78
  alias == eql?
65
79
 
66
80
  private
67
-
81
+
68
82
  def add_field(f_tag, f_type, f_value, format = nil)
69
83
  unless format.nil?
70
84
  msg = (f_tag.is_a?(Integer) ? "column #{f_tag}" : "#{f_tag} field")
71
85
  GFA.assert_format(f_value, format, "Bad #{type} #{msg}")
72
86
  end
73
87
 
74
- @fields[ f_tag ] = GFA::Field.code_class(f_type).new(f_value)
88
+ @fields[f_tag] = GFA::Field.code_class(f_type).new(f_value)
75
89
  end
76
-
90
+
77
91
  def add_opt_field(f, known)
78
92
  m = /^([A-Za-z]+):([A-Za-z]+):(.*)$/.match(f)
79
- raise "Cannot parse field: '#{f}'." unless m
93
+ raise "Cannot parse field: '#{f}'" unless m
80
94
 
81
95
  f_tag = m[1].to_sym
82
96
  f_type = m[2].to_sym
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::CommentSet < GFA::RecordSet
2
+ CODE = :'#'
3
+ end
@@ -0,0 +1,4 @@
1
+ class GFA::RecordSet::ContainmentSet < GFA::RecordSet
2
+ CODE = :C
3
+ INDEX_FIELD = 2 # Container: Name of container segment
4
+ end
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::HeaderSet < GFA::RecordSet
2
+ CODE = :H
3
+ end
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::JumpSet < GFA::RecordSet
2
+ CODE = :J
3
+ end
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::LinkSet < GFA::RecordSet
2
+ CODE = :L
3
+ end
@@ -0,0 +1,4 @@
1
+ class GFA::RecordSet::PathSet < GFA::RecordSet
2
+ CODE = :P
3
+ INDEX_FIELD = 2 # PathName: Path name
4
+ end
@@ -0,0 +1,4 @@
1
+ class GFA::RecordSet::SegmentSet < GFA::RecordSet
2
+ CODE = :S
3
+ INDEX_FIELD = 2 # Name: Segment name
4
+ end
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::WalkSet < GFA::RecordSet
2
+ CODE = :W
3
+ end
@@ -0,0 +1,99 @@
1
+
2
+ require 'gfa/record'
3
+
4
+ class GFA::RecordSet
5
+ INDEX_FIELD = nil
6
+ TYPES = GFA::Record.TYPES.map { |i| :"#{i}Set" }
7
+ GFA::Record.TYPES.each { |t| require "gfa/record_set/#{t.downcase}_set" }
8
+
9
+ %i[TYPES].each do |x|
10
+ define_singleton_method(x) { const_get(x) }
11
+ end
12
+
13
+ def self.code_class(code)
14
+ name = GFA::Record.CODES[code.to_sym]
15
+ raise "Unknown record type: #{code}." if name.nil?
16
+ name_class(name)
17
+ end
18
+
19
+ def self.name_class(name)
20
+ name = "#{name}Set" unless name =~ /Set$/
21
+ const_get(name)
22
+ end
23
+
24
+ # Instance-level
25
+
26
+ attr_reader :set, :gfa
27
+
28
+ def initialize(gfa)
29
+ @set = []
30
+ @index = {}
31
+ @gfa = gfa
32
+ end
33
+
34
+ def [](k)
35
+ return set[k] if k.is_a?(Integer)
36
+ find_index(k)
37
+ end
38
+
39
+ def type
40
+ GFA::Record.CODES[code]
41
+ end
42
+
43
+ def code
44
+ self.class.const_get(:CODE)
45
+ end
46
+
47
+ def index_field
48
+ self.class.const_get(:INDEX_FIELD)
49
+ end
50
+
51
+ %i[empty? hash size count length first last].each do |i|
52
+ define_method(i) { set.send(i) }
53
+ end
54
+
55
+ def to_s
56
+ set.map(&:to_s).join("\n")
57
+ end
58
+
59
+ def eql?(rec)
60
+ hash == rec.hash
61
+ end
62
+
63
+ alias == eql?
64
+
65
+ def <<(v)
66
+ v = v.split("\t") if v.is_a? String
67
+ v = GFA::Record.code_class(code).new(*v) if v.is_a? Array
68
+ raise "Not a GFA Record: #{v}" unless v.is_a? GFA::Record
69
+ raise "Wrong type of record: #{v.type}" if v.type != type
70
+
71
+ @set << v
72
+ index(v)
73
+ end
74
+
75
+ def index_id(v)
76
+ v[index_field]&.value
77
+ end
78
+
79
+ def index(v)
80
+ save_index(index_id(v), v) if index_field
81
+
82
+ # Whenever present, index also by ID
83
+ save_index(v[:ID].value, v) if v[:ID] && v[:ID].value =~ index_id(v)
84
+ end
85
+
86
+ def save_index(k, v)
87
+ return unless gfa.opts[:index] && k
88
+
89
+ if @index[k]
90
+ warn "#{type} already registered with field #{index_field}: #{k}"
91
+ end
92
+ @index[k] = v
93
+ end
94
+
95
+ def find_index(k)
96
+ k = k.value if k.is_a? GFA::Field
97
+ @index[k]
98
+ end
99
+ end
data/lib/gfa/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  class GFA
2
- VERSION = '0.2.0'
2
+ VERSION = '0.3.1'
3
3
  VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
4
4
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
5
5
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
data/test/common_test.rb CHANGED
@@ -4,10 +4,10 @@ class CommonTest < Test::Unit::TestCase
4
4
 
5
5
  def test_assert_format
6
6
  assert_raise do
7
- GFA.assert_format("tsooq", /^.$/, "Not a char")
7
+ GFA.assert_format('tsooq', /^.$/, 'Not a char')
8
8
  end
9
9
  assert_nothing_raised do
10
- GFA.assert_format("z", /^.$/, "Not a char")
10
+ GFA.assert_format('z', /^.$/, 'Not a char')
11
11
  end
12
12
  end
13
13
 
@@ -20,15 +20,15 @@ class CommonTest < Test::Unit::TestCase
20
20
  def test_record_getters
21
21
  gfa = GFA.new
22
22
  assert_respond_to(gfa, :headers)
23
- assert_equal([], gfa.links)
23
+ assert_equal([], gfa.links.set)
24
24
  assert_nil( gfa.segment(0) )
25
25
  end
26
26
 
27
27
  def test_record_setters
28
28
  gfa = GFA.new
29
29
  assert_respond_to(gfa, :add_path)
30
- gfa.add_containment("zooq")
31
- assert_equal("zooq", gfa.records[:Containment].first)
30
+ gfa.add_containment("zooq\t+\ttsuk\t-\t1\t*")
31
+ assert_equal('zooq', gfa.records[:Containment].first.from.value)
32
32
  end
33
33
 
34
34
  end
data/test/field_test.rb CHANGED
@@ -3,48 +3,74 @@ require "test_helper"
3
3
  class FieldTest < Test::Unit::TestCase
4
4
 
5
5
  def test_char
6
- f = GFA::Field::Char.new("%")
7
- assert_equal("%", f.value)
8
- assert_raise do
9
- GFA::Field::Char.new(" ")
10
- end
11
- assert_raise do
12
- GFA::Field::Char.new("")
13
- end
14
- assert_raise do
15
- GFA::Field::Char.new("^.^")
16
- end
6
+ f = GFA::Field::Char.new('%')
7
+ assert_equal('%', f.value)
8
+ assert_raise { GFA::Field::Char.new(' ') }
9
+ assert_raise { GFA::Field::Char.new('') }
10
+ assert_raise { GFA::Field::Char.new('^.^') }
17
11
  end
18
12
 
19
13
  def test_sigint
20
14
  end
21
15
 
22
16
  def test_float
23
- f = GFA::Field::Float.new("1.3e-5")
17
+ f = GFA::Field::Float.new('1.3e-5')
24
18
  assert_equal(1.3e-5, f.value)
25
- assert_raise do
26
- GFA::Field::Float.new("e-5")
27
- end
19
+ assert_raise { GFA::Field::Float.new('e-5') }
28
20
  end
29
21
 
30
22
  def test_string
31
23
  end
32
24
 
33
25
  def test_hex
34
- f = GFA::Field::Hex.new("C3F0")
35
- assert_equal("C3F0", f.value)
36
- assert_raise do
37
- GFA::Field::Hex.new("C3PO")
38
- end
26
+ f = GFA::Field::Hex.new('C3F0')
27
+ assert_equal('C3F0', f.value)
28
+ assert_raise { GFA::Field::Hex.new('C3PO') }
39
29
  end
40
30
 
41
31
  def test_numarray
42
- f = GFA::Field::NumArray.new("i,1,2,3")
43
- assert_equal(%w[1 2 3], f.array)
44
- assert_equal("i", f.modifier)
45
- assert_raise do
46
- GFA::Field::NumArray.new("c,1,e,3")
47
- end
32
+ f = GFA::Field::NumArray.new('i,1,2,3')
33
+ assert_equal([1, 2, 3], f.array)
34
+ assert_equal('i', f.modifier)
35
+ assert_raise { GFA::Field::NumArray.new('c,1,e,3') }
48
36
  end
49
37
 
38
+ def test_equal
39
+ f = GFA::Field::SigInt.new('123')
40
+ j = GFA::Field::String.new('123')
41
+ k = GFA::Field::Float.new('123')
42
+ assert(f == 123)
43
+ assert(123 == f)
44
+ assert(f != 123.0)
45
+ assert(f != '123')
46
+ assert(f.eql?(123))
47
+ assert(f != j)
48
+ assert(f != k)
49
+ assert(f != k.value)
50
+ assert(f.value == k.value)
51
+ end
52
+
53
+ def test_equivalent
54
+ # String comparisons
55
+ assert(GFA::Field['Z:a'].~ GFA::Field['A:a'])
56
+ assert(GFA::Field['Z:ab'] !~ GFA::Field['A:a'])
57
+ assert(GFA::Field['Z:{"a":1}'].~ GFA::Field['J:{"a":1}'])
58
+ assert(GFA::Field['J:{"a":1}'].~ GFA::Field['Z:{"a":1}'])
59
+
60
+ # Numeric comparisons
61
+ assert(GFA::Field['Z:123'].~ GFA::Field['i:123'])
62
+ assert(GFA::Field['Z:123'].~ GFA::Field['i:123'])
63
+ assert(GFA::Field['i:123'].~ GFA::Field['f:123'])
64
+ assert(GFA::Field['f:123'].~ GFA::Field['B:i,123'])
65
+ assert(GFA::Field['B:i,123'].~ GFA::Field['H:7B'])
66
+ assert(GFA::Field['H:7B'].~ GFA::Field['f:123.0'])
67
+ assert(GFA::Field['Z:123'] !~ GFA::Field['H:7B']) # In hex-space!
68
+ assert(GFA::Field['f:1e3'].~ GFA::Field['f:1000'])
69
+ assert(GFA::Field['f:1e3'].~ 1e3)
70
+ assert(GFA::Field['B:i,123,456'].~ [123, 456.0])
71
+
72
+ # Non-commutative
73
+ assert(GFA::Field['i:123'].~ GFA::Field['f:123.4'])
74
+ assert(GFA::Field['f:123.4'] !~ GFA::Field['i:123'])
75
+ end
50
76
  end
data/test/parser_test.rb CHANGED
@@ -2,20 +2,55 @@ require "test_helper"
2
2
  require "gfa/parser"
3
3
 
4
4
  class ParserTest < Test::Unit::TestCase
5
-
5
+
6
6
  def test_load
7
- sample_f = File.expand_path('../fixtures/sample.gfa', __FILE__)
8
7
  assert_respond_to(GFA, :load)
9
- pre_fhs = ObjectSpace.each_object(IO).count{ |i| not i.closed? }
10
- sample = GFA.load(sample_f)
11
- post_fhs = ObjectSpace.each_object(IO).count{ |i| not i.closed? }
8
+
9
+ # Can load files and close pointers properly
10
+ pre_fhs = ObjectSpace.each_object(IO).count { |i| not i.closed? }
11
+ assert_nothing_raised do
12
+ GFA.load(fixture_path('sample1.gfa'))
13
+ end
14
+ assert_nothing_raised do
15
+ GFA.load(fixture_path('sample2.gfa'))
16
+ end
17
+ assert_nothing_raised do
18
+ GFA.load(fixture_path('sample3.gfa'))
19
+ end
20
+ assert_raise do
21
+ GFA.load(fixture_path('sample4.gfa'))
22
+ end
23
+ post_fhs = ObjectSpace.each_object(IO).count { |i| not i.closed? }
12
24
  assert_equal(pre_fhs, post_fhs)
13
- assert_equal(1, sample.headers.size)
14
- assert_equal(6, sample.segments.size)
15
- assert_equal(4, sample.links.size)
16
- assert(sample.containments.empty?)
17
- assert(sample.paths.empty?)
18
- assert_respond_to(sample, :records)
25
+ end
26
+
27
+ def test_records
28
+ # Samples are properly parsed
29
+ sample1 = GFA.load(fixture_path('sample1.gfa'))
30
+ assert_equal(1, sample1.headers.size)
31
+ assert_equal(6, sample1.segments.size)
32
+ assert_equal(4, sample1.links.size)
33
+ assert(sample1.containments.empty?)
34
+ assert(sample1.paths.empty?)
35
+ assert_respond_to(sample1, :records)
36
+ end
37
+
38
+ def test_comments
39
+ path = fixture_path('sample2.gfa')
40
+ sample = GFA.load(path)
41
+ assert(sample.comments.empty?)
42
+ sample = GFA.load(path, comments: true)
43
+ assert(!sample.comments.empty?)
44
+ end
45
+
46
+ def test_index
47
+ path = fixture_path('sample3.gfa')
48
+ sample = GFA.load(path)
49
+ assert(sample.path('first').is_a?(GFA::Record))
50
+ assert(sample.paths['first'].is_a?(GFA::Record))
51
+ assert_equal('first', sample.path('first')[2]&.value)
52
+ sample = GFA.load(path, index: false)
53
+ assert_nil(sample.path('first'))
19
54
  end
20
55
 
21
56
  def test_version_suppport
@@ -28,20 +63,22 @@ class ParserTest < Test::Unit::TestCase
28
63
  def test_line_by_line
29
64
  gfa = GFA.new
30
65
  assert_respond_to(gfa, :<<)
66
+
31
67
  # Empty
32
68
  gfa << ' '
33
69
  assert(gfa.empty?)
34
70
  gfa << 'H'
35
71
  assert(gfa.empty?)
72
+
36
73
  # Segment
37
74
  assert_equal(0, gfa.segments.size)
38
- gfa << "S\t1\tACTG"
75
+ gfa << "S\t1\tACTG\n"
39
76
  assert(!gfa.empty?)
40
77
  assert_equal(1, gfa.segments.size)
78
+
41
79
  # Version
42
80
  assert_nil(gfa.gfa_version)
43
81
  gfa << GFA::Record::Header.new('VN:Z:1.0')
44
82
  assert_equal('1.0', gfa.gfa_version)
45
83
  end
46
-
47
84
  end
data/test/record_test.rb CHANGED
@@ -17,6 +17,13 @@ class RecordTest < Test::Unit::TestCase
17
17
  assert_equal("P\ta\tb\t*", $rec_p.to_s)
18
18
  end
19
19
 
20
+ def test_init_by_string
21
+ p = GFA::Record["P\ta\tb\t*"]
22
+ assert_equal('a', p.path_name&.value)
23
+ c = GFA::Record["# doink!\n"]
24
+ assert_equal(' doink!', c.comment&.value)
25
+ end
26
+
20
27
  def test_hash
21
28
  other_h = GFA::Record::Header.new("VN:Z:1.0")
22
29
  assert_equal($rec_h.hash, other_h.hash)
data/test/test_helper.rb CHANGED
@@ -4,3 +4,8 @@ SimpleCov.start
4
4
  require 'rubygems'
5
5
  require 'test/unit'
6
6
  require 'gfa/common'
7
+
8
+ def fixture_path(file)
9
+ File.expand_path("../fixtures/#{file}", __FILE__)
10
+ end
11
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gfa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-22 00:00:00.000000000 Z
11
+ date: 2023-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rgl
@@ -85,6 +85,15 @@ files:
85
85
  - lib/gfa/record/path.rb
86
86
  - lib/gfa/record/segment.rb
87
87
  - lib/gfa/record/walk.rb
88
+ - lib/gfa/record_set.rb
89
+ - lib/gfa/record_set/comment_set.rb
90
+ - lib/gfa/record_set/containment_set.rb
91
+ - lib/gfa/record_set/header_set.rb
92
+ - lib/gfa/record_set/jump_set.rb
93
+ - lib/gfa/record_set/link_set.rb
94
+ - lib/gfa/record_set/path_set.rb
95
+ - lib/gfa/record_set/segment_set.rb
96
+ - lib/gfa/record_set/walk_set.rb
88
97
  - lib/gfa/version.rb
89
98
  - test/common_test.rb
90
99
  - test/field_test.rb
@@ -94,7 +103,7 @@ files:
94
103
  homepage: https://github.com/lmrodriguezr/gfa
95
104
  licenses: []
96
105
  metadata: {}
97
- post_install_message:
106
+ post_install_message:
98
107
  rdoc_options:
99
108
  - lib
100
109
  - README.md
@@ -115,8 +124,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
115
124
  - !ruby/object:Gem::Version
116
125
  version: '0'
117
126
  requirements: []
118
- rubygems_version: 3.2.3
119
- signing_key:
127
+ rubygems_version: 3.1.6
128
+ signing_key:
120
129
  specification_version: 4
121
130
  summary: Graphical Fragment Assembly (GFA) for Ruby
122
131
  test_files: []