gfa 0.2.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/gfa/record.rb CHANGED
@@ -15,7 +15,7 @@ class GFA::Record
15
15
  TYPES = CODES.values
16
16
  TYPES.each { |t| require "gfa/record/#{t.downcase}" }
17
17
 
18
- [:CODES, :REQ_FIELDS, :OPT_FIELDS, :TYPES].each do |x|
18
+ %i[CODES REQ_FIELDS OPT_FIELDS TYPES].each do |x|
19
19
  define_singleton_method(x) { const_get(x) }
20
20
  end
21
21
 
@@ -29,18 +29,34 @@ class GFA::Record
29
29
  const_get(name)
30
30
  end
31
31
 
32
+ def self.[](string)
33
+ return nil if string.nil? || string =~ /^\s*$/
34
+
35
+ split = string[0] == '#' ? ['', 2] : ["\t", 0]
36
+ code, *values = string.chomp.split(*split)
37
+ code_class(code).new(*values)
38
+ end
39
+
32
40
  # Instance-level
33
41
 
34
42
  attr :fields
35
43
 
36
- def [](k) fields[k] ; end
37
-
38
- def type ; CODES[code] ; end
39
-
40
- def code ; self.class.const_get(:CODE) ; end
41
-
42
- def empty? ; fields.empty? ; end
43
-
44
+ def [](k)
45
+ fields[k]
46
+ end
47
+
48
+ def type
49
+ CODES[code]
50
+ end
51
+
52
+ def code
53
+ self.class.const_get(:CODE)
54
+ end
55
+
56
+ def empty?
57
+ fields.empty?
58
+ end
59
+
44
60
  def to_s
45
61
  o = [code.to_s]
46
62
  self.class.REQ_FIELDS.each_index do |i|
@@ -52,7 +68,11 @@ class GFA::Record
52
68
  end
53
69
  o.join("\t")
54
70
  end
55
-
71
+
72
+ def dup
73
+ self.class[to_s]
74
+ end
75
+
56
76
  def hash
57
77
  { code => fields }.hash
58
78
  end
@@ -64,19 +84,19 @@ class GFA::Record
64
84
  alias == eql?
65
85
 
66
86
  private
67
-
87
+
68
88
  def add_field(f_tag, f_type, f_value, format = nil)
69
89
  unless format.nil?
70
90
  msg = (f_tag.is_a?(Integer) ? "column #{f_tag}" : "#{f_tag} field")
71
91
  GFA.assert_format(f_value, format, "Bad #{type} #{msg}")
72
92
  end
73
93
 
74
- @fields[ f_tag ] = GFA::Field.code_class(f_type).new(f_value)
94
+ @fields[f_tag] = GFA::Field.code_class(f_type).new(f_value)
75
95
  end
76
-
96
+
77
97
  def add_opt_field(f, known)
78
98
  m = /^([A-Za-z]+):([A-Za-z]+):(.*)$/.match(f)
79
- raise "Cannot parse field: '#{f}'." unless m
99
+ raise "Cannot parse field: '#{f}'" unless m
80
100
 
81
101
  f_tag = m[1].to_sym
82
102
  f_type = m[2].to_sym
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::CommentSet < GFA::RecordSet
2
+ CODE = :'#'
3
+ end
@@ -0,0 +1,4 @@
1
+ class GFA::RecordSet::ContainmentSet < GFA::RecordSet
2
+ CODE = :C
3
+ INDEX_FIELD = 2 # Container: Name of container segment
4
+ end
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::HeaderSet < GFA::RecordSet
2
+ CODE = :H
3
+ end
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::JumpSet < GFA::RecordSet
2
+ CODE = :J
3
+ end
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::LinkSet < GFA::RecordSet
2
+ CODE = :L
3
+ end
@@ -0,0 +1,4 @@
1
+ class GFA::RecordSet::PathSet < GFA::RecordSet
2
+ CODE = :P
3
+ INDEX_FIELD = 2 # PathName: Path name
4
+ end
@@ -0,0 +1,4 @@
1
+ class GFA::RecordSet::SegmentSet < GFA::RecordSet
2
+ CODE = :S
3
+ INDEX_FIELD = 2 # Name: Segment name
4
+ end
@@ -0,0 +1,3 @@
1
+ class GFA::RecordSet::WalkSet < GFA::RecordSet
2
+ CODE = :W
3
+ end
@@ -0,0 +1,121 @@
1
+
2
+ require 'gfa/record'
3
+
4
+ class GFA::RecordSet
5
+ INDEX_FIELD = nil
6
+ TYPES = GFA::Record.TYPES.map { |i| :"#{i}Set" }
7
+ GFA::Record.TYPES.each { |t| require "gfa/record_set/#{t.downcase}_set" }
8
+
9
+ %i[TYPES].each do |x|
10
+ define_singleton_method(x) { const_get(x) }
11
+ end
12
+
13
+ def self.code_class(code)
14
+ name = GFA::Record.CODES[code.to_sym]
15
+ raise "Unknown record type: #{code}." if name.nil?
16
+ name_class(name)
17
+ end
18
+
19
+ def self.name_class(name)
20
+ name = "#{name}Set" unless name =~ /Set$/
21
+ const_get(name)
22
+ end
23
+
24
+ # Instance-level
25
+
26
+ attr_reader :set, :index, :gfa
27
+
28
+ def initialize(gfa = nil)
29
+ @set = []
30
+ @index = {}
31
+ @gfa = gfa || GFA.new
32
+ end
33
+
34
+ def [](k)
35
+ return set[k] if k.is_a?(Integer)
36
+ find_index(k)
37
+ end
38
+
39
+ def type
40
+ GFA::Record.CODES[code]
41
+ end
42
+
43
+ def code
44
+ self.class.const_get(:CODE)
45
+ end
46
+
47
+ def index_field
48
+ self.class.const_get(:INDEX_FIELD)
49
+ end
50
+
51
+ %i[empty? hash size count length first last].each do |i|
52
+ define_method(i) { set.send(i) }
53
+ end
54
+
55
+ def to_s
56
+ set.map(&:to_s).join("\n")
57
+ end
58
+
59
+ def eql?(rec)
60
+ hash == rec.hash
61
+ end
62
+
63
+ alias == eql?
64
+
65
+ def <<(v)
66
+ v = v.split("\t") if v.is_a? String
67
+ v = GFA::Record.code_class(code).new(*v) if v.is_a? Array
68
+ raise "Not a GFA Record: #{v}" unless v.is_a? GFA::Record
69
+ raise "Wrong type of record: #{v.type}" if v.type != type
70
+
71
+ @set << v
72
+ index!(v)
73
+ end
74
+
75
+ def indexed?
76
+ (empty? || !index_field) ? gfa.opts[:index] : !index.empty?
77
+ end
78
+
79
+ def rebuild_index!
80
+ @index = {}
81
+ set.each { |v| index!(v) }
82
+ end
83
+
84
+ def index_id(v)
85
+ v[index_field]&.value
86
+ end
87
+
88
+ def index!(v)
89
+ save_index(index_id(v), v) if index_field
90
+
91
+ # Whenever present, index also by ID
92
+ if gfa.opts[:index_id] && v[:ID] && v[:ID].value =~ index_id(v)
93
+ save_index(v[:ID].value, v)
94
+ end
95
+ end
96
+
97
+ def save_index(k, v)
98
+ return unless gfa.opts[:index] && k
99
+
100
+ if @index[k]
101
+ f = index_field.is_a?(Integer) ? '' : "#{index_field}: "
102
+ raise "#{type} already registered: #{f}#{k}"
103
+ end
104
+ @index[k] = v
105
+ end
106
+
107
+ def find_index(k)
108
+ k = k.value if k.is_a? GFA::Field
109
+ @index[k]
110
+ end
111
+
112
+ def merge!(record_set)
113
+ raise "Not a record set" unless record_set.is_a?(GFA::RecordSet)
114
+ if record_set.type != type
115
+ raise "Wrong type of record set: #{record_set.type}"
116
+ end
117
+
118
+ record_set.set.each { |i| @set << i }
119
+ record_set.index.each { |k, v| save_index(k, v) }
120
+ end
121
+ end
data/lib/gfa/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  class GFA
2
- VERSION = '0.2.0'
2
+ VERSION = '0.4.0'
3
3
  VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
4
4
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
5
5
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
data/test/common_test.rb CHANGED
@@ -4,10 +4,10 @@ class CommonTest < Test::Unit::TestCase
4
4
 
5
5
  def test_assert_format
6
6
  assert_raise do
7
- GFA.assert_format("tsooq", /^.$/, "Not a char")
7
+ GFA.assert_format('tsooq', /^.$/, 'Not a char')
8
8
  end
9
9
  assert_nothing_raised do
10
- GFA.assert_format("z", /^.$/, "Not a char")
10
+ GFA.assert_format('z', /^.$/, 'Not a char')
11
11
  end
12
12
  end
13
13
 
@@ -20,15 +20,15 @@ class CommonTest < Test::Unit::TestCase
20
20
  def test_record_getters
21
21
  gfa = GFA.new
22
22
  assert_respond_to(gfa, :headers)
23
- assert_equal([], gfa.links)
23
+ assert_equal([], gfa.links.set)
24
24
  assert_nil( gfa.segment(0) )
25
25
  end
26
26
 
27
27
  def test_record_setters
28
28
  gfa = GFA.new
29
29
  assert_respond_to(gfa, :add_path)
30
- gfa.add_containment("zooq")
31
- assert_equal("zooq", gfa.records[:Containment].first)
30
+ gfa.add_containment("zooq\t+\ttsuk\t-\t1\t*")
31
+ assert_equal('zooq', gfa.records[:Containment].first.from.value)
32
32
  end
33
33
 
34
34
  end
data/test/field_test.rb CHANGED
@@ -3,48 +3,74 @@ require "test_helper"
3
3
  class FieldTest < Test::Unit::TestCase
4
4
 
5
5
  def test_char
6
- f = GFA::Field::Char.new("%")
7
- assert_equal("%", f.value)
8
- assert_raise do
9
- GFA::Field::Char.new(" ")
10
- end
11
- assert_raise do
12
- GFA::Field::Char.new("")
13
- end
14
- assert_raise do
15
- GFA::Field::Char.new("^.^")
16
- end
6
+ f = GFA::Field::Char.new('%')
7
+ assert_equal('%', f.value)
8
+ assert_raise { GFA::Field::Char.new(' ') }
9
+ assert_raise { GFA::Field::Char.new('') }
10
+ assert_raise { GFA::Field::Char.new('^.^') }
17
11
  end
18
12
 
19
13
  def test_sigint
20
14
  end
21
15
 
22
16
  def test_float
23
- f = GFA::Field::Float.new("1.3e-5")
17
+ f = GFA::Field::Float.new('1.3e-5')
24
18
  assert_equal(1.3e-5, f.value)
25
- assert_raise do
26
- GFA::Field::Float.new("e-5")
27
- end
19
+ assert_raise { GFA::Field::Float.new('e-5') }
28
20
  end
29
21
 
30
22
  def test_string
31
23
  end
32
24
 
33
25
  def test_hex
34
- f = GFA::Field::Hex.new("C3F0")
35
- assert_equal("C3F0", f.value)
36
- assert_raise do
37
- GFA::Field::Hex.new("C3PO")
38
- end
26
+ f = GFA::Field::Hex.new('C3F0')
27
+ assert_equal('C3F0', f.value)
28
+ assert_raise { GFA::Field::Hex.new('C3PO') }
39
29
  end
40
30
 
41
31
  def test_numarray
42
- f = GFA::Field::NumArray.new("i,1,2,3")
43
- assert_equal(%w[1 2 3], f.array)
44
- assert_equal("i", f.modifier)
45
- assert_raise do
46
- GFA::Field::NumArray.new("c,1,e,3")
47
- end
32
+ f = GFA::Field::NumArray.new('i,1,2,3')
33
+ assert_equal([1, 2, 3], f.array)
34
+ assert_equal('i', f.modifier)
35
+ assert_raise { GFA::Field::NumArray.new('c,1,e,3') }
48
36
  end
49
37
 
38
+ def test_equal
39
+ f = GFA::Field::SigInt.new('123')
40
+ j = GFA::Field::String.new('123')
41
+ k = GFA::Field::Float.new('123')
42
+ assert(f == 123)
43
+ assert(123 == f)
44
+ assert(f != 123.0)
45
+ assert(f != '123')
46
+ assert(f.eql?(123))
47
+ assert(f != j)
48
+ assert(f != k)
49
+ assert(f != k.value)
50
+ assert(f.value == k.value)
51
+ end
52
+
53
+ def test_equivalent
54
+ # String comparisons
55
+ assert(GFA::Field['Z:a'].~ GFA::Field['A:a'])
56
+ assert(GFA::Field['Z:ab'] !~ GFA::Field['A:a'])
57
+ assert(GFA::Field['Z:{"a":1}'].~ GFA::Field['J:{"a":1}'])
58
+ assert(GFA::Field['J:{"a":1}'].~ GFA::Field['Z:{"a":1}'])
59
+
60
+ # Numeric comparisons
61
+ assert(GFA::Field['Z:123'].~ GFA::Field['i:123'])
62
+ assert(GFA::Field['Z:123'].~ GFA::Field['i:123'])
63
+ assert(GFA::Field['i:123'].~ GFA::Field['f:123'])
64
+ assert(GFA::Field['f:123'].~ GFA::Field['B:i,123'])
65
+ assert(GFA::Field['B:i,123'].~ GFA::Field['H:7B'])
66
+ assert(GFA::Field['H:7B'].~ GFA::Field['f:123.0'])
67
+ assert(GFA::Field['Z:123'] !~ GFA::Field['H:7B']) # In hex-space!
68
+ assert(GFA::Field['f:1e3'].~ GFA::Field['f:1000'])
69
+ assert(GFA::Field['f:1e3'].~ 1e3)
70
+ assert(GFA::Field['B:i,123,456'].~ [123, 456.0])
71
+
72
+ # Non-commutative
73
+ assert(GFA::Field['i:123'].~ GFA::Field['f:123.4'])
74
+ assert(GFA::Field['f:123.4'] !~ GFA::Field['i:123'])
75
+ end
50
76
  end
data/test/parser_test.rb CHANGED
@@ -2,20 +2,57 @@ require "test_helper"
2
2
  require "gfa/parser"
3
3
 
4
4
  class ParserTest < Test::Unit::TestCase
5
-
5
+
6
6
  def test_load
7
- sample_f = File.expand_path('../fixtures/sample.gfa', __FILE__)
8
7
  assert_respond_to(GFA, :load)
9
- pre_fhs = ObjectSpace.each_object(IO).count{ |i| not i.closed? }
10
- sample = GFA.load(sample_f)
11
- post_fhs = ObjectSpace.each_object(IO).count{ |i| not i.closed? }
8
+
9
+ # Can load files and close pointers properly
10
+ pre_fhs = ObjectSpace.each_object(IO).count { |i| not i.closed? }
11
+ assert_nothing_raised do
12
+ GFA.load(fixture_path('sample1.gfa'))
13
+ end
14
+ assert_nothing_raised do
15
+ GFA.load(fixture_path('sample2.gfa'))
16
+ end
17
+ assert_nothing_raised do
18
+ GFA.load(fixture_path('sample3.gfa'))
19
+ end
20
+ assert_raise do
21
+ GFA.load(fixture_path('sample4.gfa'))
22
+ end
23
+ post_fhs = ObjectSpace.each_object(IO).count { |i| not i.closed? }
12
24
  assert_equal(pre_fhs, post_fhs)
13
- assert_equal(1, sample.headers.size)
14
- assert_equal(6, sample.segments.size)
15
- assert_equal(4, sample.links.size)
16
- assert(sample.containments.empty?)
17
- assert(sample.paths.empty?)
18
- assert_respond_to(sample, :records)
25
+ end
26
+
27
+ def test_records
28
+ # Samples are properly parsed
29
+ sample1 = GFA.load(fixture_path('sample1.gfa'))
30
+ assert_equal(1, sample1.headers.size)
31
+ assert_equal(6, sample1.segments.size)
32
+ assert_equal(4, sample1.links.size)
33
+ assert(sample1.containments.empty?)
34
+ assert(sample1.paths.empty?)
35
+ assert_respond_to(sample1, :records)
36
+ end
37
+
38
+ def test_comments
39
+ path = fixture_path('sample2.gfa')
40
+ sample = GFA.load(path)
41
+ assert(sample.comments.empty?)
42
+ sample = GFA.load(path, comments: true)
43
+ assert(!sample.comments.empty?)
44
+ end
45
+
46
+ def test_index
47
+ path = fixture_path('sample3.gfa')
48
+ sample = GFA.load(path)
49
+ assert(sample.path('first').is_a?(GFA::Record))
50
+ assert(sample.paths['first'].is_a?(GFA::Record))
51
+ assert_equal('first', sample.path('first')[2]&.value)
52
+ assert(sample.indexed?)
53
+ sample = GFA.load(path, index: false)
54
+ assert_nil(sample.path('first'))
55
+ assert(!sample.indexed?)
19
56
  end
20
57
 
21
58
  def test_version_suppport
@@ -28,20 +65,22 @@ class ParserTest < Test::Unit::TestCase
28
65
  def test_line_by_line
29
66
  gfa = GFA.new
30
67
  assert_respond_to(gfa, :<<)
68
+
31
69
  # Empty
32
70
  gfa << ' '
33
71
  assert(gfa.empty?)
34
72
  gfa << 'H'
35
73
  assert(gfa.empty?)
74
+
36
75
  # Segment
37
76
  assert_equal(0, gfa.segments.size)
38
- gfa << "S\t1\tACTG"
77
+ gfa << "S\t1\tACTG\n"
39
78
  assert(!gfa.empty?)
40
79
  assert_equal(1, gfa.segments.size)
80
+
41
81
  # Version
42
82
  assert_nil(gfa.gfa_version)
43
83
  gfa << GFA::Record::Header.new('VN:Z:1.0')
44
84
  assert_equal('1.0', gfa.gfa_version)
45
85
  end
46
-
47
86
  end
data/test/record_test.rb CHANGED
@@ -17,6 +17,13 @@ class RecordTest < Test::Unit::TestCase
17
17
  assert_equal("P\ta\tb\t*", $rec_p.to_s)
18
18
  end
19
19
 
20
+ def test_init_by_string
21
+ p = GFA::Record["P\ta\tb\t*"]
22
+ assert_equal('a', p.path_name&.value)
23
+ c = GFA::Record["# doink!\n"]
24
+ assert_equal(' doink!', c.comment&.value)
25
+ end
26
+
20
27
  def test_hash
21
28
  other_h = GFA::Record::Header.new("VN:Z:1.0")
22
29
  assert_equal($rec_h.hash, other_h.hash)
data/test/test_helper.rb CHANGED
@@ -4,3 +4,8 @@ SimpleCov.start
4
4
  require 'rubygems'
5
5
  require 'test/unit'
6
6
  require 'gfa/common'
7
+
8
+ def fixture_path(file)
9
+ File.expand_path("../fixtures/#{file}", __FILE__)
10
+ end
11
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gfa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-22 00:00:00.000000000 Z
11
+ date: 2023-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rgl
@@ -63,6 +63,8 @@ files:
63
63
  - LICENSE
64
64
  - README.md
65
65
  - Rakefile
66
+ - bin/gfa-add-gaf
67
+ - bin/gfa-subgraph
66
68
  - lib/gfa.rb
67
69
  - lib/gfa/common.rb
68
70
  - lib/gfa/field.rb
@@ -79,12 +81,22 @@ files:
79
81
  - lib/gfa/record.rb
80
82
  - lib/gfa/record/comment.rb
81
83
  - lib/gfa/record/containment.rb
84
+ - lib/gfa/record/has_from_to.rb
82
85
  - lib/gfa/record/header.rb
83
86
  - lib/gfa/record/jump.rb
84
87
  - lib/gfa/record/link.rb
85
88
  - lib/gfa/record/path.rb
86
89
  - lib/gfa/record/segment.rb
87
90
  - lib/gfa/record/walk.rb
91
+ - lib/gfa/record_set.rb
92
+ - lib/gfa/record_set/comment_set.rb
93
+ - lib/gfa/record_set/containment_set.rb
94
+ - lib/gfa/record_set/header_set.rb
95
+ - lib/gfa/record_set/jump_set.rb
96
+ - lib/gfa/record_set/link_set.rb
97
+ - lib/gfa/record_set/path_set.rb
98
+ - lib/gfa/record_set/segment_set.rb
99
+ - lib/gfa/record_set/walk_set.rb
88
100
  - lib/gfa/version.rb
89
101
  - test/common_test.rb
90
102
  - test/field_test.rb
@@ -94,7 +106,7 @@ files:
94
106
  homepage: https://github.com/lmrodriguezr/gfa
95
107
  licenses: []
96
108
  metadata: {}
97
- post_install_message:
109
+ post_install_message:
98
110
  rdoc_options:
99
111
  - lib
100
112
  - README.md
@@ -115,8 +127,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
115
127
  - !ruby/object:Gem::Version
116
128
  version: '0'
117
129
  requirements: []
118
- rubygems_version: 3.2.3
119
- signing_key:
130
+ rubygems_version: 3.1.6
131
+ signing_key:
120
132
  specification_version: 4
121
133
  summary: Graphical Fragment Assembly (GFA) for Ruby
122
134
  test_files: []