gfa 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/gfa/parser.rb ADDED
@@ -0,0 +1,46 @@
1
+ require 'gfa/record'
2
+
3
# Parsing support for GFA: loading a whole file and appending single lines
# or pre-built records to a GFA object.
class GFA
  # Class-level

  # Lowest and highest GFA specification versions accepted by
  # +supported_version?+ (both inclusive).
  MIN_VERSION = '1.0'
  MAX_VERSION = '1.2'

  # Reads +file+ line by line and returns a new GFA holding every record
  # that could be parsed from it.
  #
  # The block form of File.open guarantees the handle is closed even when a
  # line cannot be parsed or declares an unsupported version.
  def self.load(file)
    gfa = GFA.new
    File.open(file, 'r') do |fh|
      fh.each { |ln| gfa << ln }
    end
    gfa
  end

  # Returns true when version +v+ lies between MIN_VERSION and MAX_VERSION.
  # Versions are compared numerically through +to_f+, so only the leading
  # "major.minor" part of the string takes part in the comparison.
  def self.supported_version?(v)
    v.to_f >= MIN_VERSION.to_f && v.to_f <= MAX_VERSION.to_f
  end

  # Instance-level

  # Appends +obj+ to the graph. +obj+ is either a GFA::Record or a raw text
  # line, which is parsed first. Blank lines and records without fields are
  # silently ignored. A Header record carrying a VN field also sets (and
  # validates) the GFA version of the graph.
  def <<(obj)
    obj = parse_line(obj) unless obj.is_a? GFA::Record
    return if obj.nil? || obj.empty?

    @records[obj.type] << obj

    if obj.type == :Header && !obj.fields[:VN].nil?
      set_gfa_version(obj.fields[:VN].value)
    end
  end

  # Stores +v+ as the GFA version of the graph.
  #
  # Raises RuntimeError when +v+ is outside the supported range; the value
  # is stored before the check, as in previous releases.
  def set_gfa_version(v)
    @gfa_version = v
    unless GFA.supported_version?(v)
      raise "GFA version currently unsupported: #{v}."
    end
  end

  private

  # Turns one tab-separated text line into a record of the class selected
  # by its first column. Returns nil for blank lines.
  def parse_line(ln)
    # Non-destructive chomp: the caller's string is left untouched and
    # frozen strings are accepted.
    ln = ln.chomp
    return nil if ln =~ /\A\s*\z/

    cols = ln.split("\t")
    GFA::Record.code_class(cols.shift).new(*cols)
  end
end
@@ -0,0 +1,10 @@
1
# Comment record (code +#+). It declares no required columns and no
# predefined optional tags.
class GFA::Record::Comment < GFA::Record
  CODE = :'#'
  REQ_FIELDS = []
  OPT_FIELDS = {}

  # Builds the record from optional fields only; every entry of +opt_fields+
  # is handed to +add_opt_field+ together with this class' OPT_FIELDS.
  def initialize(*opt_fields)
    @fields = {}
    opt_fields.each do |field|
      add_opt_field(field, OPT_FIELDS)
    end
  end
end
@@ -0,0 +1,29 @@
1
# Containment record (code +C+): one oriented segment (+from+, the container)
# holding another oriented segment (+to+, the contained) at position +pos+.
class GFA::Record::Containment < GFA::Record
  CODE = :C
  REQ_FIELDS = %i[from from_orient to to_orient pos overlap]
  OPT_FIELDS = {
    RC: :i, # Read coverage
    NM: :i, # Number of mismatches/gaps
    ID: :Z  # Edge identifier
  }

  # One reader per required column. Required columns are stored in +fields+
  # under integer keys starting at 2.
  REQ_FIELDS.each_with_index do |name, i|
    define_method(name) { fields[i + 2] }
  end

  alias container from
  alias container_orient from_orient
  alias contained to
  alias contained_orient to_orient

  # Validates and stores the six required columns, then every optional
  # field (+opt_fields+ entries are passed to +add_opt_field+).
  #
  # Each pattern is anchored to the whole value and its alternatives are
  # grouped, so a value such as "*junk" or "junk10M" is rejected. The
  # orientation must be exactly "+" or "-".
  def initialize(from, from_orient, to, to_orient, pos, overlap, *opt_fields)
    @fields = {}
    add_field(2, :Z, from,        /\A[!-)+-<>-~][!-~]*\z/)
    add_field(3, :Z, from_orient, /\A[+-]\z/)
    add_field(4, :Z, to,          /\A[!-)+-<>-~][!-~]*\z/)
    add_field(5, :Z, to_orient,   /\A[+-]\z/)
    add_field(6, :i, pos,         /\A[0-9]*\z/)
    add_field(7, :Z, overlap,     /\A(?:\*|(?:[0-9]+[MIDNSHPX=])+)\z/)
    opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
  end
end
@@ -0,0 +1,12 @@
1
# Header record (code +H+). It has no required columns; the only predefined
# optional tag is VN, the version number.
class GFA::Record::Header < GFA::Record
  CODE = :H
  REQ_FIELDS = []
  OPT_FIELDS = {
    VN: :Z # Version number
  }

  # Builds the record from optional fields only; every entry of +opt_fields+
  # is handed to +add_opt_field+ together with this class' OPT_FIELDS.
  def initialize(*opt_fields)
    @fields = {}
    opt_fields.each do |field|
      add_opt_field(field, OPT_FIELDS)
    end
  end
end
@@ -0,0 +1,45 @@
1
# Jump record (code +J+): a connection from one oriented segment to another
# together with a distance column.
class GFA::Record::Jump < GFA::Record
  CODE = :J
  REQ_FIELDS = %i[from from_orient to to_orient distance]
  OPT_FIELDS = {
    SC: :i # 1 indicates indirect shortcut connections. Only 0/1 allowed.
  }

  # One reader per required column. Required columns are stored in +fields+
  # under integer keys starting at 2.
  REQ_FIELDS.each_with_index do |name, i|
    define_method(name) { fields[i + 2] }
  end

  # Validates and stores the five required columns, then every optional
  # field (+opt_fields+ entries are passed to +add_opt_field+).
  #
  # Each pattern is anchored to the whole value and its alternatives are
  # grouped: the orientation must be exactly "+" or "-", and the distance
  # either "*" or an optionally signed integer.
  def initialize(from, from_orient, to, to_orient, distance, *opt_fields)
    @fields = {}
    add_field(2, :Z, from,        /\A[!-)+-<>-~][!-~]*\z/)
    add_field(3, :Z, from_orient, /\A[+-]\z/)
    add_field(4, :Z, to,          /\A[!-)+-<>-~][!-~]*\z/)
    add_field(5, :Z, to_orient,   /\A[+-]\z/)
    add_field(6, :Z, distance,    /\A(?:\*|[-+]?[0-9]+)\z/)
    opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
  end

  # Does the jump start at +segment+ (and, when given, with orientation
  # +orient+)? +segment+ may be a Segment record, a field or a plain value;
  # +orient+ may be a field or a plain value.
  def from?(segment, orient = nil)
    links_from_to?(segment, orient, true)
  end

  # Same as +from?+, but checks the destination end of the jump.
  def to?(segment, orient = nil)
    links_from_to?(segment, orient, false)
  end

  private

  # Compares +segment+/+orient+ against the "from" columns (2 and 3) when
  # +from+ is true, or against the "to" columns (4 and 5) otherwise.
  # A nil +orient+ matches any orientation.
  def links_from_to?(segment, orient, from)
    segment = segment_name(segment)
    orient = orient.value if orient.is_a? GFA::Field
    base_k = from ? 2 : 4
    segment == fields[base_k].value &&
      (orient.nil? || orient == fields[base_k + 1].value)
  end

  # Reduces a Segment record or a field to the bare value used for
  # comparison; anything else is returned unchanged.
  def segment_name(segment)
    case segment
    when GFA::Record::Segment then segment.name.value
    when GFA::Field then segment.value
    else segment
    end
  end
end
@@ -0,0 +1,49 @@
1
# Link record (code +L+): a connection from one oriented segment to another
# together with an overlap column.
class GFA::Record::Link < GFA::Record
  CODE = :L
  REQ_FIELDS = %i[from from_orient to to_orient overlap]
  OPT_FIELDS = {
    MQ: :i, # Mapping quality
    NM: :i, # Number of mismatches/gaps
    EC: :i, # Read count
    FC: :i, # Fragment count
    KC: :i, # k-mer count
    ID: :Z  # Edge identifier
  }

  # One reader per required column. Required columns are stored in +fields+
  # under integer keys starting at 2.
  REQ_FIELDS.each_with_index do |name, i|
    define_method(name) { fields[i + 2] }
  end

  # Validates and stores the five required columns, then every optional
  # field (+opt_fields+ entries are passed to +add_opt_field+).
  #
  # Each pattern is anchored to the whole value and its alternatives are
  # grouped: the orientation must be exactly "+" or "-", and the overlap
  # either "*" or a run of <number><operation> pairs.
  def initialize(from, from_orient, to, to_orient, overlap, *opt_fields)
    @fields = {}
    add_field(2, :Z, from,        /\A[!-)+-<>-~][!-~]*\z/)
    add_field(3, :Z, from_orient, /\A[+-]\z/)
    add_field(4, :Z, to,          /\A[!-)+-<>-~][!-~]*\z/)
    add_field(5, :Z, to_orient,   /\A[+-]\z/)
    add_field(6, :Z, overlap,     /\A(?:\*|(?:[0-9]+[MIDNSHPX=])+)\z/)
    opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
  end

  # Does the link start at +segment+ (and, when given, with orientation
  # +orient+)? +segment+ may be a Segment record, a field or a plain value;
  # +orient+ may be a field or a plain value.
  def from?(segment, orient = nil)
    links_from_to?(segment, orient, true)
  end

  # Same as +from?+, but checks the destination end of the link.
  def to?(segment, orient = nil)
    links_from_to?(segment, orient, false)
  end

  private

  # Compares +segment+/+orient+ against the "from" columns (2 and 3) when
  # +from+ is true, or against the "to" columns (4 and 5) otherwise.
  # A nil +orient+ matches any orientation.
  def links_from_to?(segment, orient, from)
    segment = segment_name(segment)
    orient = orient.value if orient.is_a? GFA::Field
    base_k = from ? 2 : 4
    segment == fields[base_k].value &&
      (orient.nil? || orient == fields[base_k + 1].value)
  end

  # Reduces a Segment record or a field to the bare value used for
  # comparison; anything else is returned unchanged.
  def segment_name(segment)
    case segment
    when GFA::Record::Segment then segment.name.value
    when GFA::Field then segment.value
    else segment
    end
  end
end
@@ -0,0 +1,19 @@
1
# Path record (code +P+): a named path with its segment-name column and its
# overlap (CIGAR) column.
class GFA::Record::Path < GFA::Record
  CODE = :P
  REQ_FIELDS = %i[path_name segment_name cigar]
  OPT_FIELDS = {}

  # One reader per required column. Required columns are stored in +fields+
  # under integer keys starting at 2.
  REQ_FIELDS.each_with_index do |name, i|
    define_method(name) { fields[i + 2] }
  end

  alias overlaps cigar

  # A single overlap entry: "*" or a run of <number><operation> pairs.
  OVERLAP_ENTRY = /\*|(?:[0-9]+[MIDNSHPX=])+/

  # Validates and stores the three required columns, then every optional
  # field (+opt_fields+ entries are passed to +add_opt_field+).
  #
  # Patterns are anchored to the whole value. The overlap column accepts a
  # comma-separated list of entries (e.g. "4M,5M"), each of which must be
  # well formed; previously only the start or the end of the value was
  # actually checked.
  def initialize(path_name, segment_name, cigar, *opt_fields)
    @fields = {}
    add_field(2, :Z, path_name,    /\A[!-)+-<>-~][!-~]*\z/)
    add_field(3, :Z, segment_name, /\A[!-)+-<>-~][!-~]*\z/)
    add_field(4, :Z, cigar,
              /\A(?:#{OVERLAP_ENTRY})(?:,(?:#{OVERLAP_ENTRY}))*\z/)
    opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
  end
end
@@ -0,0 +1,25 @@
1
# Segment record (code +S+): a named segment and its sequence.
class GFA::Record::Segment < GFA::Record
  CODE = :S
  REQ_FIELDS = %i[name sequence]
  OPT_FIELDS = {
    LN: :i, # Segment length
    RC: :i, # Read count
    FC: :i, # Fragment count
    KC: :i, # k-mer count
    SH: :H, # SHA-256 checksum of the sequence
    UR: :Z, # URI or local file-system path of the sequence
    # Non-canonical
    DP: :f  # (From SAM)
  }

  # One reader per required column. Required columns are stored in +fields+
  # under integer keys starting at 2.
  REQ_FIELDS.each_with_index do |name, i|
    define_method(name) { fields[i + 2] }
  end

  # Validates and stores the two required columns, then every optional
  # field (+opt_fields+ entries are passed to +add_opt_field+).
  #
  # Patterns are anchored to the whole value and the alternatives of the
  # sequence pattern are grouped: the sequence is either "*" or consists
  # solely of letters, "=" and ".".
  def initialize(name, sequence, *opt_fields)
    @fields = {}
    add_field(2, :Z, name,     /\A[!-)+-<>-~][!-~]*\z/)
    add_field(3, :Z, sequence, /\A(?:\*|[A-Za-z=.]+)\z/)
    opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
  end
end
@@ -0,0 +1,20 @@
1
# Walk record (code +W+): a walk over oriented segments, identified by
# sample, haplotype index and sequence id, with optional start/end positions.
class GFA::Record::Walk < GFA::Record
  CODE = :W
  REQ_FIELDS = %i[sample_id hap_index seq_id seq_start seq_end walk]
  OPT_FIELDS = {}

  # One reader per required column. Required columns are stored in +fields+
  # under integer keys starting at 2.
  REQ_FIELDS.each_with_index do |name, i|
    define_method(name) { fields[i + 2] }
  end

  # Validates and stores the six required columns, then every optional
  # field (+opt_fields+ entries are passed to +add_opt_field+).
  #
  # Patterns are anchored to the whole value and alternatives are grouped:
  # +seq_start+ and +seq_end+ are either "*" or an unsigned integer, and
  # +walk+ is a run of ">" or "<" followed by a segment identifier.
  def initialize(sample_id, hap_index, seq_id, seq_start, seq_end, walk, *opt_fields)
    @fields = {}
    add_field(2, :Z, sample_id, /\A[!-)+-<>-~][!-~]*\z/)
    add_field(3, :i, hap_index, /\A[0-9]+\z/)
    add_field(4, :Z, seq_id,    /\A[!-)+-<>-~][!-~]*\z/)
    add_field(5, :i, seq_start, /\A(?:\*|[0-9]+)\z/)
    add_field(6, :i, seq_end,   /\A(?:\*|[0-9]+)\z/)
    add_field(7, :Z, walk,      /\A([><][!-;=?-~]+)+\z/)
    opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
  end
end
data/lib/gfa/record.rb ADDED
@@ -0,0 +1,96 @@
1
# Base class of every GFA record type. It maps one-letter record codes to
# record classes and provides field storage, validation, serialization and
# comparison for the concrete record classes.
class GFA::Record
  # Class-level

  # Record code (first column of a line) => record type name.
  CODES = {
    :'#' => :Comment,
    H: :Header,
    S: :Segment,
    L: :Link,
    J: :Jump, # Since 1.2
    C: :Containment,
    P: :Path,
    W: :Walk # Since 1.1
  }
  REQ_FIELDS = []
  OPT_FIELDS = {}
  TYPES = CODES.values
  # Load one file per record type.
  TYPES.each { |t| require "gfa/record/#{t.downcase}" }

  # Expose the constants as class methods. +const_get+ runs on the receiver,
  # so a subclass answers with its own REQ_FIELDS / OPT_FIELDS.
  [:CODES, :REQ_FIELDS, :OPT_FIELDS, :TYPES].each do |x|
    define_singleton_method(x) { const_get(x) }
  end

  # Returns the record class registered for +code+ (anything responding to
  # +to_sym+). Raises RuntimeError for a code missing from CODES.
  def self.code_class(code)
    name = CODES[code.to_sym]
    raise "Unknown record type: #{code}." if name.nil?
    name_class(name)
  end

  # Returns the record class named +name+ (e.g. +:Segment+).
  def self.name_class(name)
    const_get(name)
  end

  # Instance-level

  # Hash of fields: required columns under integer keys (starting at 2),
  # optional fields under their tag as a Symbol.
  attr_reader :fields

  # Field stored under key +k+.
  def [](k) fields[k] ; end

  # Record type name (a value of CODES) for this record's code.
  def type ; CODES[code] ; end

  # Record code, taken from the CODE constant of the concrete class.
  def code ; self.class.const_get(:CODE) ; end

  # True when the record holds no field at all.
  def empty? ; fields.empty? ; end

  # Tab-separated text form: the code, the required columns in order, then
  # every optional field as "TAG:<field>".
  def to_s
    o = [code.to_s]
    self.class.REQ_FIELDS.each_index do |i|
      o << fields[i + 2].to_s(false)
    end
    fields.each do |k, v|
      next if k.is_a? Integer # required columns were emitted above
      o << "#{k}:#{v}"
    end
    o.join("\t")
  end

  # Hash value derived from the record code and its fields.
  def hash
    { code => fields }.hash
  end

  # Two records are considered equal when their hash values are equal.
  def eql?(rec)
    hash == rec.hash
  end

  alias == eql?

  private

  # Stores +f_value+ under +f_tag+ as a field of type +f_type+. When
  # +format+ is given the value is first checked with GFA.assert_format,
  # using a message that names the offending column or tag.
  def add_field(f_tag, f_type, f_value, format = nil)
    unless format.nil?
      msg = (f_tag.is_a?(Integer) ? "column #{f_tag}" : "#{f_tag} field")
      GFA.assert_format(f_value, format, "Bad #{type} #{msg}")
    end

    @fields[ f_tag ] = GFA::Field.code_class(f_type).new(f_value)
  end

  # Parses an optional field written as "TAG:TYPE:VALUE" and stores it.
  # +known+ maps the predefined tags of the record type to their expected
  # field type.
  #
  # A tag starts with a letter and may continue with letters or digits.
  #
  # Raises RuntimeError when the text cannot be parsed, when an
  # all-uppercase tag is not listed in +known+, or when a known tag comes
  # with an unexpected type.
  def add_opt_field(f, known)
    m = /^([A-Za-z][A-Za-z0-9]*):([A-Za-z]+):(.*)$/.match(f)
    raise "Cannot parse field: '#{f}'." unless m

    f_tag = m[1].to_sym
    f_type = m[2].to_sym
    f_value = m[3]

    if known[f_tag].nil? && f_tag =~ /^[A-Z]+$/
      raise "Unknown reserved tag #{f_tag} for a #{type} record."
    end

    unless known[f_tag].nil? || known[f_tag] == f_type
      raise "Wrong field type #{f_type} for a #{f_tag} tag," \
        " expected #{known[f_tag]}"
    end

    add_field(f_tag, f_type, f_value)
  end
end
@@ -0,0 +1,7 @@
1
class GFA
  # Version of the gem; the constants below are derived from it.
  VERSION = '0.2.0'
  # Numeric components of VERSION, in order.
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
  VERSION_MAJOR = VERSION_ARRAY.first # :nodoc:
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
  VERSION_BUILD = VERSION_ARRAY[2] # :nodoc:
end
data/lib/gfa.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'gfa/common'
2
+ require 'gfa/parser'
3
+ require 'gfa/generator'
4
+ require 'gfa/graph'
@@ -0,0 +1,34 @@
1
+ require "test_helper"
2
+
3
# Tests for the helpers available on GFA and on a freshly created graph.
class CommonTest < Test::Unit::TestCase

  # assert_format raises on a mismatch and stays silent on a match.
  def test_assert_format
    assert_raise { GFA.assert_format("tsooq", /^.$/, "Not a char") }
    assert_nothing_raised { GFA.assert_format("z", /^.$/, "Not a char") }
  end

  # A new graph is empty and equal to any other new graph.
  def test_empty
    fresh = GFA.new
    assert(fresh.empty?)
    assert_equal(GFA.new, fresh)
  end

  # Per-type readers exist and answer sensibly on an empty graph.
  def test_record_getters
    fresh = GFA.new
    assert_respond_to(fresh, :headers)
    assert_equal([], fresh.links)
    assert_nil(fresh.segment(0))
  end

  # Per-type adders exist and store what they are given.
  def test_record_setters
    fresh = GFA.new
    assert_respond_to(fresh, :add_path)
    fresh.add_containment("zooq")
    stored = fresh.records[:Containment].first
    assert_equal("zooq", stored)
  end

end
@@ -0,0 +1,50 @@
1
+ require "test_helper"
2
+
3
# Tests for the typed field classes under GFA::Field.
class FieldTest < Test::Unit::TestCase

  # A Char field keeps a single printable character and rejects a blank,
  # an empty string and longer strings.
  def test_char
    char = GFA::Field::Char.new("%")
    assert_equal("%", char.value)
    [" ", "", "^.^"].each do |bad|
      assert_raise { GFA::Field::Char.new(bad) }
    end
  end

  # No assertions yet.
  def test_sigint
  end

  # A Float field converts its text to a number and rejects malformed text.
  def test_float
    float = GFA::Field::Float.new("1.3e-5")
    assert_equal(1.3e-5, float.value)
    assert_raise { GFA::Field::Float.new("e-5") }
  end

  # No assertions yet.
  def test_string
  end

  # A Hex field keeps its text and rejects non-hexadecimal characters.
  def test_hex
    hex = GFA::Field::Hex.new("C3F0")
    assert_equal("C3F0", hex.value)
    assert_raise { GFA::Field::Hex.new("C3PO") }
  end

  # A NumArray field splits its text into a modifier and the list of
  # values, and rejects non-numeric entries.
  def test_numarray
    list = GFA::Field::NumArray.new("i,1,2,3")
    assert_equal(%w[1 2 3], list.array)
    assert_equal("i", list.modifier)
    assert_raise { GFA::Field::NumArray.new("c,1,e,3") }
  end

end
@@ -0,0 +1,47 @@
1
+ require "test_helper"
2
+ require "gfa/parser"
3
+
4
# Tests for file loading, version checking and line-by-line parsing.
class ParserTest < Test::Unit::TestCase

  # Loading the sample fixture yields the expected record counts and leaves
  # no additional IO object open.
  def test_load
    sample_f = File.expand_path('../fixtures/sample.gfa', __FILE__)
    assert_respond_to(GFA, :load)
    pre_fhs = open_io_count
    sample = GFA.load(sample_f)
    post_fhs = open_io_count
    assert_equal(pre_fhs, post_fhs)
    assert_equal(1, sample.headers.size)
    assert_equal(6, sample.segments.size)
    assert_equal(4, sample.links.size)
    assert(sample.containments.empty?)
    assert(sample.paths.empty?)
    assert_respond_to(sample, :records)
  end

  # Versions outside the supported range raise; both bounds are accepted.
  def test_version_support
    gfa = GFA.new
    assert_raise { gfa.set_gfa_version('0.9') }
    assert_raise { gfa.set_gfa_version('2.1') }
    assert_nothing_raised { gfa.set_gfa_version('1.0') }
    assert_nothing_raised { gfa.set_gfa_version('1.2') }
  end

  # Lines and records can be appended one at a time with <<.
  def test_line_by_line
    gfa = GFA.new
    assert_respond_to(gfa, :<<)
    # Empty
    gfa << ' '
    assert(gfa.empty?)
    gfa << 'H'
    assert(gfa.empty?)
    # Segment
    assert_equal(0, gfa.segments.size)
    gfa << "S\t1\tACTG"
    assert(!gfa.empty?)
    assert_equal(1, gfa.segments.size)
    # Version
    assert_nil(gfa.gfa_version)
    gfa << GFA::Record::Header.new('VN:Z:1.0')
    assert_equal('1.0', gfa.gfa_version)
  end

  private

  # Number of IO objects currently alive and not closed.
  def open_io_count
    ObjectSpace.each_object(IO).count { |io| !io.closed? }
  end

end
@@ -0,0 +1,73 @@
1
+ require "test_helper"
2
+
3
# Tests for GFA::Record and its concrete record classes.
class RecordTest < Test::Unit::TestCase

  # Fixtures live in instance variables so they stay local to each test
  # instead of leaking into global state.
  def setup
    @rec_h = GFA::Record::Header.new("VN:Z:1.0")
    @rec_p = GFA::Record::Path.new("a", "b", "*")
  end

  def test_class_methods
    assert_respond_to(GFA::Record, :CODES)
    assert_respond_to(GFA::Record, :TYPES)
  end

  # Records serialize back to their tab-separated text form.
  def test_to_s
    assert_equal("H\tVN:Z:1.0", @rec_h.to_s)
    assert_equal("P\ta\tb\t*", @rec_p.to_s)
  end

  # Records built from the same text have the same hash and are equal.
  def test_hash
    other_h = GFA::Record::Header.new("VN:Z:1.0")
    assert_equal(@rec_h.hash, other_h.hash)
    assert_equal(@rec_h, other_h)
  end

  # Tags containing lowercase letters are free for users; an unknown
  # all-uppercase tag is rejected.
  def test_reserved_fields
    assert_nothing_raised do
      GFA::Record::Path.new("a", "b", "*", "smile:Z:(-:")
      GFA::Record::Header.new("Ooo:i:3")
      GFA::Record::Header.new("oOo:i:2")
      GFA::Record::Header.new("ooO:i:1")
    end
    assert_raise do
      GFA::Record::Header.new("OOPS:i:3")
    end
  end

  # No assertions yet.
  def test_header
  end

  # No assertions yet.
  def test_segment
  end

  def test_link
    l = GFA::Record::Link.new("Seg1","+","Seg2","-","*","NM:i:123")
    assert_equal("+", l.from_orient.value)
    assert_equal(123, l[:NM].value)
    assert(l.from?("Seg1"))
    assert(l.from?("Seg1", "+"))
    assert(l.to?("Seg2", "-"))
    assert(! l.from?("Seg2"))
    assert(! l.from?("Seg1", "-"))
  end

  def test_containment
    # One required column short: "*" lands in the position column.
    assert_raise do
      GFA::Record::Containment.new("Seg1","+","Seg2","-","*","RC:i:123")
    end
    c = GFA::Record::Containment.new("Seg1","+","Seg2","-","10","*","RC:i:123")
    assert_equal("+", c.from_orient.value)
    assert_equal(10, c.pos.value)
    assert_equal(123, c[:RC].value)
  end

  def test_path
    # A tab inside a column value is not allowed.
    assert_raise do
      GFA::Record::Path.new("PathA","SegB\t","*")
    end
    p = GFA::Record::Path.new("PathA","SegB","*")
    assert_equal("*", p.cigar.value)
  end

end
@@ -0,0 +1,6 @@
1
+ require 'simplecov'
2
+ SimpleCov.start
3
+
4
+ require 'rubygems'
5
+ require 'test/unit'
6
+ require 'gfa/common'