gfa 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/gfa/parser.rb ADDED
@@ -0,0 +1,46 @@
1
+ require 'gfa/record'
2
+
3
# Parsing support for GFA objects: loading a whole file and ingesting
# records one line (or one GFA::Record) at a time.
class GFA
  # Class-level

  # Lowest GFA version accepted by ::supported_version?.
  MIN_VERSION = '1.0'
  # Highest GFA version accepted by ::supported_version?.
  MAX_VERSION = '1.2'

  # Reads the file at path +file+ line by line, feeding each line to #<< of
  # a freshly created GFA object, and returns that object.
  #
  # The file is opened in block form so the handle is closed even when a
  # line cannot be parsed and #<< raises midway through the file.
  def self.load(file)
    gfa = GFA.new
    File.open(file, 'r') do |fh|
      fh.each { |ln| gfa << ln }
    end
    gfa
  end

  # Returns +true+ when +v+, converted with +to_f+, lies between
  # MIN_VERSION and MAX_VERSION (both inclusive), +false+ otherwise.
  def self.supported_version?(v)
    v.to_f.between?(MIN_VERSION.to_f, MAX_VERSION.to_f)
  end

  # Instance-level

  # Adds +obj+ to the stored records. +obj+ may be a GFA::Record or a raw
  # line, which is parsed first. Blank lines and empty records are ignored.
  # When a Header record carries a +VN+ field, its value is passed to
  # #set_gfa_version (which may raise for unsupported versions).
  def <<(obj)
    obj = parse_line(obj) unless obj.is_a? GFA::Record
    return if obj.nil? || obj.empty?

    @records[obj.type] << obj

    return unless obj.type == :Header && !obj.fields[:VN].nil?

    set_gfa_version(obj.fields[:VN].value)
  end

  # Stores +v+ as the GFA version of this object and raises a RuntimeError
  # if ::supported_version? rejects it. The value is stored before the
  # check, so it remains set even when the error is raised.
  def set_gfa_version(v)
    @gfa_version = v
    return if GFA.supported_version?(v)

    raise "GFA version currently unsupported: #{v}."
  end

  private

  # Turns one tab-separated line into a GFA::Record of the class selected by
  # its first column; the remaining columns become constructor arguments.
  # Returns +nil+ for blank lines.
  def parse_line(ln)
    # Non-destructive chomp: the caller's string is left untouched, and
    # frozen strings (e.g. frozen literals) can be parsed too.
    ln = ln.chomp
    return nil if ln =~ /^\s*$/

    cols = ln.split("\t")
    GFA::Record.code_class(cols.shift).new(*cols)
  end
end
@@ -0,0 +1,10 @@
1
# Comment record (code <tt>#</tt>). It declares no required columns and no
# known optional tags.
class GFA::Record::Comment < GFA::Record
  CODE = :'#'
  REQ_FIELDS = []
  OPT_FIELDS = {}

  # Builds the record from any number of raw optional-field strings; each
  # one is handed to add_opt_field together with OPT_FIELDS.
  def initialize(*opt_fields)
    @fields = {}
    opt_fields.each do |raw_field|
      add_opt_field(raw_field, OPT_FIELDS)
    end
  end
end
@@ -0,0 +1,29 @@
1
# Containment record (code +C+). The +from+ columns are also exposed as
# +container+ and the +to+ columns as +contained+.
class GFA::Record::Containment < GFA::Record
  CODE = :C
  REQ_FIELDS = %i[from from_orient to to_orient pos overlap]
  OPT_FIELDS = {
    RC: :i, # Read coverage
    NM: :i, # Number of mismatches/gaps
    ID: :Z  # Edge identifier
  }

  # One reader per required column. Required columns are stored in +fields+
  # under integer keys starting at 2.
  REQ_FIELDS.each_index do |i|
    define_method(REQ_FIELDS[i]) { fields[i + 2] }
  end

  alias container from
  alias container_orient from_orient
  alias contained to
  alias contained_orient to_orient

  # Validates and stores the six required columns, then parses every entry
  # of +opt_fields+ as an optional field.
  #
  # Each pattern is anchored to the whole value and its alternatives are
  # grouped. Written as <tt>/^+|-$/</tt> or <tt>/^\*|...$/</tt>, each anchor
  # binds to only one alternative, so malformed values slip through.
  def initialize(from, from_orient, to, to_orient, pos, overlap, *opt_fields)
    @fields = {}
    add_field(2, :Z, from,        /\A[!-)+-<>-~][!-~]*\z/)
    add_field(3, :Z, from_orient, /\A[+-]\z/)
    add_field(4, :Z, to,          /\A[!-)+-<>-~][!-~]*\z/)
    add_field(5, :Z, to_orient,   /\A[+-]\z/)
    add_field(6, :i, pos,         /\A[0-9]*\z/)
    add_field(7, :Z, overlap,     /\A(?:\*|(?:[0-9]+[MIDNSHPX=])+)\z/)
    opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
  end
end
@@ -0,0 +1,12 @@
1
# Header record (code +H+). It has no required columns; the only known
# optional tag is +VN+.
class GFA::Record::Header < GFA::Record
  CODE = :H
  REQ_FIELDS = []
  OPT_FIELDS = {
    VN: :Z # Version number
  }

  # Builds the record from any number of raw optional-field strings; each
  # one is handed to add_opt_field together with OPT_FIELDS.
  def initialize(*opt_fields)
    @fields = {}
    opt_fields.each do |raw_field|
      add_opt_field(raw_field, OPT_FIELDS)
    end
  end
end
@@ -0,0 +1,45 @@
1
# Jump record (code +J+): an oriented +from+ segment, an oriented +to+
# segment and a +distance+ column.
class GFA::Record::Jump < GFA::Record
  CODE = :J
  REQ_FIELDS = %i[from from_orient to to_orient distance]
  OPT_FIELDS = {
    SC: :i # 1 indicates indirect shortcut connections. Only 0/1 allowed.
  }

  # One reader per required column. Required columns are stored in +fields+
  # under integer keys starting at 2.
  REQ_FIELDS.each_index do |i|
    define_method(REQ_FIELDS[i]) { fields[i + 2] }
  end

  # Validates and stores the five required columns, then parses every entry
  # of +opt_fields+ as an optional field.
  #
  # Each pattern is anchored to the whole value and its alternatives are
  # grouped. Written as <tt>/^+|-$/</tt> or <tt>/^\*|...$/</tt>, each anchor
  # binds to only one alternative, so malformed values slip through.
  def initialize(from, from_orient, to, to_orient, distance, *opt_fields)
    @fields = {}
    add_field(2, :Z, from,        /\A[!-)+-<>-~][!-~]*\z/)
    add_field(3, :Z, from_orient, /\A[+-]\z/)
    add_field(4, :Z, to,          /\A[!-)+-<>-~][!-~]*\z/)
    add_field(5, :Z, to_orient,   /\A[+-]\z/)
    add_field(6, :Z, distance,    /\A(?:\*|[-+]?[0-9]+)\z/)
    opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
  end

  # Does this jump start at +segment+ (and with orientation +orient+, when
  # given)? +segment+ may be a GFA::Record::Segment, a GFA::Field or a plain
  # value; +orient+ may be a GFA::Field or a plain value.
  def from?(segment, orient = nil)
    links_from_to?(segment, orient, true)
  end

  # Does this jump end at +segment+ (and with orientation +orient+, when
  # given)? Accepts the same argument kinds as #from?.
  def to?(segment, orient = nil)
    links_from_to?(segment, orient, false)
  end

  private

  # Shared implementation of #from? and #to?. +from+ selects which pair of
  # columns is compared: 2/3 (from) when true, 4/5 (to) otherwise.
  def links_from_to?(segment, orient, from)
    segment = segment_name(segment)
    orient = orient.value if orient.is_a? GFA::Field
    base_k = from ? 2 : 4
    segment == fields[base_k].value &&
      (orient.nil? || orient == fields[base_k + 1].value)
  end

  # Reduces +segment+ to a plain name: the name value of a Segment record,
  # the value of a Field, or the object itself otherwise.
  def segment_name(segment)
    case segment
    when GFA::Record::Segment then segment.name.value
    when GFA::Field then segment.value
    else segment
    end
  end
end
@@ -0,0 +1,49 @@
1
# Link record (code +L+): an oriented +from+ segment, an oriented +to+
# segment and an +overlap+ column.
class GFA::Record::Link < GFA::Record
  CODE = :L
  REQ_FIELDS = %i[from from_orient to to_orient overlap]
  OPT_FIELDS = {
    MQ: :i, # Mapping quality
    NM: :i, # Number of mismatches/gaps
    EC: :i, # Read count
    FC: :i, # Fragment count
    KC: :i, # k-mer count
    ID: :Z  # Edge identifier
  }

  # One reader per required column. Required columns are stored in +fields+
  # under integer keys starting at 2.
  REQ_FIELDS.each_index do |i|
    define_method(REQ_FIELDS[i]) { fields[i + 2] }
  end

  # Validates and stores the five required columns, then parses every entry
  # of +opt_fields+ as an optional field.
  #
  # Each pattern is anchored to the whole value and its alternatives are
  # grouped. Written as <tt>/^+|-$/</tt> or <tt>/^\*|...$/</tt>, each anchor
  # binds to only one alternative, so malformed values slip through.
  def initialize(from, from_orient, to, to_orient, overlap, *opt_fields)
    @fields = {}
    add_field(2, :Z, from,        /\A[!-)+-<>-~][!-~]*\z/)
    add_field(3, :Z, from_orient, /\A[+-]\z/)
    add_field(4, :Z, to,          /\A[!-)+-<>-~][!-~]*\z/)
    add_field(5, :Z, to_orient,   /\A[+-]\z/)
    add_field(6, :Z, overlap,     /\A(?:\*|(?:[0-9]+[MIDNSHPX=])+)\z/)
    opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
  end

  # Does this link start at +segment+ (and with orientation +orient+, when
  # given)? +segment+ may be a GFA::Record::Segment, a GFA::Field or a plain
  # value; +orient+ may be a GFA::Field or a plain value.
  def from?(segment, orient = nil)
    links_from_to?(segment, orient, true)
  end

  # Does this link end at +segment+ (and with orientation +orient+, when
  # given)? Accepts the same argument kinds as #from?.
  def to?(segment, orient = nil)
    links_from_to?(segment, orient, false)
  end

  private

  # Shared implementation of #from? and #to?. +from+ selects which pair of
  # columns is compared: 2/3 (from) when true, 4/5 (to) otherwise.
  def links_from_to?(segment, orient, from)
    segment = segment_name(segment)
    orient = orient.value if orient.is_a? GFA::Field
    base_k = from ? 2 : 4
    segment == fields[base_k].value &&
      (orient.nil? || orient == fields[base_k + 1].value)
  end

  # Reduces +segment+ to a plain name: the name value of a Segment record,
  # the value of a Field, or the object itself otherwise.
  def segment_name(segment)
    case segment
    when GFA::Record::Segment then segment.name.value
    when GFA::Field then segment.value
    else segment
    end
  end
end
@@ -0,0 +1,19 @@
1
# Path record (code +P+) with the columns +path_name+, +segment_name+ and
# +cigar+ (also readable as +overlaps+).
class GFA::Record::Path < GFA::Record
  CODE = :P
  REQ_FIELDS = %i[path_name segment_name cigar]
  OPT_FIELDS = {}

  # One reader per required column. Required columns are stored in +fields+
  # under integer keys starting at 2.
  REQ_FIELDS.each_index do |i|
    define_method(REQ_FIELDS[i]) { fields[i + 2] }
  end

  alias overlaps cigar

  # Validates and stores the three required columns, then parses every
  # entry of +opt_fields+ as an optional field.
  #
  # The overlaps pattern is anchored to the whole value. Written as
  # <tt>/^\*|(...)+$/</tt>, anything starting with <tt>*</tt> or merely
  # ending in a CIGAR operation is accepted. A comma-separated list whose
  # items are each <tt>*</tt> or a CIGAR string is still accepted, so
  # multi-overlap values such as <tt>4M,5M</tt> remain valid.
  def initialize(path_name, segment_name, cigar, *opt_fields)
    @fields = {}
    add_field(2, :Z, path_name,    /\A[!-)+-<>-~][!-~]*\z/)
    add_field(3, :Z, segment_name, /\A[!-)+-<>-~][!-~]*\z/)
    add_field(
      4, :Z, cigar,
      /\A(?:\*|(?:[0-9]+[MIDNSHPX=])+)(?:,(?:\*|(?:[0-9]+[MIDNSHPX=])+))*\z/
    )
    opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
  end
end
@@ -0,0 +1,25 @@
1
# Segment record (code +S+) with the columns +name+ and +sequence+.
class GFA::Record::Segment < GFA::Record
  CODE = :S
  REQ_FIELDS = %i[name sequence]
  OPT_FIELDS = {
    LN: :i, # Segment length
    RC: :i, # Read count
    FC: :i, # Fragment count
    KC: :i, # k-mer count
    SH: :H, # SHA-256 checksum of the sequence
    UR: :Z, # URI or local file-system path of the sequence
    # Non-canonical
    DP: :f  # (From SAM)
  }

  # One reader per required column. Required columns are stored in +fields+
  # under integer keys starting at 2.
  REQ_FIELDS.each_index do |i|
    define_method(REQ_FIELDS[i]) { fields[i + 2] }
  end

  # Validates and stores the two required columns, then parses every entry
  # of +opt_fields+ as an optional field.
  #
  # The sequence pattern is anchored to the whole value. Written as
  # <tt>/^\*|[A-Za-z=.]+$/</tt>, any value starting with <tt>*</tt> or
  # merely ending in an allowed character is accepted.
  def initialize(name, sequence, *opt_fields)
    @fields = {}
    add_field(2, :Z, name,     /\A[!-)+-<>-~][!-~]*\z/)
    add_field(3, :Z, sequence, /\A(?:\*|[A-Za-z=.]+)\z/)
    opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
  end
end
@@ -0,0 +1,20 @@
1
# Walk record (code +W+) with the columns +sample_id+, +hap_index+,
# +seq_id+, +seq_start+, +seq_end+ and +walk+.
class GFA::Record::Walk < GFA::Record
  CODE = :W
  REQ_FIELDS = %i[sample_id hap_index seq_id seq_start seq_end walk]
  OPT_FIELDS = {}

  # One reader per required column. Required columns are stored in +fields+
  # under integer keys starting at 2.
  REQ_FIELDS.each_index do |i|
    define_method(REQ_FIELDS[i]) { fields[i + 2] }
  end

  # Validates and stores the six required columns, then parses every entry
  # of +opt_fields+ as an optional field.
  #
  # The start/end patterns are anchored to the whole value. Written as
  # <tt>/^\*|[0-9]+$/</tt>, any value starting with <tt>*</tt> or merely
  # ending in a digit is accepted.
  def initialize(sample_id, hap_index, seq_id, seq_start, seq_end, walk, *opt_fields)
    @fields = {}
    add_field(2, :Z, sample_id, /\A[!-)+-<>-~][!-~]*\z/)
    add_field(3, :i, hap_index, /\A[0-9]+\z/)
    add_field(4, :Z, seq_id,    /\A[!-)+-<>-~][!-~]*\z/)
    add_field(5, :i, seq_start, /\A(?:\*|[0-9]+)\z/)
    add_field(6, :i, seq_end,   /\A(?:\*|[0-9]+)\z/)
    add_field(7, :Z, walk,      /\A([><][!-;=?-~]+)+\z/)
    opt_fields.each { |f| add_opt_field(f, OPT_FIELDS) }
  end
end
data/lib/gfa/record.rb ADDED
@@ -0,0 +1,96 @@
1
# Base class of all GFA record types. It maps record codes to subclasses
# and provides field storage, serialization and equality for them.
class GFA::Record
  # Class-level

  # Record code (first column of a line) => name of the implementing
  # subclass under GFA::Record.
  CODES = {
    :'#' => :Comment,
    H: :Header,
    S: :Segment,
    L: :Link,
    J: :Jump, # Since 1.2
    C: :Containment,
    P: :Path,
    W: :Walk # Since 1.1
  }
  REQ_FIELDS = []
  OPT_FIELDS = {}
  TYPES = CODES.values
  # Load every subclass file. This happens inside the class body, so
  # GFA::Record already exists when the subclasses name it as superclass.
  TYPES.each { |t| require "gfa/record/#{t.downcase}" }

  # Expose each constant as a class method of the same name. The lookup
  # runs on the receiving class, so a subclass returns its own constant
  # when it defines one.
  [:CODES, :REQ_FIELDS, :OPT_FIELDS, :TYPES].each do |x|
    define_singleton_method(x) { const_get(x) }
  end

  # Returns the record class registered for +code+. Raises a RuntimeError
  # when the code is not listed in CODES.
  def self.code_class(code)
    name = CODES[code.to_sym]
    raise "Unknown record type: #{code}." if name.nil?

    name_class(name)
  end

  # Returns the constant called +name+ as looked up from this class.
  def self.name_class(name)
    const_get(name)
  end

  # Instance-level

  # Hash of stored fields: required columns under integer keys, optional
  # fields under their tag as a Symbol.
  attr_reader :fields

  # Field stored under key +k+, or +nil+.
  def [](k) fields[k] ; end

  # Type name of this record as listed in CODES (e.g. +:Header+).
  def type ; CODES[code] ; end

  # Value of the +CODE+ constant of this record's class.
  def code ; self.class.const_get(:CODE) ; end

  # Is the record without any stored field?
  def empty? ; fields.empty? ; end

  # Serializes the record as one tab-separated line: the code, the required
  # columns in order, then every optional field as <tt>tag:field</tt>.
  def to_s
    o = [code.to_s]
    self.class.REQ_FIELDS.each_index do |i|
      o << fields[i + 2].to_s(false)
    end
    fields.each do |k, v|
      next if k.is_a? Integer # required columns were emitted above

      o << "#{k}:#{v}"
    end
    o.join("\t")
  end

  # Hash value derived from the record code and its fields.
  def hash
    { code => fields }.hash
  end

  # Two records are considered equal when their hash values are equal.
  def eql?(rec)
    hash == rec.hash
  end

  alias == eql?

  private

  # Stores +f_value+ under +f_tag+, wrapped in the GFA::Field class for
  # +f_type+. When +format+ is given, the value is first checked against
  # it with GFA.assert_format.
  def add_field(f_tag, f_type, f_value, format = nil)
    unless format.nil?
      msg = (f_tag.is_a?(Integer) ? "column #{f_tag}" : "#{f_tag} field")
      GFA.assert_format(f_value, format, "Bad #{type} #{msg}")
    end

    @fields[f_tag] = GFA::Field.code_class(f_type).new(f_value)
  end

  # Parses the raw optional field +f+ (<tt>TAG:TYPE:VALUE</tt>) and stores
  # it. +known+ maps known tags to their expected type.
  #
  # Raises a RuntimeError when +f+ cannot be parsed, when an unknown tag is
  # made only of uppercase letters, or when a known tag has the wrong type.
  def add_opt_field(f, known)
    # A tag is a letter followed by letters or digits, so tags such as
    # "s1" or "X0" can be parsed as well.
    m = /^([A-Za-z][A-Za-z0-9]*):([A-Za-z]+):(.*)$/.match(f)
    raise "Cannot parse field: '#{f}'." unless m

    f_tag = m[1].to_sym
    f_type = m[2].to_sym
    f_value = m[3]

    if known[f_tag].nil? && f_tag =~ /^[A-Z]+$/
      raise "Unknown reserved tag #{f_tag} for a #{type} record."
    end

    unless known[f_tag].nil? || known[f_tag] == f_type
      raise "Wrong field type #{f_type} for a #{f_tag} tag," \
        " expected #{known[f_tag]}"
    end

    add_field(f_tag, f_type, f_value)
  end
end
@@ -0,0 +1,7 @@
1
# Version of the gfa library, as a string and split into numeric parts.
class GFA
  # Frozen so the shared constants cannot be modified in place.
  VERSION = '0.2.0'.freeze
  VERSION_ARRAY = VERSION.split('.').map(&:to_i).freeze # :nodoc:
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
  VERSION_BUILD = VERSION_ARRAY[2] # :nodoc:
end
data/lib/gfa.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'gfa/common'
2
+ require 'gfa/parser'
3
+ require 'gfa/generator'
4
+ require 'gfa/graph'
@@ -0,0 +1,34 @@
1
+ require "test_helper"
2
+
3
# Tests for the helpers and record collections of the GFA class.
class CommonTest < Test::Unit::TestCase

  # assert_format must raise on a value that does not match the pattern
  # and stay silent on one that does.
  def test_assert_format
    assert_raise { GFA.assert_format("tsooq", /^.$/, "Not a char") }
    assert_nothing_raised { GFA.assert_format("z", /^.$/, "Not a char") }
  end

  # A new object is empty and equal to another new object.
  def test_empty
    blank = GFA.new
    assert(blank.empty?)
    assert_equal(GFA.new, blank)
  end

  # Record readers exist and return empty results on a new object.
  def test_record_getters
    graph = GFA.new
    assert_respond_to(graph, :headers)
    assert_equal([], graph.links)
    assert_nil(graph.segment(0))
  end

  # Record writers exist and store what they are given.
  def test_record_setters
    graph = GFA.new
    assert_respond_to(graph, :add_path)
    graph.add_containment("zooq")
    stored = graph.records[:Containment]
    assert_equal("zooq", stored.first)
  end

end
@@ -0,0 +1,50 @@
1
+ require "test_helper"
2
+
3
# Tests for the GFA::Field value classes.
class FieldTest < Test::Unit::TestCase

  # Char keeps a single character and is expected to raise on a blank,
  # an empty string and a multi-character string.
  def test_char
    char = GFA::Field::Char.new("%")
    assert_equal("%", char.value)
    [" ", "", "^.^"].each do |invalid|
      assert_raise { GFA::Field::Char.new(invalid) }
    end
  end

  # Placeholder: no assertions yet.
  def test_sigint
  end

  # Float parses scientific notation and is expected to raise on a bare
  # exponent.
  def test_float
    float = GFA::Field::Float.new("1.3e-5")
    assert_equal(1.3e-5, float.value)
    assert_raise { GFA::Field::Float.new("e-5") }
  end

  # Placeholder: no assertions yet.
  def test_string
  end

  # Hex keeps its input as the value and is expected to raise on
  # non-hexadecimal characters.
  def test_hex
    hex = GFA::Field::Hex.new("C3F0")
    assert_equal("C3F0", hex.value)
    assert_raise { GFA::Field::Hex.new("C3PO") }
  end

  # NumArray exposes the modifier and the items separately and is expected
  # to raise on a non-numeric item.
  def test_numarray
    numbers = GFA::Field::NumArray.new("i,1,2,3")
    assert_equal(%w[1 2 3], numbers.array)
    assert_equal("i", numbers.modifier)
    assert_raise { GFA::Field::NumArray.new("c,1,e,3") }
  end

end
@@ -0,0 +1,47 @@
1
+ require "test_helper"
2
+ require "gfa/parser"
3
+
4
# Tests for GFA.load, version support and line-by-line ingestion.
class ParserTest < Test::Unit::TestCase

  # Loading the sample fixture must leave no extra IO object open and must
  # produce the expected number of records of each type.
  def test_load
    sample_f = File.expand_path('../fixtures/sample.gfa', __FILE__)
    assert_respond_to(GFA, :load)
    open_handles = lambda do
      ObjectSpace.each_object(IO).count { |io| !io.closed? }
    end
    handles_before = open_handles.call
    sample = GFA.load(sample_f)
    handles_after = open_handles.call
    assert_equal(handles_before, handles_after)
    assert_equal(1, sample.headers.size)
    assert_equal(6, sample.segments.size)
    assert_equal(4, sample.links.size)
    assert(sample.containments.empty?)
    assert(sample.paths.empty?)
    assert_respond_to(sample, :records)
  end

  # Versions outside the supported range raise; a supported one does not.
  def test_version_suppport
    graph = GFA.new
    %w[0.9 2.1].each do |unsupported|
      assert_raise { graph.set_gfa_version(unsupported) }
    end
    assert_nothing_raised { graph.set_gfa_version('1.0') }
  end

  # Feeding lines and records one at a time through <<.
  def test_line_by_line
    graph = GFA.new
    assert_respond_to(graph, :<<)
    # Empty
    graph << ' '
    assert(graph.empty?)
    graph << 'H'
    assert(graph.empty?)
    # Segment
    assert_equal(0, graph.segments.size)
    graph << "S\t1\tACTG"
    assert(!graph.empty?)
    assert_equal(1, graph.segments.size)
    # Version
    assert_nil(graph.gfa_version)
    graph << GFA::Record::Header.new('VN:Z:1.0')
    assert_equal('1.0', graph.gfa_version)
  end

end
@@ -0,0 +1,73 @@
1
+ require "test_helper"
2
+
3
# Tests for GFA::Record and its subclasses.
class RecordTest < Test::Unit::TestCase

  # Fresh fixtures for every test. They are kept in instance variables so
  # no state is shared through globals between test cases.
  def setup
    @rec_h = GFA::Record::Header.new("VN:Z:1.0")
    @rec_p = GFA::Record::Path.new("a", "b", "*")
  end

  # The constants of GFA::Record are reachable as class methods.
  def test_class_methods
    assert_respond_to(GFA::Record, :CODES)
    assert_respond_to(GFA::Record, :TYPES)
  end

  # Records serialize back to tab-separated lines.
  def test_to_s
    assert_equal("H\tVN:Z:1.0", @rec_h.to_s)
    assert_equal("P\ta\tb\t*", @rec_p.to_s)
  end

  # Records built from the same input have equal hashes and compare equal.
  def test_hash
    other_h = GFA::Record::Header.new("VN:Z:1.0")
    assert_equal(@rec_h.hash, other_h.hash)
    assert_equal(@rec_h, other_h)
  end

  # Unknown tags are accepted unless they are made only of uppercase
  # letters.
  def test_reserved_fields
    assert_nothing_raised do
      GFA::Record::Path.new("a", "b", "*", "smile:Z:(-:")
      GFA::Record::Header.new("Ooo:i:3")
      GFA::Record::Header.new("oOo:i:2")
      GFA::Record::Header.new("ooO:i:1")
    end
    assert_raise do
      GFA::Record::Header.new("OOPS:i:3")
    end
  end

  # Placeholder: no assertions yet.
  def test_header
  end

  # Placeholder: no assertions yet.
  def test_segment
  end

  # Link readers, optional fields and the from?/to? predicates.
  def test_link
    l = GFA::Record::Link.new("Seg1", "+", "Seg2", "-", "*", "NM:i:123")
    assert_equal("+", l.from_orient.value)
    assert_equal(123, l[:NM].value)
    assert(l.from?("Seg1"))
    assert(l.from?("Seg1", "+"))
    assert(l.to?("Seg2", "-"))
    assert(!l.from?("Seg2"))
    assert(!l.from?("Seg1", "-"))
  end

  # A containment built without the position column must raise; a complete
  # one exposes its columns and optional fields.
  def test_containment
    assert_raise do
      GFA::Record::Containment.new("Seg1", "+", "Seg2", "-", "*", "RC:i:123")
    end
    c = GFA::Record::Containment.new(
      "Seg1", "+", "Seg2", "-", "10", "*", "RC:i:123"
    )
    assert_equal("+", c.from_orient.value)
    assert_equal(10, c.pos.value)
    assert_equal(123, c[:RC].value)
  end

  # A segment name containing a tab must be rejected.
  def test_path
    assert_raise do
      GFA::Record::Path.new("PathA", "SegB\t", "*")
    end
    p = GFA::Record::Path.new("PathA", "SegB", "*")
    assert_equal("*", p.cigar.value)
  end

end
@@ -0,0 +1,6 @@
1
+ require 'simplecov'
2
+ SimpleCov.start
3
+
4
+ require 'rubygems'
5
+ require 'test/unit'
6
+ require 'gfa/common'