bio-gff3 0.8.4 → 0.8.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -5
- data/Gemfile.lock +4 -0
- data/Rakefile +12 -10
- data/VERSION +1 -1
- data/bin/gff3-fetch +104 -47
- data/bio-gff3.gemspec +40 -12
- data/lib/bio-gff3.rb +29 -1
- data/lib/bio/db/gff/{gffinmemory.rb → digest/gffinmemory.rb} +15 -2
- data/lib/bio/db/gff/{gffnocache.rb → digest/gffnocache.rb} +2 -0
- data/lib/bio/db/gff/{gffparser.rb → digest/gffparser.rb} +40 -49
- data/lib/bio/db/gff/{gfffasta.rb → file/gfffasta.rb} +1 -1
- data/lib/bio/db/gff/{gfffileiterator.rb → file/gfffileiterator.rb} +3 -1
- data/lib/bio/db/gff/{gffdb.rb → gff3.rb} +6 -11
- data/lib/bio/db/gff/gff3fasta.rb +25 -0
- data/lib/bio/db/gff/gff3parsefile.rb +33 -0
- data/lib/bio/db/gff/gff3parserec.rb +63 -0
- data/lib/bio/db/gff/gffcomponent.rb +94 -0
- data/lib/bio/db/gff/gffrecord.rb +71 -0
- data/lib/bio/db/gff/gffsection.rb +41 -0
- data/lib/bio/db/gff/gffsequence.rb +160 -0
- data/lib/bio/db/gff/gffvalidate.rb +90 -0
- data/lib/bio/output/gfflogger.rb +33 -0
- data/spec/gff3_assemble2_spec.rb +3 -3
- data/spec/gff3_assemble3_spec.rb +4 -4
- data/spec/gff3_assemble_spec.rb +3 -3
- data/spec/gffdb_spec.rb +15 -15
- data/spec/gffparserec.rb +44 -0
- data/test/data/gff/test-cds.gff3 +3 -7
- data/test/data/regression/test_ext_gff3.rtest +61 -0
- data/test/data/regression/test_gff3.rtest +65 -0
- data/test/data/regression/test_nocache_ext_gff3.rtest +56 -0
- data/test/data/regression/test_nocache_gff3.rtest +65 -0
- data/test/regressiontest.rb +52 -0
- data/test/test_bio-gff3.rb +34 -4
- metadata +103 -29
- data/lib/bio/db/gff/gffassemble.rb +0 -341
data/spec/gffdb_spec.rb
CHANGED
@@ -42,44 +42,44 @@ def iterators_should_be_implemented
|
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
|
-
describe
|
45
|
+
describe GFF3, "GFF3 API (InMemory) with everything in memory" do
|
46
46
|
|
47
47
|
before :all do
|
48
48
|
# initialize
|
49
|
-
|
50
|
-
@gff =
|
49
|
+
gff3 = Bio::GFFbrowser::GFF3.new(TESTGFF1)
|
50
|
+
@gff = gff3.assembler
|
51
51
|
end
|
52
52
|
|
53
53
|
iterators_should_be_implemented
|
54
54
|
end
|
55
55
|
|
56
|
-
describe
|
56
|
+
describe GFF3, "GFF3 API with :cache_components => 1000, :cache_records => :cache_none" do
|
57
57
|
# iterators_should_be_implemented
|
58
58
|
end
|
59
59
|
|
60
|
-
describe
|
60
|
+
describe GFF3, "GFF3 API with :cache_components => 1000, :cache_records => 1000" do
|
61
61
|
it "should implement real caching"
|
62
62
|
# iterators_should_be_implemented
|
63
63
|
end
|
64
64
|
|
65
|
-
describe
|
65
|
+
describe GFF3, "GFF3 API with :cache_records => :cache_none" do
|
66
66
|
# iterators_should_be_implemented
|
67
67
|
end
|
68
68
|
|
69
|
-
describe
|
69
|
+
describe GFF3, "GFF3 API (NoCache) with :cache_components => :cache_none, :cache_records => :cache_none" do
|
70
70
|
before :all do
|
71
71
|
# initialize
|
72
|
-
|
73
|
-
@gff =
|
72
|
+
gff3 = Bio::GFFbrowser::GFF3.new(TESTGFF1, :cache_components => :cache_none, :cache_records => :cache_none)
|
73
|
+
@gff = gff3.assembler
|
74
74
|
end
|
75
75
|
|
76
76
|
iterators_should_be_implemented
|
77
77
|
end
|
78
78
|
|
79
|
-
describe
|
79
|
+
describe GFF3, "GFF3 API (InMemory) with external FASTA" do
|
80
80
|
before :all do
|
81
|
-
|
82
|
-
@gff =
|
81
|
+
gff3 = Bio::GFFbrowser::GFF3.new(TESTGFF1EXT, :parser => :line, :fasta_filename => TESTGFF1FASTA)
|
82
|
+
@gff = gff3.assembler
|
83
83
|
end
|
84
84
|
|
85
85
|
it "should have a sequence list" do
|
@@ -89,10 +89,10 @@ describe GFFdb, "GFF3 API (InMemory) with external FASTA" do
|
|
89
89
|
iterators_should_be_implemented
|
90
90
|
end
|
91
91
|
|
92
|
-
describe
|
92
|
+
describe GFF3, "GFF3 API (NoCache) with external FASTA" do
|
93
93
|
before :all do
|
94
|
-
|
95
|
-
@gff =
|
94
|
+
gff3 = Bio::GFFbrowser::GFF3.new(TESTGFF1EXT, :fasta_filename => TESTGFF1FASTA, :cache_components => :cache_none, :cache_records => :cache_none)
|
95
|
+
@gff = gff3.assembler
|
96
96
|
end
|
97
97
|
|
98
98
|
iterators_should_be_implemented
|
data/spec/gffparserec.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# RSpec for BioRuby-GFF3-Plugin. Run with something like:
|
2
|
+
#
|
3
|
+
# rspec -I ../bioruby/lib/ spec/gffparserec.rb
|
4
|
+
#
|
5
|
+
# Copyright (C) 2010 Pjotr Prins <pjotr.prins@thebird.nl>
|
6
|
+
#
|
7
|
+
$: << "../lib"
|
8
|
+
|
9
|
+
|
10
|
+
require 'bio-gff3'
|
11
|
+
|
12
|
+
include Bio::GFFbrowser
|
13
|
+
|
14
|
+
describe FastLineParser, "GFF3 Fast line parser" do
|
15
|
+
include Bio::GFFbrowser::FastLineParser
|
16
|
+
|
17
|
+
it "should parse attributes" do
|
18
|
+
parse_attributes_fast("id=1").should == { "id"=>"1" }
|
19
|
+
parse_attributes_fast("id=1;parent=45").should == { "id"=>"1", "parent" => "45" }
|
20
|
+
end
|
21
|
+
it "should parse escaped attributes" do
|
22
|
+
parse_attributes_fast("id%3D1=1").should == { "id%3D1"=>"1" }
|
23
|
+
parse_attributes_fast("id=1%3Bparent=45").should == { "id"=>"1%3Bparent=45" }
|
24
|
+
end
|
25
|
+
it "should parse records" do
|
26
|
+
parse_line_fast("ctg123\t.\tCDS\t1201\t1500\t.\t+\t0\tID=cds00001;Parent=mRNA00001;Name=edenprotein.1").should == ["ctg123", ".", "CDS", 1201, 1500, 0.0, "+", 0, {"ID"=>"cds00001", "Parent"=>"mRNA00001", "Name"=>"edenprotein.1"}]
|
27
|
+
end
|
28
|
+
it "should handle a Record interface" do
|
29
|
+
fields =
|
30
|
+
parse_line_fast("ctg123\t.\tCDS\t1201\t1500\t.\t+\t0\tID=cds00001;Parent=mRNA00001;Name=edenprotein.1")
|
31
|
+
rec = Bio::GFFbrowser::FastLineRecord.new(fields)
|
32
|
+
rec.seqname.should == "ctg123"
|
33
|
+
rec.id.should == "cds00001"
|
34
|
+
rec.phase.should == 0
|
35
|
+
rec.start.should == 1201
|
36
|
+
rec.end.should == 1500
|
37
|
+
rec.score.should == 0.0
|
38
|
+
rec.strand.should == '+'
|
39
|
+
rec.feature.should == 'CDS'
|
40
|
+
rec.source.should == '.'
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
|
data/test/data/gff/test-cds.gff3
CHANGED
@@ -1,13 +1,9 @@
|
|
1
1
|
##gff-version 3 ##sequence-regio
|
2
2
|
# Gene gene:MhA1_Contig1040.frz3.gene29
|
3
3
|
MhA1_Contig1040 WormBase gene 1 182 . - . ID=gene:MhA1_Contig1040.frz3.gene29;Name=MhA1_Contig1040.frz3.gene29;Note=PREDICTED protein_coding;public_name=MhA1_Contig1040.frz3.gene29
|
4
|
-
MhA1_Contig1040 WormBase mRNA 1 182 . - . ID=transcript:MhA1_Contig1040.
|
5
|
-
|
6
|
-
|
7
|
-
MhA1_Contig1040 WormBase exon 1 182 . - . ID=exon:MhA1_Contig1040.frz3.g
|
8
|
-
ene29.1;Parent=transcript:MhA1_Contig1040.frz3.gene29
|
9
|
-
MhA1_Contig1040 WormBase CDS 1 180 . - 2 ID=cds:MhA1_Contig1040.frz3.gene
|
10
|
-
29;Parent=transcript:MhA1_Contig1040.frz3.gene29
|
4
|
+
MhA1_Contig1040 WormBase mRNA 1 182 . - . ID=transcript:MhA1_Contig1040.frz3.gene29;Parent=gene:MhA1_Contig1040.frz3.gene29;Name=MhA1_Contig1040.frz3.gene29;public_name=MhA1_Contig1040.frz3.gene29
|
5
|
+
MhA1_Contig1040 WormBase exon 1 182 . - . ID=exon:MhA1_Contig1040.frz3.gene29.1;Parent=transcript:MhA1_Contig1040.frz3.gene29
|
6
|
+
MhA1_Contig1040 WormBase CDS 1 180 . - 2 ID=cds:MhA1_Contig1040.frz3.gene29;Parent=transcript:MhA1_Contig1040.frz3.gene29
|
11
7
|
|
12
8
|
|
13
9
|
|
@@ -0,0 +1,61 @@
|
|
1
|
+
INFO bio-gff3: ---- Digest DB and store data in mRNA Hash <>
|
2
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
|
3
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
4
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
5
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
6
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
7
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
8
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
9
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-2 <>
|
10
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
11
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
12
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
13
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
14
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
|
15
|
+
INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
|
16
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
|
17
|
+
INFO bio-gff3: Added Component with component ID Contig2 2001 5000 <>
|
18
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 5001 20000>
|
19
|
+
INFO bio-gff3: Added Component with component ID Contig2 5001 20000 <>
|
20
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 37450>
|
21
|
+
INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
|
22
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
|
23
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
|
24
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
25
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
26
|
+
INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
|
27
|
+
INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
|
28
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
29
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
30
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
31
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
32
|
+
INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
|
33
|
+
INFO bio-gff3: Added gene with component ID Misc:thing1 <>
|
34
|
+
INFO bio-gff3: Adding gene <Misc:thing1> <>
|
35
|
+
INFO bio-gff3: Adding mRNA <Misc:thing2> <>
|
36
|
+
INFO bio-gff3: Adding CDS <Misc:thing3> <>
|
37
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
38
|
+
INFO bio-gff3: Added contig with component ID test01 <>
|
39
|
+
INFO bio-gff3: Adding mRNA <mrna01short> <>
|
40
|
+
INFO bio-gff3: Adding mRNA <mrna01> <>
|
41
|
+
INFO bio-gff3: Adding mRNA <mrna01a> <>
|
42
|
+
INFO bio-gff3: Adding exon <exon01> <>
|
43
|
+
INFO bio-gff3: Adding exon <exon02> <>
|
44
|
+
INFO bio-gff3: Adding exon <exon02a> <>
|
45
|
+
INFO bio-gff3: Adding CDS <cds_short> <>
|
46
|
+
INFO bio-gff3: Adding CDS <cds1> <>
|
47
|
+
INFO bio-gff3: Adding CDS <cds1> <>
|
48
|
+
INFO bio-gff3: Adding CDS <cds2> <>
|
49
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:AL12345.2>
|
50
|
+
WARN bio-gff3: No sequence information for <mRNA:trans-8>
|
51
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:ABC123>
|
52
|
+
WARN bio-gff3: No sequence information for <Misc:thing2>
|
53
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
54
|
+
>mrna01short Sequence:test01_1:400 (3:14)
|
55
|
+
GAAGATTTGTAT
|
56
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
57
|
+
>mrna01 Sequence:test01_1:400 (101:230)
|
58
|
+
TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGATAATGGGTACTGCACCCCTC
|
59
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
60
|
+
>mrna01a Sequence:test01_1:400 (101:280)
|
61
|
+
TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGATAATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
|
@@ -0,0 +1,65 @@
|
|
1
|
+
INFO bio-gff3: ---- Digest DB and store data in mRNA Hash <>
|
2
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
|
3
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
4
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
5
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
6
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
7
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
8
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
9
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-2 <>
|
10
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
11
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
12
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
13
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
14
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
|
15
|
+
INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
|
16
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
|
17
|
+
INFO bio-gff3: Added Component with component ID Contig2 2001 5000 <>
|
18
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 5001 20000>
|
19
|
+
INFO bio-gff3: Added Component with component ID Contig2 5001 20000 <>
|
20
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 37450>
|
21
|
+
INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
|
22
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
|
23
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
|
24
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
25
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
26
|
+
INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
|
27
|
+
INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
|
28
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
29
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
30
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
31
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
32
|
+
INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
|
33
|
+
INFO bio-gff3: Added gene with component ID Misc:thing1 <>
|
34
|
+
INFO bio-gff3: Adding gene <Misc:thing1> <>
|
35
|
+
INFO bio-gff3: Adding mRNA <Misc:thing2> <>
|
36
|
+
INFO bio-gff3: Adding CDS <Misc:thing3> <>
|
37
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
38
|
+
INFO bio-gff3: Added contig with component ID test01 <>
|
39
|
+
INFO bio-gff3: Added gene with component ID gene01 <>
|
40
|
+
INFO bio-gff3: Adding gene <gene01> <>
|
41
|
+
INFO bio-gff3: Adding mRNA <mrna01short> <>
|
42
|
+
INFO bio-gff3: Adding mRNA <mrna01> <>
|
43
|
+
INFO bio-gff3: Adding mRNA <mrna01a> <>
|
44
|
+
INFO bio-gff3: Adding exon <exon01> <>
|
45
|
+
INFO bio-gff3: Adding exon <exon02> <>
|
46
|
+
INFO bio-gff3: Adding exon <exon02a> <>
|
47
|
+
INFO bio-gff3: Adding CDS <cds_short> <>
|
48
|
+
INFO bio-gff3: Adding CDS <cds1> <>
|
49
|
+
INFO bio-gff3: Adding CDS <cds1> <>
|
50
|
+
INFO bio-gff3: Adding CDS <cds2> <>
|
51
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Transcript:trans-1>
|
52
|
+
WARN bio-gff3: No sequence information for <Transcript:trans-1>
|
53
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:AL12345.2>
|
54
|
+
WARN bio-gff3: No sequence information for <mRNA:trans-8>
|
55
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:ABC123>
|
56
|
+
WARN bio-gff3: No sequence information for <Misc:thing3>
|
57
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
58
|
+
>cds_short Sequence:test01_1:400 (3:14)
|
59
|
+
GAAGATTTGTAT
|
60
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
61
|
+
>cds1 Sequence:test01_1:400 (164:190, 192:200)
|
62
|
+
TGGCGACTATCGGTCGAAGTTAAGACATTCATGGGC
|
63
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
64
|
+
>cds2 Sequence:test01_1:400 (192:200)
|
65
|
+
TTCATGGGC
|
@@ -0,0 +1,56 @@
|
|
1
|
+
INFO bio-gff3: ---- Digest DB and store data in mRNA Hash (NoCache) <>
|
2
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
|
3
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
4
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
5
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
6
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
7
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
8
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
9
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-2 <>
|
10
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
11
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
12
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
13
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
|
14
|
+
INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
|
15
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
|
16
|
+
INFO bio-gff3: Added Component with component ID Contig2 2001 5000 <>
|
17
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 5001 20000>
|
18
|
+
INFO bio-gff3: Added Component with component ID Contig2 5001 20000 <>
|
19
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 37450>
|
20
|
+
INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
|
21
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
|
22
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
|
23
|
+
INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
|
24
|
+
INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
|
25
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
26
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
27
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
28
|
+
INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
|
29
|
+
INFO bio-gff3: Added gene with component ID Misc:thing1 <>
|
30
|
+
INFO bio-gff3: Adding gene <Misc:thing1> <>
|
31
|
+
INFO bio-gff3: Adding mRNA <Misc:thing2> <>
|
32
|
+
INFO bio-gff3: Adding CDS <Misc:thing3> <>
|
33
|
+
INFO bio-gff3: Added contig with component ID test01 <>
|
34
|
+
INFO bio-gff3: Adding mRNA <mrna01short> <>
|
35
|
+
INFO bio-gff3: Adding mRNA <mrna01> <>
|
36
|
+
INFO bio-gff3: Adding mRNA <mrna01a> <>
|
37
|
+
INFO bio-gff3: Adding exon <exon01> <>
|
38
|
+
INFO bio-gff3: Adding exon <exon02> <>
|
39
|
+
INFO bio-gff3: Adding exon <exon02a> <>
|
40
|
+
INFO bio-gff3: Adding CDS <cds_short> <>
|
41
|
+
INFO bio-gff3: Adding CDS <cds1> <>
|
42
|
+
INFO bio-gff3: Adding CDS <cds1> <>
|
43
|
+
INFO bio-gff3: Adding CDS <cds2> <>
|
44
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:AL12345.2>
|
45
|
+
WARN bio-gff3: No sequence information for <mRNA:trans-8>
|
46
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:ABC123>
|
47
|
+
WARN bio-gff3: No sequence information for <Misc:thing2>
|
48
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
49
|
+
>mrna01short Sequence:test01_1:400 (3:14)
|
50
|
+
GAAGATTTGTAT
|
51
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
52
|
+
>mrna01 Sequence:test01_1:400 (101:230)
|
53
|
+
TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGATAATGGGTACTGCACCCCTC
|
54
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
55
|
+
>mrna01a Sequence:test01_1:400 (101:280)
|
56
|
+
TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGATAATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
|
@@ -0,0 +1,65 @@
|
|
1
|
+
INFO bio-gff3: ---- Digest DB and store data in mRNA Hash <>
|
2
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
|
3
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
4
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
5
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
6
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
7
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
8
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
9
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-2 <>
|
10
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
11
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
12
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
13
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
14
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
|
15
|
+
INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
|
16
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
|
17
|
+
INFO bio-gff3: Added Component with component ID Contig2 2001 5000 <>
|
18
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 5001 20000>
|
19
|
+
INFO bio-gff3: Added Component with component ID Contig2 5001 20000 <>
|
20
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 37450>
|
21
|
+
INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
|
22
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
|
23
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
|
24
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
25
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
26
|
+
INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
|
27
|
+
INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
|
28
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
29
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
30
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
31
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
32
|
+
INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
|
33
|
+
INFO bio-gff3: Added gene with component ID Misc:thing1 <>
|
34
|
+
INFO bio-gff3: Adding gene <Misc:thing1> <>
|
35
|
+
INFO bio-gff3: Adding mRNA <Misc:thing2> <>
|
36
|
+
INFO bio-gff3: Adding CDS <Misc:thing3> <>
|
37
|
+
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
38
|
+
INFO bio-gff3: Added contig with component ID test01 <>
|
39
|
+
INFO bio-gff3: Added gene with component ID gene01 <>
|
40
|
+
INFO bio-gff3: Adding gene <gene01> <>
|
41
|
+
INFO bio-gff3: Adding mRNA <mrna01short> <>
|
42
|
+
INFO bio-gff3: Adding mRNA <mrna01> <>
|
43
|
+
INFO bio-gff3: Adding mRNA <mrna01a> <>
|
44
|
+
INFO bio-gff3: Adding exon <exon01> <>
|
45
|
+
INFO bio-gff3: Adding exon <exon02> <>
|
46
|
+
INFO bio-gff3: Adding exon <exon02a> <>
|
47
|
+
INFO bio-gff3: Adding CDS <cds_short> <>
|
48
|
+
INFO bio-gff3: Adding CDS <cds1> <>
|
49
|
+
INFO bio-gff3: Adding CDS <cds1> <>
|
50
|
+
INFO bio-gff3: Adding CDS <cds2> <>
|
51
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Transcript:trans-1>
|
52
|
+
WARN bio-gff3: No sequence information for <Transcript:trans-1>
|
53
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:AL12345.2>
|
54
|
+
WARN bio-gff3: No sequence information for <mRNA:trans-8>
|
55
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:ABC123>
|
56
|
+
WARN bio-gff3: No sequence information for <Misc:thing3>
|
57
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
58
|
+
>cds_short Sequence:test01_1:400 (3:14)
|
59
|
+
GAAGATTTGTAT
|
60
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
61
|
+
>cds1 Sequence:test01_1:400 (164:190, 192:200)
|
62
|
+
TGGCGACTATCGGTCGAAGTTAAGACATTCATGGGC
|
63
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
64
|
+
>cds2 Sequence:test01_1:400 (192:200)
|
65
|
+
TTCATGGGC
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# Regression tester
|
2
|
+
#
|
3
|
+
# Info:: Pjotr's shared Ruby modules
|
4
|
+
# Author:: Pjotr Prins
|
5
|
+
# mail:: pjotr.prins@thebird.nl
|
6
|
+
# Copyright:: July 2007
|
7
|
+
# License:: Ruby License
|
8
|
+
|
9
|
+
module RegressionTest
|
10
|
+
|
11
|
+
def RegressionTest.create b
|
12
|
+
@@test_create = b
|
13
|
+
end
|
14
|
+
|
15
|
+
# Invoke the regression test by passing a string - which ends up a file
|
16
|
+
# in test/regression with +filename+. When +create+ is +true+ the file
|
17
|
+
# will be created/overwritten. Otherwise it is tested against returning
|
18
|
+
# whether it is equal or not. When a test fails both test file and new
|
19
|
+
# file exist in the regression directory - so you can execute a diff.
|
20
|
+
#
|
21
|
+
# Example:
|
22
|
+
# RegressionTest.test `#{cfrubybin} --help`,'cfruby_helptext',testdir,$test_create
|
23
|
+
|
24
|
+
def RegressionTest.test text, filename, testdir, create = @@test_create
|
25
|
+
fn = testdir+'/'+filename+'.rtest'
|
26
|
+
fntest = fn+'.new'
|
27
|
+
|
28
|
+
if create
|
29
|
+
f = File.open(fn,'w')
|
30
|
+
f.write text
|
31
|
+
File.unlink fntest if File.exist? fntest
|
32
|
+
else
|
33
|
+
# ---- here we have to compare info
|
34
|
+
if ! File.exist?(fn)
|
35
|
+
raise "Cannot execute regression test because file #{fn} does not exist! - use --create option?"
|
36
|
+
end
|
37
|
+
f = File.open(fn)
|
38
|
+
b = ''
|
39
|
+
f.each do | line |
|
40
|
+
b += line
|
41
|
+
end
|
42
|
+
if b!=text
|
43
|
+
# ---- Write newer file
|
44
|
+
f2 = File.open(fntest,'w')
|
45
|
+
f2.write text
|
46
|
+
return false
|
47
|
+
end
|
48
|
+
end
|
49
|
+
true
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
data/test/test_bio-gff3.rb
CHANGED
@@ -1,7 +1,37 @@
|
|
1
|
-
|
1
|
+
$: << '.'
|
2
|
+
do_create = if ARGV[0] == '-c' or ARGV[0] == '--create'
|
3
|
+
ARGV.shift
|
4
|
+
end
|
5
|
+
|
6
|
+
require 'test/unit'
|
7
|
+
require 'regressiontest'
|
2
8
|
|
3
|
-
|
4
|
-
|
5
|
-
|
9
|
+
RegressionTest.create(do_create)
|
10
|
+
|
11
|
+
class Gff3Test < Test::Unit::TestCase
|
12
|
+
|
13
|
+
rootpath = File.join(File.dirname(__FILE__),'..')
|
14
|
+
BIN = rootpath + '/bin/gff3-fetch'
|
15
|
+
DAT = rootpath + '/test/data'
|
16
|
+
def test_cache
|
17
|
+
assert_equal(true,single_run("mRNA #{DAT}/gff/test-ext-fasta.fa #{DAT}/gff/test-ext-fasta.gff3",'test_ext_gff3'))
|
18
|
+
assert_equal(true,single_run("CDS #{DAT}/gff/test.gff3",'test_gff3'))
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_nocache
|
22
|
+
assert_equal(true,single_run("mRNA --cache none #{DAT}/gff/test-ext-fasta.fa #{DAT}/gff/test-ext-fasta.gff3",this_method+'_ext_gff3'))
|
23
|
+
assert_equal(true,single_run("CDS #{DAT}/gff/test.gff3",this_method+'_gff3'))
|
6
24
|
end
|
25
|
+
|
26
|
+
private
|
27
|
+
def this_method
|
28
|
+
caller[0] =~ /`([^']*)'/ and $1
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
def single_run opts, name
|
34
|
+
cmd = "#{BIN} --logger stdout #{opts}"
|
35
|
+
# p cmd
|
36
|
+
RegressionTest.test `#{cmd}`,name,"#{DAT}/regression"
|
7
37
|
end
|