bio-gff3 0.8.4 → 0.8.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. data/Gemfile +4 -5
  2. data/Gemfile.lock +4 -0
  3. data/Rakefile +12 -10
  4. data/VERSION +1 -1
  5. data/bin/gff3-fetch +104 -47
  6. data/bio-gff3.gemspec +40 -12
  7. data/lib/bio-gff3.rb +29 -1
  8. data/lib/bio/db/gff/{gffinmemory.rb → digest/gffinmemory.rb} +15 -2
  9. data/lib/bio/db/gff/{gffnocache.rb → digest/gffnocache.rb} +2 -0
  10. data/lib/bio/db/gff/{gffparser.rb → digest/gffparser.rb} +40 -49
  11. data/lib/bio/db/gff/{gfffasta.rb → file/gfffasta.rb} +1 -1
  12. data/lib/bio/db/gff/{gfffileiterator.rb → file/gfffileiterator.rb} +3 -1
  13. data/lib/bio/db/gff/{gffdb.rb → gff3.rb} +6 -11
  14. data/lib/bio/db/gff/gff3fasta.rb +25 -0
  15. data/lib/bio/db/gff/gff3parsefile.rb +33 -0
  16. data/lib/bio/db/gff/gff3parserec.rb +63 -0
  17. data/lib/bio/db/gff/gffcomponent.rb +94 -0
  18. data/lib/bio/db/gff/gffrecord.rb +71 -0
  19. data/lib/bio/db/gff/gffsection.rb +41 -0
  20. data/lib/bio/db/gff/gffsequence.rb +160 -0
  21. data/lib/bio/db/gff/gffvalidate.rb +90 -0
  22. data/lib/bio/output/gfflogger.rb +33 -0
  23. data/spec/gff3_assemble2_spec.rb +3 -3
  24. data/spec/gff3_assemble3_spec.rb +4 -4
  25. data/spec/gff3_assemble_spec.rb +3 -3
  26. data/spec/gffdb_spec.rb +15 -15
  27. data/spec/gffparserec.rb +44 -0
  28. data/test/data/gff/test-cds.gff3 +3 -7
  29. data/test/data/regression/test_ext_gff3.rtest +61 -0
  30. data/test/data/regression/test_gff3.rtest +65 -0
  31. data/test/data/regression/test_nocache_ext_gff3.rtest +56 -0
  32. data/test/data/regression/test_nocache_gff3.rtest +65 -0
  33. data/test/regressiontest.rb +52 -0
  34. data/test/test_bio-gff3.rb +34 -4
  35. metadata +103 -29
  36. data/lib/bio/db/gff/gffassemble.rb +0 -341
@@ -42,44 +42,44 @@ def iterators_should_be_implemented
42
42
  end
43
43
  end
44
44
 
45
- describe GFFdb, "GFF3 API (InMemory) with everything in memory" do
45
+ describe GFF3, "GFF3 API (InMemory) with everything in memory" do
46
46
 
47
47
  before :all do
48
48
  # initialize
49
- gffdb = Bio::GFFbrowser::GFFdb.new(TESTGFF1)
50
- @gff = gffdb.assembler
49
+ gff3 = Bio::GFFbrowser::GFF3.new(TESTGFF1)
50
+ @gff = gff3.assembler
51
51
  end
52
52
 
53
53
  iterators_should_be_implemented
54
54
  end
55
55
 
56
- describe GFFdb, "GFF3 API with :cache_components => 1000, :cache_records => :cache_none" do
56
+ describe GFF3, "GFF3 API with :cache_components => 1000, :cache_records => :cache_none" do
57
57
  # iterators_should_be_implemented
58
58
  end
59
59
 
60
- describe GFFdb, "GFF3 API with :cache_components => 1000, :cache_records => 1000" do
60
+ describe GFF3, "GFF3 API with :cache_components => 1000, :cache_records => 1000" do
61
61
  it "should implement real caching"
62
62
  # iterators_should_be_implemented
63
63
  end
64
64
 
65
- describe GFFdb, "GFF3 API with :cache_records => :cache_none" do
65
+ describe GFF3, "GFF3 API with :cache_records => :cache_none" do
66
66
  # iterators_should_be_implemented
67
67
  end
68
68
 
69
- describe GFFdb, "GFF3 API (NoCache) with :cache_components => :cache_none, :cache_records => :cache_none" do
69
+ describe GFF3, "GFF3 API (NoCache) with :cache_components => :cache_none, :cache_records => :cache_none" do
70
70
  before :all do
71
71
  # initialize
72
- gffdb = Bio::GFFbrowser::GFFdb.new(TESTGFF1, :cache_components => :cache_none, :cache_records => :cache_none)
73
- @gff = gffdb.assembler
72
+ gff3 = Bio::GFFbrowser::GFF3.new(TESTGFF1, :cache_components => :cache_none, :cache_records => :cache_none)
73
+ @gff = gff3.assembler
74
74
  end
75
75
 
76
76
  iterators_should_be_implemented
77
77
  end
78
78
 
79
- describe GFFdb, "GFF3 API (InMemory) with external FASTA" do
79
+ describe GFF3, "GFF3 API (InMemory) with external FASTA" do
80
80
  before :all do
81
- gffdb = Bio::GFFbrowser::GFFdb.new(TESTGFF1EXT, :fasta_filename => TESTGFF1FASTA)
82
- @gff = gffdb.assembler
81
+ gff3 = Bio::GFFbrowser::GFF3.new(TESTGFF1EXT, :parser => :line, :fasta_filename => TESTGFF1FASTA)
82
+ @gff = gff3.assembler
83
83
  end
84
84
 
85
85
  it "should have a sequence list" do
@@ -89,10 +89,10 @@ describe GFFdb, "GFF3 API (InMemory) with external FASTA" do
89
89
  iterators_should_be_implemented
90
90
  end
91
91
 
92
- describe GFFdb, "GFF3 API (NoCache) with external FASTA" do
92
+ describe GFF3, "GFF3 API (NoCache) with external FASTA" do
93
93
  before :all do
94
- gffdb = Bio::GFFbrowser::GFFdb.new(TESTGFF1EXT, :fasta_filename => TESTGFF1FASTA, :cache_components => :cache_none, :cache_records => :cache_none)
95
- @gff = gffdb.assembler
94
+ gff3 = Bio::GFFbrowser::GFF3.new(TESTGFF1EXT, :fasta_filename => TESTGFF1FASTA, :cache_components => :cache_none, :cache_records => :cache_none)
95
+ @gff = gff3.assembler
96
96
  end
97
97
 
98
98
  iterators_should_be_implemented
@@ -0,0 +1,44 @@
1
+ # RSpec for BioRuby-GFF3-Plugin. Run with something like:
2
+ #
3
+ # rspec -I ../bioruby/lib/ spec/gffparserec.rb
4
+ #
5
+ # Copyright (C) 2010 Pjotr Prins <pjotr.prins@thebird.nl>
6
+ #
7
+ $: << "../lib"
8
+
9
+
10
+ require 'bio-gff3'
11
+
12
+ include Bio::GFFbrowser
13
+
14
+ describe FastLineParser, "GFF3 Fast line parser" do
15
+ include Bio::GFFbrowser::FastLineParser
16
+
17
+ it "should parse attributes" do
18
+ parse_attributes_fast("id=1").should == { "id"=>"1" }
19
+ parse_attributes_fast("id=1;parent=45").should == { "id"=>"1", "parent" => "45" }
20
+ end
21
+ it "should parse escaped attributes" do
22
+ parse_attributes_fast("id%3D1=1").should == { "id%3D1"=>"1" }
23
+ parse_attributes_fast("id=1%3Bparent=45").should == { "id"=>"1%3Bparent=45" }
24
+ end
25
+ it "should parse records" do
26
+ parse_line_fast("ctg123\t.\tCDS\t1201\t1500\t.\t+\t0\tID=cds00001;Parent=mRNA00001;Name=edenprotein.1").should == ["ctg123", ".", "CDS", 1201, 1500, 0.0, "+", 0, {"ID"=>"cds00001", "Parent"=>"mRNA00001", "Name"=>"edenprotein.1"}]
27
+ end
28
+ it "should handle a Record interface" do
29
+ fields =
30
+ parse_line_fast("ctg123\t.\tCDS\t1201\t1500\t.\t+\t0\tID=cds00001;Parent=mRNA00001;Name=edenprotein.1")
31
+ rec = Bio::GFFbrowser::FastLineRecord.new(fields)
32
+ rec.seqname.should == "ctg123"
33
+ rec.id.should == "cds00001"
34
+ rec.phase.should == 0
35
+ rec.start.should == 1201
36
+ rec.end.should == 1500
37
+ rec.score.should == 0.0
38
+ rec.strand.should == '+'
39
+ rec.feature.should == 'CDS'
40
+ rec.source.should == '.'
41
+ end
42
+ end
43
+
44
+
@@ -1,13 +1,9 @@
1
1
  ##gff-version 3 ##sequence-regio
2
2
  # Gene gene:MhA1_Contig1040.frz3.gene29
3
3
  MhA1_Contig1040 WormBase gene 1 182 . - . ID=gene:MhA1_Contig1040.frz3.gene29;Name=MhA1_Contig1040.frz3.gene29;Note=PREDICTED protein_coding;public_name=MhA1_Contig1040.frz3.gene29
4
- MhA1_Contig1040 WormBase mRNA 1 182 . - . ID=transcript:MhA1_Contig1040.
5
- frz3.gene29;Parent=gene:MhA1_Contig1040.frz3.gene29;Name=MhA1_Contig1040.frz3.ge
6
- ne29;public_name=MhA1_Contig1040.frz3.gene29
7
- MhA1_Contig1040 WormBase exon 1 182 . - . ID=exon:MhA1_Contig1040.frz3.g
8
- ene29.1;Parent=transcript:MhA1_Contig1040.frz3.gene29
9
- MhA1_Contig1040 WormBase CDS 1 180 . - 2 ID=cds:MhA1_Contig1040.frz3.gene
10
- 29;Parent=transcript:MhA1_Contig1040.frz3.gene29
4
+ MhA1_Contig1040 WormBase mRNA 1 182 . - . ID=transcript:MhA1_Contig1040.frz3.gene29;Parent=gene:MhA1_Contig1040.frz3.gene29;Name=MhA1_Contig1040.frz3.gene29;public_name=MhA1_Contig1040.frz3.gene29
5
+ MhA1_Contig1040 WormBase exon 1 182 . - . ID=exon:MhA1_Contig1040.frz3.gene29.1;Parent=transcript:MhA1_Contig1040.frz3.gene29
6
+ MhA1_Contig1040 WormBase CDS 1 180 . - 2 ID=cds:MhA1_Contig1040.frz3.gene29;Parent=transcript:MhA1_Contig1040.frz3.gene29
11
7
 
12
8
 
13
9
 
@@ -0,0 +1,61 @@
1
+ INFO bio-gff3: ---- Digest DB and store data in mRNA Hash <>
2
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
3
+ INFO bio-gff3: Adding exon <Transcript:trans-1> <>
4
+ INFO bio-gff3: Adding exon <Transcript:trans-1> <>
5
+ INFO bio-gff3: Adding exon <Transcript:trans-1> <>
6
+ INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
7
+ INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
8
+ INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
9
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-2 <>
10
+ INFO bio-gff3: Adding exon <Transcript:trans-2> <>
11
+ INFO bio-gff3: Adding exon <Transcript:trans-2> <>
12
+ INFO bio-gff3: Adding exon <Transcript:trans-2> <>
13
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
14
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
15
+ INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
16
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
17
+ INFO bio-gff3: Added Component with component ID Contig2 2001 5000 <>
18
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 5001 20000>
19
+ INFO bio-gff3: Added Component with component ID Contig2 5001 20000 <>
20
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 37450>
21
+ INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
22
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
23
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
24
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
25
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
26
+ INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
27
+ INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
28
+ INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
29
+ INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
30
+ INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
31
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
32
+ INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
33
+ INFO bio-gff3: Added gene with component ID Misc:thing1 <>
34
+ INFO bio-gff3: Adding gene <Misc:thing1> <>
35
+ INFO bio-gff3: Adding mRNA <Misc:thing2> <>
36
+ INFO bio-gff3: Adding CDS <Misc:thing3> <>
37
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
38
+ INFO bio-gff3: Added contig with component ID test01 <>
39
+ INFO bio-gff3: Adding mRNA <mrna01short> <>
40
+ INFO bio-gff3: Adding mRNA <mrna01> <>
41
+ INFO bio-gff3: Adding mRNA <mrna01a> <>
42
+ INFO bio-gff3: Adding exon <exon01> <>
43
+ INFO bio-gff3: Adding exon <exon02> <>
44
+ INFO bio-gff3: Adding exon <exon02a> <>
45
+ INFO bio-gff3: Adding CDS <cds_short> <>
46
+ INFO bio-gff3: Adding CDS <cds1> <>
47
+ INFO bio-gff3: Adding CDS <cds1> <>
48
+ INFO bio-gff3: Adding CDS <cds2> <>
49
+ INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:AL12345.2>
50
+ WARN bio-gff3: No sequence information for <mRNA:trans-8>
51
+ INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:ABC123>
52
+ WARN bio-gff3: No sequence information for <Misc:thing2>
53
+ INFO bio-gff3: find_component: Matched seqname <test01>
54
+ >mrna01short Sequence:test01_1:400 (3:14)
55
+ GAAGATTTGTAT
56
+ INFO bio-gff3: find_component: Matched seqname <test01>
57
+ >mrna01 Sequence:test01_1:400 (101:230)
58
+ TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGATAATGGGTACTGCACCCCTC
59
+ INFO bio-gff3: find_component: Matched seqname <test01>
60
+ >mrna01a Sequence:test01_1:400 (101:280)
61
+ TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGATAATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
@@ -0,0 +1,65 @@
1
+ INFO bio-gff3: ---- Digest DB and store data in mRNA Hash <>
2
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
3
+ INFO bio-gff3: Adding exon <Transcript:trans-1> <>
4
+ INFO bio-gff3: Adding exon <Transcript:trans-1> <>
5
+ INFO bio-gff3: Adding exon <Transcript:trans-1> <>
6
+ INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
7
+ INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
8
+ INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
9
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-2 <>
10
+ INFO bio-gff3: Adding exon <Transcript:trans-2> <>
11
+ INFO bio-gff3: Adding exon <Transcript:trans-2> <>
12
+ INFO bio-gff3: Adding exon <Transcript:trans-2> <>
13
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
14
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
15
+ INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
16
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
17
+ INFO bio-gff3: Added Component with component ID Contig2 2001 5000 <>
18
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 5001 20000>
19
+ INFO bio-gff3: Added Component with component ID Contig2 5001 20000 <>
20
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 37450>
21
+ INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
22
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
23
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
24
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
25
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
26
+ INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
27
+ INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
28
+ INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
29
+ INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
30
+ INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
31
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
32
+ INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
33
+ INFO bio-gff3: Added gene with component ID Misc:thing1 <>
34
+ INFO bio-gff3: Adding gene <Misc:thing1> <>
35
+ INFO bio-gff3: Adding mRNA <Misc:thing2> <>
36
+ INFO bio-gff3: Adding CDS <Misc:thing3> <>
37
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
38
+ INFO bio-gff3: Added contig with component ID test01 <>
39
+ INFO bio-gff3: Added gene with component ID gene01 <>
40
+ INFO bio-gff3: Adding gene <gene01> <>
41
+ INFO bio-gff3: Adding mRNA <mrna01short> <>
42
+ INFO bio-gff3: Adding mRNA <mrna01> <>
43
+ INFO bio-gff3: Adding mRNA <mrna01a> <>
44
+ INFO bio-gff3: Adding exon <exon01> <>
45
+ INFO bio-gff3: Adding exon <exon02> <>
46
+ INFO bio-gff3: Adding exon <exon02a> <>
47
+ INFO bio-gff3: Adding CDS <cds_short> <>
48
+ INFO bio-gff3: Adding CDS <cds1> <>
49
+ INFO bio-gff3: Adding CDS <cds1> <>
50
+ INFO bio-gff3: Adding CDS <cds2> <>
51
+ INFO bio-gff3: find_component: Matched (long search) column 0 and location <Transcript:trans-1>
52
+ WARN bio-gff3: No sequence information for <Transcript:trans-1>
53
+ INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:AL12345.2>
54
+ WARN bio-gff3: No sequence information for <mRNA:trans-8>
55
+ INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:ABC123>
56
+ WARN bio-gff3: No sequence information for <Misc:thing3>
57
+ INFO bio-gff3: find_component: Matched seqname <test01>
58
+ >cds_short Sequence:test01_1:400 (3:14)
59
+ GAAGATTTGTAT
60
+ INFO bio-gff3: find_component: Matched seqname <test01>
61
+ >cds1 Sequence:test01_1:400 (164:190, 192:200)
62
+ TGGCGACTATCGGTCGAAGTTAAGACATTCATGGGC
63
+ INFO bio-gff3: find_component: Matched seqname <test01>
64
+ >cds2 Sequence:test01_1:400 (192:200)
65
+ TTCATGGGC
@@ -0,0 +1,56 @@
1
+ INFO bio-gff3: ---- Digest DB and store data in mRNA Hash (NoCache) <>
2
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
3
+ INFO bio-gff3: Adding exon <Transcript:trans-1> <>
4
+ INFO bio-gff3: Adding exon <Transcript:trans-1> <>
5
+ INFO bio-gff3: Adding exon <Transcript:trans-1> <>
6
+ INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
7
+ INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
8
+ INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
9
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-2 <>
10
+ INFO bio-gff3: Adding exon <Transcript:trans-2> <>
11
+ INFO bio-gff3: Adding exon <Transcript:trans-2> <>
12
+ INFO bio-gff3: Adding exon <Transcript:trans-2> <>
13
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
14
+ INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
15
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
16
+ INFO bio-gff3: Added Component with component ID Contig2 2001 5000 <>
17
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 5001 20000>
18
+ INFO bio-gff3: Added Component with component ID Contig2 5001 20000 <>
19
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 37450>
20
+ INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
21
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
22
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
23
+ INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
24
+ INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
25
+ INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
26
+ INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
27
+ INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
28
+ INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
29
+ INFO bio-gff3: Added gene with component ID Misc:thing1 <>
30
+ INFO bio-gff3: Adding gene <Misc:thing1> <>
31
+ INFO bio-gff3: Adding mRNA <Misc:thing2> <>
32
+ INFO bio-gff3: Adding CDS <Misc:thing3> <>
33
+ INFO bio-gff3: Added contig with component ID test01 <>
34
+ INFO bio-gff3: Adding mRNA <mrna01short> <>
35
+ INFO bio-gff3: Adding mRNA <mrna01> <>
36
+ INFO bio-gff3: Adding mRNA <mrna01a> <>
37
+ INFO bio-gff3: Adding exon <exon01> <>
38
+ INFO bio-gff3: Adding exon <exon02> <>
39
+ INFO bio-gff3: Adding exon <exon02a> <>
40
+ INFO bio-gff3: Adding CDS <cds_short> <>
41
+ INFO bio-gff3: Adding CDS <cds1> <>
42
+ INFO bio-gff3: Adding CDS <cds1> <>
43
+ INFO bio-gff3: Adding CDS <cds2> <>
44
+ INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:AL12345.2>
45
+ WARN bio-gff3: No sequence information for <mRNA:trans-8>
46
+ INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:ABC123>
47
+ WARN bio-gff3: No sequence information for <Misc:thing2>
48
+ INFO bio-gff3: find_component: Matched seqname <test01>
49
+ >mrna01short Sequence:test01_1:400 (3:14)
50
+ GAAGATTTGTAT
51
+ INFO bio-gff3: find_component: Matched seqname <test01>
52
+ >mrna01 Sequence:test01_1:400 (101:230)
53
+ TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGATAATGGGTACTGCACCCCTC
54
+ INFO bio-gff3: find_component: Matched seqname <test01>
55
+ >mrna01a Sequence:test01_1:400 (101:280)
56
+ TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGATAATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
@@ -0,0 +1,65 @@
1
+ INFO bio-gff3: ---- Digest DB and store data in mRNA Hash <>
2
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
3
+ INFO bio-gff3: Adding exon <Transcript:trans-1> <>
4
+ INFO bio-gff3: Adding exon <Transcript:trans-1> <>
5
+ INFO bio-gff3: Adding exon <Transcript:trans-1> <>
6
+ INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
7
+ INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
8
+ INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
9
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-2 <>
10
+ INFO bio-gff3: Adding exon <Transcript:trans-2> <>
11
+ INFO bio-gff3: Adding exon <Transcript:trans-2> <>
12
+ INFO bio-gff3: Adding exon <Transcript:trans-2> <>
13
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
14
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
15
+ INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
16
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
17
+ INFO bio-gff3: Added Component with component ID Contig2 2001 5000 <>
18
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 5001 20000>
19
+ INFO bio-gff3: Added Component with component ID Contig2 5001 20000 <>
20
+ WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 37450>
21
+ INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
22
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
23
+ INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
24
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
25
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
26
+ INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
27
+ INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
28
+ INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
29
+ INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
30
+ INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
31
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
32
+ INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
33
+ INFO bio-gff3: Added gene with component ID Misc:thing1 <>
34
+ INFO bio-gff3: Adding gene <Misc:thing1> <>
35
+ INFO bio-gff3: Adding mRNA <Misc:thing2> <>
36
+ INFO bio-gff3: Adding CDS <Misc:thing3> <>
37
+ WARN bio-gff3: Record with unknown ID. . . . . . . . .
38
+ INFO bio-gff3: Added contig with component ID test01 <>
39
+ INFO bio-gff3: Added gene with component ID gene01 <>
40
+ INFO bio-gff3: Adding gene <gene01> <>
41
+ INFO bio-gff3: Adding mRNA <mrna01short> <>
42
+ INFO bio-gff3: Adding mRNA <mrna01> <>
43
+ INFO bio-gff3: Adding mRNA <mrna01a> <>
44
+ INFO bio-gff3: Adding exon <exon01> <>
45
+ INFO bio-gff3: Adding exon <exon02> <>
46
+ INFO bio-gff3: Adding exon <exon02a> <>
47
+ INFO bio-gff3: Adding CDS <cds_short> <>
48
+ INFO bio-gff3: Adding CDS <cds1> <>
49
+ INFO bio-gff3: Adding CDS <cds1> <>
50
+ INFO bio-gff3: Adding CDS <cds2> <>
51
+ INFO bio-gff3: find_component: Matched (long search) column 0 and location <Transcript:trans-1>
52
+ WARN bio-gff3: No sequence information for <Transcript:trans-1>
53
+ INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:AL12345.2>
54
+ WARN bio-gff3: No sequence information for <mRNA:trans-8>
55
+ INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:ABC123>
56
+ WARN bio-gff3: No sequence information for <Misc:thing3>
57
+ INFO bio-gff3: find_component: Matched seqname <test01>
58
+ >cds_short Sequence:test01_1:400 (3:14)
59
+ GAAGATTTGTAT
60
+ INFO bio-gff3: find_component: Matched seqname <test01>
61
+ >cds1 Sequence:test01_1:400 (164:190, 192:200)
62
+ TGGCGACTATCGGTCGAAGTTAAGACATTCATGGGC
63
+ INFO bio-gff3: find_component: Matched seqname <test01>
64
+ >cds2 Sequence:test01_1:400 (192:200)
65
+ TTCATGGGC
@@ -0,0 +1,52 @@
1
+ # Regression tester
2
+ #
3
+ # Info:: Pjotr's shared Ruby modules
4
+ # Author:: Pjotr Prins
5
+ # mail:: pjotr.prins@thebird.nl
6
+ # Copyright:: July 2007
7
+ # License:: Ruby License
8
+
9
+ module RegressionTest
10
+
11
+ def RegressionTest.create b
12
+ @@test_create = b
13
+ end
14
+
15
+ # Invoke the regression test by passing a string - which ends up in a file
16
+ # in +testdir+ named +filename+.rtest. When +create+ is +true+ the file
17
+ # is created/overwritten. Otherwise the string is compared against the stored
18
+ # file, returning whether they are equal. When a test fails both the test file
19
+ # and the new file exist in the regression directory - so you can execute a diff.
20
+ #
21
+ # Example:
22
+ # RegressionTest.test `#{cfrubybin} --help`,'cfruby_helptext','test/regression',$test_create
23
+
24
+ def RegressionTest.test text, filename, testdir, create = @@test_create
25
+ fn = testdir+'/'+filename+'.rtest'
26
+ fntest = fn+'.new'
27
+
28
+ if create
29
+ f = File.open(fn,'w')
30
+ f.write text
31
+ File.unlink fntest if File.exist? fntest
32
+ else
33
+ # ---- here we have to compare info
34
+ if ! File.exist?(fn)
35
+ raise "Cannot execute regression test because file #{fn} does not exist! - use --create option?"
36
+ end
37
+ f = File.open(fn)
38
+ b = ''
39
+ f.each do | line |
40
+ b += line
41
+ end
42
+ if b!=text
43
+ # ---- Write newer file
44
+ f2 = File.open(fntest,'w')
45
+ f2.write text
46
+ return false
47
+ end
48
+ end
49
+ true
50
+ end
51
+
52
+ end
@@ -1,7 +1,37 @@
1
- require 'helper'
1
+ $: << '.'
2
+ do_create = if ARGV[0] == '-c' or ARGV[0] == '--create'
3
+ ARGV.shift
4
+ end
5
+
6
+ require 'test/unit'
7
+ require 'regressiontest'
2
8
 
3
- class TestBioGff3 < Test::Unit::TestCase
4
- should "probably rename this file and start testing for real" do
5
- flunk "hey buddy, you should probably rename this file and start testing for real"
9
+ RegressionTest.create(do_create)
10
+
11
+ class Gff3Test < Test::Unit::TestCase
12
+
13
+ rootpath = File.join(File.dirname(__FILE__),'..')
14
+ BIN = rootpath + '/bin/gff3-fetch'
15
+ DAT = rootpath + '/test/data'
16
+ def test_cache
17
+ assert_equal(true,single_run("mRNA #{DAT}/gff/test-ext-fasta.fa #{DAT}/gff/test-ext-fasta.gff3",'test_ext_gff3'))
18
+ assert_equal(true,single_run("CDS #{DAT}/gff/test.gff3",'test_gff3'))
19
+ end
20
+
21
+ def test_nocache
22
+ assert_equal(true,single_run("mRNA --cache none #{DAT}/gff/test-ext-fasta.fa #{DAT}/gff/test-ext-fasta.gff3",this_method+'_ext_gff3'))
23
+ assert_equal(true,single_run("CDS #{DAT}/gff/test.gff3",this_method+'_gff3'))
6
24
  end
25
+
26
+ private
27
+ def this_method
28
+ caller[0] =~ /`([^']*)'/ and $1
29
+ end
30
+
31
+ end
32
+
33
+ def single_run opts, name
34
+ cmd = "#{BIN} --logger stdout #{opts}"
35
+ # p cmd
36
+ RegressionTest.test `#{cmd}`,name,"#{DAT}/regression"
7
37
  end