bio-gff3 0.8.5 → 0.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +3 -2
- data/Rakefile +13 -4
- data/VERSION +1 -1
- data/bin/gff3-fetch +50 -14
- data/bio-gff3.gemspec +15 -20
- data/lib/bio/db/gff/block/gffblockparser.rb +93 -0
- data/lib/bio/db/gff/digest/gffinmemory.rb +2 -0
- data/lib/bio/db/gff/digest/gfflrucache.rb +208 -0
- data/lib/bio/db/gff/digest/gffnocache.rb +28 -9
- data/lib/bio/db/gff/digest/gffparser.rb +1 -1
- data/lib/bio/db/gff/file/gfffileiterator.rb +16 -7
- data/lib/bio/db/gff/gff3.rb +15 -5
- data/lib/bio/db/gff/gff3parserec.rb +1 -6
- data/lib/bio/db/gff/gffcomponent.rb +8 -6
- data/lib/bio/db/gff/gffrecord.rb +13 -8
- data/lib/bio/db/gff/gffsection.rb +0 -1
- data/lib/bio/db/gff/gffsequence.rb +3 -9
- data/lib/bio/db/gff/gffvalidate.rb +1 -1
- data/lib/bio/output/gfflogger.rb +10 -1
- data/spec/gff3_fileiterator_spec.rb +5 -4
- data/spec/gffdb_spec.rb +7 -1
- data/spec/gffparserec.rb +1 -1
- data/test/data/regression/test_ext_gff3.rtest +4 -5
- data/test/data/regression/test_gff3.rtest +4 -5
- data/test/data/regression/test_lrucache_ext_gff3.rtest +64 -0
- data/test/data/regression/test_lrucache_gff3.rtest +68 -0
- data/test/data/regression/test_nocache_ext_gff3.rtest +2 -0
- data/test/data/regression/test_nocache_gff3.rtest +3 -6
- data/test/test_bio-gff3.rb +6 -1
- metadata +37 -77
data/spec/gffparserec.rb
CHANGED
@@ -23,7 +23,7 @@ describe FastLineParser, "GFF3 Fast line parser" do
|
|
23
23
|
parse_attributes_fast("id=1%3Bparent=45").should == { "id"=>"1%3Bparent=45" }
|
24
24
|
end
|
25
25
|
it "should parse records" do
|
26
|
-
parse_line_fast("ctg123\t.\tCDS\t1201\t1500\t.\t+\t0\tID=cds00001;Parent=mRNA00001;Name=edenprotein.1").should ==
|
26
|
+
parse_line_fast("ctg123\t.\tCDS\t1201\t1500\t.\t+\t0\tID=cds00001;Parent=mRNA00001;Name=edenprotein.1").should == ["ctg123", ".", "CDS", "1201", "1500", ".", "+", "0", "ID=cds00001;Parent=mRNA00001;Name=edenprotein.1"]
|
27
27
|
end
|
28
28
|
it "should handle a Record interface" do
|
29
29
|
fields =
|
@@ -1,3 +1,4 @@
|
|
1
|
+
INFO bio-gff3: Memory used BaseLine RAM 10M, VMEM 14M
|
1
2
|
INFO bio-gff3: ---- Digest DB and store data in mRNA Hash <>
|
2
3
|
INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
|
3
4
|
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
@@ -10,7 +11,6 @@
|
|
10
11
|
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
11
12
|
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
12
13
|
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
13
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
14
14
|
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
|
15
15
|
INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
|
16
16
|
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
|
@@ -21,20 +21,16 @@
|
|
21
21
|
INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
|
22
22
|
INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
|
23
23
|
INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
|
24
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
25
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
26
24
|
INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
|
27
25
|
INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
|
28
26
|
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
29
27
|
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
30
28
|
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
31
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
32
29
|
INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
|
33
30
|
INFO bio-gff3: Added gene with component ID Misc:thing1 <>
|
34
31
|
INFO bio-gff3: Adding gene <Misc:thing1> <>
|
35
32
|
INFO bio-gff3: Adding mRNA <Misc:thing2> <>
|
36
33
|
INFO bio-gff3: Adding CDS <Misc:thing3> <>
|
37
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
38
34
|
INFO bio-gff3: Added contig with component ID test01 <>
|
39
35
|
INFO bio-gff3: Adding mRNA <mrna01short> <>
|
40
36
|
INFO bio-gff3: Adding mRNA <mrna01> <>
|
@@ -46,6 +42,8 @@
|
|
46
42
|
INFO bio-gff3: Adding CDS <cds1> <>
|
47
43
|
INFO bio-gff3: Adding CDS <cds1> <>
|
48
44
|
INFO bio-gff3: Adding CDS <cds2> <>
|
45
|
+
INFO bio-gff3: Memory used After reading GFF RAM 10M, VMEM 14M
|
46
|
+
INFO bio-gff3: Memory used After reading FASTA RAM 10M, VMEM 14M
|
49
47
|
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:AL12345.2>
|
50
48
|
WARN bio-gff3: No sequence information for <mRNA:trans-8>
|
51
49
|
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:ABC123>
|
@@ -59,3 +57,4 @@ TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGA
|
|
59
57
|
INFO bio-gff3: find_component: Matched seqname <test01>
|
60
58
|
>mrna01a Sequence:test01_1:400 (101:280)
|
61
59
|
TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGATAATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
|
60
|
+
INFO bio-gff3: Memory used Done RAM 10M, VMEM 14M
|
@@ -1,3 +1,4 @@
|
|
1
|
+
INFO bio-gff3: Memory used BaseLine RAM 10M, VMEM 14M
|
1
2
|
INFO bio-gff3: ---- Digest DB and store data in mRNA Hash <>
|
2
3
|
INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
|
3
4
|
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
@@ -10,7 +11,6 @@
|
|
10
11
|
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
11
12
|
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
12
13
|
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
13
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
14
14
|
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
|
15
15
|
INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
|
16
16
|
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
|
@@ -21,20 +21,16 @@
|
|
21
21
|
INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
|
22
22
|
INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
|
23
23
|
INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
|
24
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
25
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
26
24
|
INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
|
27
25
|
INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
|
28
26
|
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
29
27
|
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
30
28
|
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
31
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
32
29
|
INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
|
33
30
|
INFO bio-gff3: Added gene with component ID Misc:thing1 <>
|
34
31
|
INFO bio-gff3: Adding gene <Misc:thing1> <>
|
35
32
|
INFO bio-gff3: Adding mRNA <Misc:thing2> <>
|
36
33
|
INFO bio-gff3: Adding CDS <Misc:thing3> <>
|
37
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
38
34
|
INFO bio-gff3: Added contig with component ID test01 <>
|
39
35
|
INFO bio-gff3: Added gene with component ID gene01 <>
|
40
36
|
INFO bio-gff3: Adding gene <gene01> <>
|
@@ -48,6 +44,8 @@
|
|
48
44
|
INFO bio-gff3: Adding CDS <cds1> <>
|
49
45
|
INFO bio-gff3: Adding CDS <cds1> <>
|
50
46
|
INFO bio-gff3: Adding CDS <cds2> <>
|
47
|
+
INFO bio-gff3: Memory used After reading GFF RAM 10M, VMEM 14M
|
48
|
+
INFO bio-gff3: Memory used After reading FASTA RAM 10M, VMEM 14M
|
51
49
|
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Transcript:trans-1>
|
52
50
|
WARN bio-gff3: No sequence information for <Transcript:trans-1>
|
53
51
|
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:AL12345.2>
|
@@ -63,3 +61,4 @@ TGGCGACTATCGGTCGAAGTTAAGACATTCATGGGC
|
|
63
61
|
INFO bio-gff3: find_component: Matched seqname <test01>
|
64
62
|
>cds2 Sequence:test01_1:400 (192:200)
|
65
63
|
TTCATGGGC
|
64
|
+
INFO bio-gff3: Memory used Done RAM 10M, VMEM 14M
|
@@ -0,0 +1,64 @@
|
|
1
|
+
INFO bio-gff3: Memory used BaseLine RAM 10M, VMEM 14M
|
2
|
+
INFO bio-gff3: ---- Digest DB and store data in mRNA Hash (LruCache) <>
|
3
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
|
4
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
5
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
6
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
7
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
8
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
9
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
10
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-2 <>
|
11
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
12
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
13
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
14
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
|
15
|
+
INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
|
16
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
|
17
|
+
INFO bio-gff3: Added Component with component ID Contig2 2001 5000 <>
|
18
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 5001 20000>
|
19
|
+
INFO bio-gff3: Added Component with component ID Contig2 5001 20000 <>
|
20
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 37450>
|
21
|
+
INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
|
22
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
|
23
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
|
24
|
+
INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
|
25
|
+
INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
|
26
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
27
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
28
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
29
|
+
INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
|
30
|
+
INFO bio-gff3: Added gene with component ID Misc:thing1 <>
|
31
|
+
INFO bio-gff3: Adding gene <Misc:thing1> <>
|
32
|
+
INFO bio-gff3: Adding mRNA <Misc:thing2> <>
|
33
|
+
INFO bio-gff3: Adding CDS <Misc:thing3> <>
|
34
|
+
INFO bio-gff3: Added contig with component ID test01 <>
|
35
|
+
INFO bio-gff3: Adding mRNA <mrna01short> <>
|
36
|
+
INFO bio-gff3: Adding mRNA <mrna01> <>
|
37
|
+
INFO bio-gff3: Adding mRNA <mrna01a> <>
|
38
|
+
INFO bio-gff3: Adding exon <exon01> <>
|
39
|
+
INFO bio-gff3: Adding exon <exon02> <>
|
40
|
+
INFO bio-gff3: Adding exon <exon02a> <>
|
41
|
+
INFO bio-gff3: Adding CDS <cds_short> <>
|
42
|
+
INFO bio-gff3: Adding CDS <cds1> <>
|
43
|
+
INFO bio-gff3: Adding CDS <cds1> <>
|
44
|
+
INFO bio-gff3: Adding CDS <cds2> <>
|
45
|
+
INFO bio-gff3: Cache calls After reading files = 0 <>
|
46
|
+
INFO bio-gff3: Cache hits After reading files = 0 <>
|
47
|
+
INFO bio-gff3: Cache misses After reading files = 0 <>
|
48
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:AL12345.2>
|
49
|
+
WARN bio-gff3: No sequence information for <mRNA:trans-8>
|
50
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:ABC123>
|
51
|
+
WARN bio-gff3: No sequence information for <Misc:thing2>
|
52
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
53
|
+
>mrna01short Sequence:test01_1:400 (3:14)
|
54
|
+
GAAGATTTGTAT
|
55
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
56
|
+
>mrna01 Sequence:test01_1:400 (101:230)
|
57
|
+
TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGATAATGGGTACTGCACCCCTC
|
58
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
59
|
+
>mrna01a Sequence:test01_1:400 (101:280)
|
60
|
+
TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGATAATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
|
61
|
+
INFO bio-gff3: Cache calls After iterating = 54 <>
|
62
|
+
INFO bio-gff3: Cache hits After iterating = 37 <>
|
63
|
+
INFO bio-gff3: Cache misses After iterating = 17 <>
|
64
|
+
INFO bio-gff3: Memory used Done RAM 10M, VMEM 14M
|
@@ -0,0 +1,68 @@
|
|
1
|
+
INFO bio-gff3: Memory used BaseLine RAM 10M, VMEM 14M
|
2
|
+
INFO bio-gff3: ---- Digest DB and store data in mRNA Hash (LruCache) <>
|
3
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
|
4
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
5
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
6
|
+
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
7
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
8
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
9
|
+
INFO bio-gff3: Adding CDS <Transcript:trans-1> <>
|
10
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-2 <>
|
11
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
12
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
13
|
+
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
14
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
|
15
|
+
INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
|
16
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
|
17
|
+
INFO bio-gff3: Added Component with component ID Contig2 2001 5000 <>
|
18
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 5001 20000>
|
19
|
+
INFO bio-gff3: Added Component with component ID Contig2 5001 20000 <>
|
20
|
+
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 37450>
|
21
|
+
INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
|
22
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
|
23
|
+
INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
|
24
|
+
INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
|
25
|
+
INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
|
26
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
27
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
28
|
+
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
29
|
+
INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
|
30
|
+
INFO bio-gff3: Added gene with component ID Misc:thing1 <>
|
31
|
+
INFO bio-gff3: Adding gene <Misc:thing1> <>
|
32
|
+
INFO bio-gff3: Adding mRNA <Misc:thing2> <>
|
33
|
+
INFO bio-gff3: Adding CDS <Misc:thing3> <>
|
34
|
+
INFO bio-gff3: Added contig with component ID test01 <>
|
35
|
+
INFO bio-gff3: Added gene with component ID gene01 <>
|
36
|
+
INFO bio-gff3: Adding gene <gene01> <>
|
37
|
+
INFO bio-gff3: Adding mRNA <mrna01short> <>
|
38
|
+
INFO bio-gff3: Adding mRNA <mrna01> <>
|
39
|
+
INFO bio-gff3: Adding mRNA <mrna01a> <>
|
40
|
+
INFO bio-gff3: Adding exon <exon01> <>
|
41
|
+
INFO bio-gff3: Adding exon <exon02> <>
|
42
|
+
INFO bio-gff3: Adding exon <exon02a> <>
|
43
|
+
INFO bio-gff3: Adding CDS <cds_short> <>
|
44
|
+
INFO bio-gff3: Adding CDS <cds1> <>
|
45
|
+
INFO bio-gff3: Adding CDS <cds1> <>
|
46
|
+
INFO bio-gff3: Adding CDS <cds2> <>
|
47
|
+
INFO bio-gff3: Cache calls After reading files = 0 <>
|
48
|
+
INFO bio-gff3: Cache hits After reading files = 0 <>
|
49
|
+
INFO bio-gff3: Cache misses After reading files = 0 <>
|
50
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Transcript:trans-1>
|
51
|
+
WARN bio-gff3: No sequence information for <Transcript:trans-1>
|
52
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:AL12345.2>
|
53
|
+
WARN bio-gff3: No sequence information for <mRNA:trans-8>
|
54
|
+
INFO bio-gff3: find_component: Matched (long search) column 0 and location <Clone:ABC123>
|
55
|
+
WARN bio-gff3: No sequence information for <Misc:thing3>
|
56
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
57
|
+
>cds_short Sequence:test01_1:400 (3:14)
|
58
|
+
GAAGATTTGTAT
|
59
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
60
|
+
>cds1 Sequence:test01_1:400 (164:190, 192:200)
|
61
|
+
TGGCGACTATCGGTCGAAGTTAAGACATTCATGGGC
|
62
|
+
INFO bio-gff3: find_component: Matched seqname <test01>
|
63
|
+
>cds2 Sequence:test01_1:400 (192:200)
|
64
|
+
TTCATGGGC
|
65
|
+
INFO bio-gff3: Cache calls After iterating = 76 <>
|
66
|
+
INFO bio-gff3: Cache hits After iterating = 52 <>
|
67
|
+
INFO bio-gff3: Cache misses After iterating = 24 <>
|
68
|
+
INFO bio-gff3: Memory used Done RAM 10M, VMEM 14M
|
@@ -1,3 +1,4 @@
|
|
1
|
+
INFO bio-gff3: Memory used BaseLine RAM 10M, VMEM 14M
|
1
2
|
INFO bio-gff3: ---- Digest DB and store data in mRNA Hash (NoCache) <>
|
2
3
|
INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
|
3
4
|
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
@@ -54,3 +55,4 @@ TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGA
|
|
54
55
|
INFO bio-gff3: find_component: Matched seqname <test01>
|
55
56
|
>mrna01a Sequence:test01_1:400 (101:280)
|
56
57
|
TATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACACCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGATAATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
|
58
|
+
INFO bio-gff3: Memory used Done RAM 10M, VMEM 14M
|
@@ -1,4 +1,5 @@
|
|
1
|
-
INFO bio-gff3:
|
1
|
+
INFO bio-gff3: Memory used BaseLine RAM 10M, VMEM 14M
|
2
|
+
INFO bio-gff3: ---- Digest DB and store data in mRNA Hash (NoCache) <>
|
2
3
|
INFO bio-gff3: Added transcript with component ID Transcript:trans-1 <>
|
3
4
|
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
4
5
|
INFO bio-gff3: Adding exon <Transcript:trans-1> <>
|
@@ -10,7 +11,6 @@
|
|
10
11
|
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
11
12
|
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
12
13
|
INFO bio-gff3: Adding exon <Transcript:trans-2> <>
|
13
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
14
14
|
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 1 2000>
|
15
15
|
INFO bio-gff3: Added Component with component ID Contig2 1 2000 <>
|
16
16
|
WARN bio-gff3: Container <Component> has no ID, so using sequence name instead <Contig2 2001 5000>
|
@@ -21,20 +21,16 @@
|
|
21
21
|
INFO bio-gff3: Added Component with component ID Contig2 2001 37450 <>
|
22
22
|
INFO bio-gff3: Added transcript with component ID Transcript:trans-3 <>
|
23
23
|
INFO bio-gff3: Added transcript with component ID Transcript:trans-4 <>
|
24
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
25
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
26
24
|
INFO bio-gff3: Added Component with component ID Clone:AL12345.2 <>
|
27
25
|
INFO bio-gff3: Adding mRNA <mRNA:trans-8> <>
|
28
26
|
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
29
27
|
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
30
28
|
INFO bio-gff3: Adding CDS <mRNA:trans-8> <>
|
31
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
32
29
|
INFO bio-gff3: Added Component with component ID Clone:ABC123 <>
|
33
30
|
INFO bio-gff3: Added gene with component ID Misc:thing1 <>
|
34
31
|
INFO bio-gff3: Adding gene <Misc:thing1> <>
|
35
32
|
INFO bio-gff3: Adding mRNA <Misc:thing2> <>
|
36
33
|
INFO bio-gff3: Adding CDS <Misc:thing3> <>
|
37
|
-
WARN bio-gff3: Record with unknown ID. . . . . . . . .
|
38
34
|
INFO bio-gff3: Added contig with component ID test01 <>
|
39
35
|
INFO bio-gff3: Added gene with component ID gene01 <>
|
40
36
|
INFO bio-gff3: Adding gene <gene01> <>
|
@@ -63,3 +59,4 @@ TGGCGACTATCGGTCGAAGTTAAGACATTCATGGGC
|
|
63
59
|
INFO bio-gff3: find_component: Matched seqname <test01>
|
64
60
|
>cds2 Sequence:test01_1:400 (192:200)
|
65
61
|
TTCATGGGC
|
62
|
+
INFO bio-gff3: Memory used Done RAM 10M, VMEM 14M
|
data/test/test_bio-gff3.rb
CHANGED
@@ -20,7 +20,12 @@ class Gff3Test < Test::Unit::TestCase
|
|
20
20
|
|
21
21
|
def test_nocache
|
22
22
|
assert_equal(true,single_run("mRNA --cache none #{DAT}/gff/test-ext-fasta.fa #{DAT}/gff/test-ext-fasta.gff3",this_method+'_ext_gff3'))
|
23
|
-
assert_equal(true,single_run("CDS #{DAT}/gff/test.gff3",this_method+'_gff3'))
|
23
|
+
assert_equal(true,single_run("CDS --cache none #{DAT}/gff/test.gff3",this_method+'_gff3'))
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_lrucache
|
27
|
+
assert_equal(true,single_run("mRNA --cache lru #{DAT}/gff/test-ext-fasta.fa #{DAT}/gff/test-ext-fasta.gff3",this_method+'_ext_gff3'))
|
28
|
+
assert_equal(true,single_run("CDS --cache lru #{DAT}/gff/test.gff3",this_method+'_gff3'))
|
24
29
|
end
|
25
30
|
|
26
31
|
private
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 8
|
8
|
-
-
|
9
|
-
version: 0.8.
|
8
|
+
- 6
|
9
|
+
version: 0.8.6
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Pjotr Prins
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-17 00:00:00 +01:00
|
18
18
|
default_executable: gff3-fetch
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -33,50 +33,50 @@ dependencies:
|
|
33
33
|
prerelease: false
|
34
34
|
version_requirements: *id001
|
35
35
|
- !ruby/object:Gem::Dependency
|
36
|
-
name:
|
36
|
+
name: log4r
|
37
37
|
requirement: &id002 !ruby/object:Gem::Requirement
|
38
38
|
none: false
|
39
39
|
requirements:
|
40
40
|
- - ">"
|
41
41
|
- !ruby/object:Gem::Version
|
42
42
|
segments:
|
43
|
-
-
|
44
|
-
-
|
45
|
-
-
|
46
|
-
version:
|
43
|
+
- 1
|
44
|
+
- 1
|
45
|
+
- 6
|
46
|
+
version: 1.1.6
|
47
47
|
type: :runtime
|
48
48
|
prerelease: false
|
49
49
|
version_requirements: *id002
|
50
50
|
- !ruby/object:Gem::Dependency
|
51
|
-
name:
|
51
|
+
name: bio-logger
|
52
52
|
requirement: &id003 !ruby/object:Gem::Requirement
|
53
53
|
none: false
|
54
54
|
requirements:
|
55
|
-
- - "
|
55
|
+
- - ">"
|
56
56
|
- !ruby/object:Gem::Version
|
57
57
|
segments:
|
58
58
|
- 0
|
59
|
-
|
60
|
-
|
59
|
+
- 8
|
60
|
+
- 0
|
61
|
+
version: 0.8.0
|
62
|
+
type: :runtime
|
61
63
|
prerelease: false
|
62
64
|
version_requirements: *id003
|
63
65
|
- !ruby/object:Gem::Dependency
|
64
|
-
name:
|
66
|
+
name: shoulda
|
65
67
|
requirement: &id004 !ruby/object:Gem::Requirement
|
66
68
|
none: false
|
67
69
|
requirements:
|
68
|
-
- -
|
70
|
+
- - ">="
|
69
71
|
- !ruby/object:Gem::Version
|
70
72
|
segments:
|
71
|
-
- 1
|
72
|
-
- 0
|
73
73
|
- 0
|
74
|
-
version:
|
74
|
+
version: "0"
|
75
75
|
type: :development
|
76
76
|
prerelease: false
|
77
77
|
version_requirements: *id004
|
78
78
|
- !ruby/object:Gem::Dependency
|
79
|
-
name:
|
79
|
+
name: bundler
|
80
80
|
requirement: &id005 !ruby/object:Gem::Requirement
|
81
81
|
none: false
|
82
82
|
requirements:
|
@@ -84,99 +84,55 @@ dependencies:
|
|
84
84
|
- !ruby/object:Gem::Version
|
85
85
|
segments:
|
86
86
|
- 1
|
87
|
-
-
|
88
|
-
-
|
89
|
-
version: 1.
|
87
|
+
- 0
|
88
|
+
- 0
|
89
|
+
version: 1.0.0
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: *id005
|
93
93
|
- !ruby/object:Gem::Dependency
|
94
|
-
name:
|
94
|
+
name: jeweler
|
95
95
|
requirement: &id006 !ruby/object:Gem::Requirement
|
96
96
|
none: false
|
97
97
|
requirements:
|
98
|
-
- -
|
98
|
+
- - ~>
|
99
99
|
- !ruby/object:Gem::Version
|
100
100
|
segments:
|
101
|
-
-
|
102
|
-
|
101
|
+
- 1
|
102
|
+
- 5
|
103
|
+
- 2
|
104
|
+
version: 1.5.2
|
103
105
|
type: :development
|
104
106
|
prerelease: false
|
105
107
|
version_requirements: *id006
|
106
108
|
- !ruby/object:Gem::Dependency
|
107
|
-
name:
|
109
|
+
name: rcov
|
108
110
|
requirement: &id007 !ruby/object:Gem::Requirement
|
109
111
|
none: false
|
110
112
|
requirements:
|
111
113
|
- - ">="
|
112
114
|
- !ruby/object:Gem::Version
|
113
115
|
segments:
|
114
|
-
- 2
|
115
|
-
- 0
|
116
116
|
- 0
|
117
|
-
version:
|
117
|
+
version: "0"
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: *id007
|
121
121
|
- !ruby/object:Gem::Dependency
|
122
|
-
name:
|
122
|
+
name: rspec
|
123
123
|
requirement: &id008 !ruby/object:Gem::Requirement
|
124
124
|
none: false
|
125
125
|
requirements:
|
126
126
|
- - ">="
|
127
|
-
- !ruby/object:Gem::Version
|
128
|
-
segments:
|
129
|
-
- 1
|
130
|
-
- 4
|
131
|
-
- 1
|
132
|
-
version: 1.4.1
|
133
|
-
type: :runtime
|
134
|
-
prerelease: false
|
135
|
-
version_requirements: *id008
|
136
|
-
- !ruby/object:Gem::Dependency
|
137
|
-
name: log4r
|
138
|
-
requirement: &id009 !ruby/object:Gem::Requirement
|
139
|
-
none: false
|
140
|
-
requirements:
|
141
|
-
- - ">"
|
142
|
-
- !ruby/object:Gem::Version
|
143
|
-
segments:
|
144
|
-
- 1
|
145
|
-
- 1
|
146
|
-
- 6
|
147
|
-
version: 1.1.6
|
148
|
-
type: :runtime
|
149
|
-
prerelease: false
|
150
|
-
version_requirements: *id009
|
151
|
-
- !ruby/object:Gem::Dependency
|
152
|
-
name: bio-logger
|
153
|
-
requirement: &id010 !ruby/object:Gem::Requirement
|
154
|
-
none: false
|
155
|
-
requirements:
|
156
|
-
- - ">="
|
157
|
-
- !ruby/object:Gem::Version
|
158
|
-
segments:
|
159
|
-
- 0
|
160
|
-
- 6
|
161
|
-
- 1
|
162
|
-
version: 0.6.1
|
163
|
-
type: :runtime
|
164
|
-
prerelease: false
|
165
|
-
version_requirements: *id010
|
166
|
-
- !ruby/object:Gem::Dependency
|
167
|
-
name: rspec
|
168
|
-
requirement: &id011 !ruby/object:Gem::Requirement
|
169
|
-
none: false
|
170
|
-
requirements:
|
171
|
-
- - ">"
|
172
127
|
- !ruby/object:Gem::Version
|
173
128
|
segments:
|
174
129
|
- 2
|
130
|
+
- 3
|
175
131
|
- 0
|
176
|
-
version:
|
132
|
+
version: 2.3.0
|
177
133
|
type: :development
|
178
134
|
prerelease: false
|
179
|
-
version_requirements: *
|
135
|
+
version_requirements: *id008
|
180
136
|
description: |
|
181
137
|
GFF3 (genome browser) information and digest mRNA and CDS sequences.
|
182
138
|
Options for low memory use and caching of records.
|
@@ -200,7 +156,9 @@ files:
|
|
200
156
|
- bin/gff3-fetch
|
201
157
|
- bio-gff3.gemspec
|
202
158
|
- lib/bio-gff3.rb
|
159
|
+
- lib/bio/db/gff/block/gffblockparser.rb
|
203
160
|
- lib/bio/db/gff/digest/gffinmemory.rb
|
161
|
+
- lib/bio/db/gff/digest/gfflrucache.rb
|
204
162
|
- lib/bio/db/gff/digest/gffnocache.rb
|
205
163
|
- lib/bio/db/gff/digest/gffparser.rb
|
206
164
|
- lib/bio/db/gff/file/gfffasta.rb
|
@@ -234,6 +192,8 @@ files:
|
|
234
192
|
- test/data/gff/test.gff3
|
235
193
|
- test/data/regression/test_ext_gff3.rtest
|
236
194
|
- test/data/regression/test_gff3.rtest
|
195
|
+
- test/data/regression/test_lrucache_ext_gff3.rtest
|
196
|
+
- test/data/regression/test_lrucache_gff3.rtest
|
237
197
|
- test/data/regression/test_nocache_ext_gff3.rtest
|
238
198
|
- test/data/regression/test_nocache_gff3.rtest
|
239
199
|
- test/helper.rb
|