bio-gff3 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +14 -0
- data/Gemfile.lock +22 -0
- data/LICENSE.txt +20 -0
- data/README +65 -0
- data/README.rdoc +19 -0
- data/Rakefile +56 -0
- data/VERSION +1 -0
- data/bin/gff3-fetch +99 -0
- data/bio-gff3.gemspec +101 -0
- data/lib/bio-gff3.rb +0 -0
- data/lib/bio/db/gff/gffassemble.rb +300 -0
- data/lib/bio/db/gff/gffdb.rb +40 -0
- data/lib/bio/db/gff/gfffasta.rb +68 -0
- data/lib/bio/db/gff/gfffileiterator.rb +77 -0
- data/lib/bio/db/gff/gffinmemory.rb +63 -0
- data/lib/bio/db/gff/gffnocache.rb +124 -0
- data/lib/bio/db/gff/gffparser.rb +154 -0
- data/lib/bio/system/lruhash.rb +268 -0
- data/spec/gff3_assemble2_spec.rb +73 -0
- data/spec/gff3_assemble3_spec.rb +62 -0
- data/spec/gff3_assemble_spec.rb +291 -0
- data/spec/gff3_fileiterator_spec.rb +43 -0
- data/spec/gffdb_spec.rb +99 -0
- data/test/data/gff/MhA1_Contig1133.fa +2 -0
- data/test/data/gff/MhA1_Contig1133.gff3 +1862 -0
- data/test/data/gff/MhA1_Contig125.fa +673 -0
- data/test/data/gff/MhA1_Contig125.gff3 +2177 -0
- data/test/data/gff/standard.gff3 +25 -0
- data/test/data/gff/test-cds.gff3 +98 -0
- data/test/data/gff/test-ext-fasta.fa +16 -0
- data/test/data/gff/test-ext-fasta.gff3 +57 -0
- data/test/data/gff/test.gff3 +74 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-gff3.rb +7 -0
- metadata +180 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
##gff-version 3 ##sequence-region
|
2
|
+
ctg123 1 1497228
|
3
|
+
ctg123 . gene 1000 9000 . + . ID=gene00001;Name=EDEN
|
4
|
+
ctg123 . TF_binding_site 1000 1012 . + . Parent=gene00001
|
5
|
+
ctg123 . mRNA 1050 9000 . + . ID=mRNA00001;Parent=gene00001
|
6
|
+
ctg123 . mRNA 1050 9000 . + . ID=mRNA00002;Parent=gene00001
|
7
|
+
ctg123 . mRNA 1300 9000 . + . ID=mRNA00003;Parent=gene00001
|
8
|
+
ctg123 . exon 1300 1500 . + . Parent=mRNA00003
|
9
|
+
ctg123 . exon 1050 1500 . + . Parent=mRNA00001,mRNA00002
|
10
|
+
ctg123 . exon 3000 3902 . + . Parent=mRNA00001,mRNA00003
|
11
|
+
ctg123 . exon 5000 5500 . + . Parent=mRNA00001,mRNA00002,mRNA00003
|
12
|
+
ctg123 . exon 7000 9000 . + . Parent=mRNA00001,mRNA00002,mRNA00003
|
13
|
+
ctg123 . CDS 1201 1500 . + 0 ID=cds00001;Parent=mRNA00001
|
14
|
+
ctg123 . CDS 3000 3902 . + 0 ID=cds00001;Parent=mRNA00001
|
15
|
+
ctg123 . CDS 5000 5500 . + 0 ID=cds00001;Parent=mRNA00001
|
16
|
+
ctg123 . CDS 7000 7600 . + 0 ID=cds00001;Parent=mRNA00001
|
17
|
+
ctg123 . CDS 1201 1500 . + 0 ID=cds00002;Parent=mRNA00002
|
18
|
+
ctg123 . CDS 5000 5500 . + 0 ID=cds00002;Parent=mRNA00002
|
19
|
+
ctg123 . CDS 7000 7600 . + 0 ID=cds00002;Parent=mRNA00002
|
20
|
+
ctg123 . CDS 3301 3902 . + 0 ID=cds00003;Parent=mRNA00003
|
21
|
+
ctg123 . CDS 5000 5500 . + 1 ID=cds00003;Parent=mRNA00003
|
22
|
+
ctg123 . CDS 7000 7600 . + 1 ID=cds00003;Parent=mRNA00003
|
23
|
+
ctg123 . CDS 3391 3902 . + 0 ID=cds00004;Parent=mRNA00003
|
24
|
+
ctg123 . CDS 5000 5500 . + 1 ID=cds00004;Parent=mRNA00003
|
25
|
+
ctg123 . CDS 7000 7600 . + 1 ID=cds00004;Parent=mRNA00003
|
@@ -0,0 +1,98 @@
|
|
1
|
+
##gff-version 3 ##sequence-regio
|
2
|
+
# Gene gene:MhA1_Contig1040.frz3.gene29
|
3
|
+
MhA1_Contig1040 WormBase gene 1 182 . - . ID=gene:MhA1_Contig1040.frz3.gene29;Name=MhA1_Contig1040.frz3.gene29;Note=PREDICTED protein_coding;public_name=MhA1_Contig1040.frz3.gene29
|
4
|
+
MhA1_Contig1040 WormBase mRNA 1 182 . - . ID=transcript:MhA1_Contig1040.
|
5
|
+
frz3.gene29;Parent=gene:MhA1_Contig1040.frz3.gene29;Name=MhA1_Contig1040.frz3.ge
|
6
|
+
ne29;public_name=MhA1_Contig1040.frz3.gene29
|
7
|
+
MhA1_Contig1040 WormBase exon 1 182 . - . ID=exon:MhA1_Contig1040.frz3.g
|
8
|
+
ene29.1;Parent=transcript:MhA1_Contig1040.frz3.gene29
|
9
|
+
MhA1_Contig1040 WormBase CDS 1 180 . - 2 ID=cds:MhA1_Contig1040.frz3.gene
|
10
|
+
29;Parent=transcript:MhA1_Contig1040.frz3.gene29
|
11
|
+
|
12
|
+
##gff-version 3 ##sequence-regio
|
13
|
+
# Gene gene:MhA1_Contig2992.frz3.gene1
|
14
|
+
MhA1_Contig2992 WormBase gene 577 2176 . - . ID=gene:MhA1_Contig2992.frz3.gene1;Name=MhA1_Contig2992.frz3.gene1;Note=PREDICTED protein_coding;public_name=MhA1_Contig2992.frz3.gene1
|
15
|
+
MhA1_Contig2992 WormBase mRNA 577 2176 . - . ID=transcript:MhA1_Contig2992.frz3.gene1;Parent=gene:MhA1_Contig2992.frz3.gene1;Name=MhA1_Contig2992.frz3.gene1;public_name=MhA1_Contig2992.frz3.gene1
|
16
|
+
MhA1_Contig2992 WormBase exon 2078 2176 . - . ID=exon:MhA1_Contig2992.frz3.gene1.1;Parent=transcript:MhA1_Contig2992.frz3.gene1
|
17
|
+
MhA1_Contig2992 WormBase exon 1692 1944 . - . ID=exon:MhA1_Contig2992.frz3.gene1.2;Parent=transcript:MhA1_Contig2992.frz3.gene1
|
18
|
+
MhA1_Contig2992 WormBase exon 1439 1587 . - . ID=exon:MhA1_Contig2992.frz3.gene1.3;Parent=transcript:MhA1_Contig2992.frz3.gene1
|
19
|
+
MhA1_Contig2992 WormBase exon 1096 1241 . - . ID=exon:MhA1_Contig2992.frz3.gene1.4;Parent=transcript:MhA1_Contig2992.frz3.gene1
|
20
|
+
MhA1_Contig2992 WormBase exon 927 1049 . - . ID=exon:MhA1_Contig2992.frz3.gene1.5;Parent=transcript:MhA1_Contig2992.frz3.gene1
|
21
|
+
MhA1_Contig2992 WormBase exon 577 732 . - . ID=exon:MhA1_Contig2992.frz3.gene1.6;Parent=transcript:MhA1_Contig2992.frz3.gene1
|
22
|
+
MhA1_Contig2992 WormBase CDS 2078 2174 . - 2 ID=cds:MhA1_Contig2992.frz3.gene1;Parent=transcript:MhA1_Contig2992.frz3.gene1
|
23
|
+
MhA1_Contig2992 WormBase CDS 1692 1944 . - 2 ID=cds:MhA1_Contig2992.frz3.gene1;Parent=transcript:MhA1_Contig2992.frz3.gene1
|
24
|
+
MhA1_Contig2992 WormBase CDS 1439 1587 . - 1 ID=cds:MhA1_Contig2992.frz3.gene1;Parent=transcript:MhA1_Contig2992.frz3.gene1
|
25
|
+
MhA1_Contig2992 WormBase CDS 1096 1241 . - 2 ID=cds:MhA1_Contig2992.frz3.gene1;Parent=transcript:MhA1_Contig2992.frz3.gene1
|
26
|
+
MhA1_Contig2992 WormBase CDS 927 1049 . - 0 ID=cds:MhA1_Contig2992.frz3.gene1;Parent=transcript:MhA1_Contig2992.frz3.gene1
|
27
|
+
MhA1_Contig2992 WormBase CDS 577 732 . - 0 ID=cds:MhA1_Contig2992.frz3.gene1;Parent=transcript:MhA1_Contig2992.frz3.gene1
|
28
|
+
MhA1_Contig2992 blat_nembase_ests nucleotide_match 1778 2214 45.29 - . ID=blat_nembase_ests.1828564;Name=MJC03471;Target=MJC03471 88 372;Gap=30M29D31I66M165D11I27M18I33M19D22I10M21D37M
|
29
|
+
MhA1_Contig2992 blat_washu_ests nucleotide_match 1529 2164 36.82 - . ID=blat_washu_ests.2375120;Name=MJ04126;Target=MJ04126 159 530;Gap=80M159D5I24M56D77I12M21D38M3I48M131D18I67M
|
30
|
+
MhA1_Contig2992 blat_washu_ests nucleotide_match 1778 2214 34.08 - . ID=blat_washu_ests.2425103;Name=MJ04126;Target=MJ04126 106 391;Gap=59M3I66M165D11I27M18I33M19D22I10M21D37M
|
31
|
+
MhA1_Contig2992 TRF repeat_region 83 147 102 - . ID=TRF.57974
|
32
|
+
MhA1_Contig2992 RepeatMask repeat_region 169 404 2011 - . ID=RepeatMask.324631
|
33
|
+
MhA1_Contig2992 Dust repeat_region 397 405 . - . ID=Dust.42101
|
34
|
+
MhA1_Contig2992 RepeatMask repeat_region 405 565 35 + . ID=RepeatMask.324632
|
35
|
+
MhA1_Contig2992 Dust repeat_region 493 565 . - . ID=Dust.42102
|
36
|
+
MhA1_Contig2992 RepeatMask repeat_region 1243 1266 24 + . ID=RepeatMask.324633
|
37
|
+
MhA1_Contig2992 RepeatMask repeat_region 1695 1834 200 + . ID=RepeatMask.324634
|
38
|
+
MhA1_Contig2992 Dust repeat_region 1709 1795 . - . ID=Dust.42103
|
39
|
+
MhA1_Contig2992 Dust repeat_region 1881 1931 . - . ID=Dust.42104
|
40
|
+
MhA1_Contig2992 Dust repeat_region 1944 2071 . - . ID=Dust.42105
|
41
|
+
MhA1_Contig2992 RepeatMask repeat_region 1946 2076 45 + . ID=RepeatMask.324635
|
42
|
+
MhA1_Contig2992 RepeatMask repeat_region 2136 2257 66 + . ID=RepeatMask.324636
|
43
|
+
MhA1_Contig2992 Dust repeat_region 2159 2281 . - . ID=Dust.42107
|
44
|
+
MhA1_Contig2992 TRF repeat_region 2305 2364 76 - . ID=TRF.57978
|
45
|
+
MhA1_Contig2992 Dust repeat_region 2319 2380 . - . ID=Dust.42109
|
46
|
+
MhA1_Contig2992 RepeatMask repeat_region 2333 2437 42 + . ID=RepeatMask.324637
|
47
|
+
MhA1_Contig2992 Dust repeat_region 2395 2437 . - . ID=Dust.42110
|
48
|
+
MhA1_Contig2992 TRF repeat_region 2688 2730 56 - . ID=TRF.57984
|
49
|
+
MhA1_Contig2992 RepeatMask repeat_region 2957 3019 28 + . ID=RepeatMask.324638
|
50
|
+
MhA1_Contig2992 Dust repeat_region 2959 3019 . - . ID=Dust.42112
|
51
|
+
MhA1_Contig2992 RepeatMask repeat_region 3194 3237 23 + . ID=RepeatMask.324639
|
52
|
+
MhA1_Contig2992 Dust repeat_region 3222 3277 . - . ID=Dust.42114
|
53
|
+
##FASTA
|
54
|
+
>MhA1_Contig2992
|
55
|
+
TTTTGGTGACCAAAGTTCCTATTGGTGACCAAAATTCCAGTGCCCAATATTCCGTTTTTTGACTTGGTGACCAAAATTCC
|
56
|
+
GCTGGTGACCAAAATTCCAAAAAATTGGTGACCAAAGTTCCGAAAAATCTTGGTGACCAAAATTCCGGTGACCAAAATTC
|
57
|
+
TGGGACTCCTCAGGTTCGATGCCTGGCGGCAGCTGGTGGCCGGTTTAGTGTAGTCCCTGTATAGCACTTACACAGGTGGC
|
58
|
+
ACGCCCTGAGTGGGGAGGCAATTGGGTCTAGCGTGCTTGTAAATACCGAGCCGGCAAAAGGTATTGACACATCCACTAAC
|
59
|
+
AAGTATATGTAAATCCTTAACACTCCCCCTCCACATGTAAGTGCCTAAAAGCCTCTGTGGTTGATTTAATTACACCAAAA
|
60
|
+
AAAAATTATAAATTTTATTTTCTTAAACTTTTGTTTTATGATTTAATTAATTTATAACACATCATATTATTTAACCAGTT
|
61
|
+
TTCATAATTAGTAAATAATTTTTCATTCAATAAATTTCATGTAATTTAGAAATAAGAAAAATTTTAAAAAATTATAATTT
|
62
|
+
AAAAACGATTATTATTCTATGTAAGATTTGGCGGTATATTTCCATCTAATGGTCCAATAACAGATATTTGTCTTTGTAAT
|
63
|
+
TCCTGACGAATACTATCGTCTGTACCGCCGAGGATTTGTTCAATTTGAAGAATTAAAGCTCTTTCAGCAATATCAGGATT
|
64
|
+
ATTCAGAAGATCCTTTAAACTTTGAATTATTTGTAAATAAATGTAGGGATACTAAATTATTTTTATTAAATAGAGTTATT
|
65
|
+
TTACAGAAAAATATTCTATGAGGCAAAATAAAAGCAATATTTATTATTAAACTATCAAGCAATAAATTCTTTCCTTGACG
|
66
|
+
AAAATTTTTGACTTAAAAAAATCAGGCCTTAACAATTAATTATTACCTGAATTCTTTTATCAACATTCTGTGCATTTCTG
|
67
|
+
CTTAATTTAATTTGAACATTTCGTTGACGTTCACTTTTAAATTGTTTTTCTTTTTGTTGTAAAGATTGTTGAAATTGTTT
|
68
|
+
ATAAAGATCCTCAAAATAGAACAAATTTAATTGTTAACTCTAAAAATAAACAAACAGAAATTCTCTGCCCTTGTTGATCA
|
69
|
+
GCCCAATTCTGTTTATCTCTTCGTATAGAAGACTTGGACCTGTCATTTTCTGCTTCAAGTTGAAGAAATTTCTGACGCTC
|
70
|
+
TGAGAGACTGGCCGTTTTCAAAAATTCTGGTAATTGATCATCTAAATAAAATTTAAATATTAATTTGTTTCAACATTTAT
|
71
|
+
CCTTATATTTCTCAAATTTTCATTTAAATAACTTTCTTAATTCCAGATAACCCCAAATTTGTAATGTCTTTGTCATTCGA
|
72
|
+
CATAGAAAAACGAATCCATAAATTGGGCGAAGGATCCTTAAAATTTTCACGCCACGAAAATAAAAATATAAAGATTACCA
|
73
|
+
TTCGATCTTTTTAAATTATTTGTTCCATCTAATGTGCCGCTATCTAAGCCTCTTTGTGATTTATCATTATTGCCAGAATC
|
74
|
+
AAAACTATCACGTTGATTATTTCTGTCTTGATTCTGCTCATCAAACCTTTCAGAAGCCTTATTTCTTCTAAATTGTTAAA
|
75
|
+
TATTGAAATACTTTTTCGATCTCTAAGTATTATCGAAAATTAAAATATACAGTGATTTATTATAATAAAAGAACATAATT
|
76
|
+
TGATAACATACTCATTTTTATCCCTTCCTTTATTCCTTCCTAATTCATACTTTTCTATCATTCCTTTTTTCCCTTTCTTT
|
77
|
+
TTTCTATTATCCTCTTCTTTTTCATATTCACTTTCATTATCCCCAAAATTACTTATTTGTTTTTTGCTTTCTTTTGAAGG
|
78
|
+
TATTTCATTATTATCAAAACCTTCACTTTGAATATCCAACTCTTTATTTCCTTCTTTTTGATTTTTACCGAATTTTTTGT
|
79
|
+
TGCCTTTTTTTGTTGCTATATTGTCTAAATAAAATAATGTTAAAATCTAATTTAAATGTTAAAATTTAATTTGTGATTTT
|
80
|
+
TTTCAATTTTCGGCTTATAAAAATCTTTTTTTAAAAAACAAAATTTTAATTAAATAAGTTTTAATAAAAATATTTACCTT
|
81
|
+
CATTTTGTTCAGAATTAATTCCATCATTAAACAAATTTTCATTCGAATCAGATTGAATATTAAAAGAATTATCATTTATT
|
82
|
+
TTTTTATTAATTTTAGCTTTGATAATTAATAAATAAAAAGTAAATAATAATAATTTTATTAATATTTTCATATTAATTTT
|
83
|
+
AGAATTTTATTTATTTTGACAGTTAATAAGTATTATTATTAGAAAACTTGCCAATAATTGATTTCTAAATTTAAATGGAT
|
84
|
+
TTTTGTTTCAGCTAAATTTAAATGTTTTATTTCATTTAAATTTAGAAATTTATTATGTTTGTATAGCATTATTCAATAAT
|
85
|
+
TTTTAATAAATAAATTTAAATTTTTTGAATAAAAAATCGAGAAAACCTAAATTATATATTAAGAATTTCTAAATTTCTCG
|
86
|
+
AAGATATTTACAAAACAACTTGCAAAGTGATATTTTTTCTAAACATGTCAAAAGCCCTAATTATTACATGCATTAAAGCA
|
87
|
+
AAAATTTTCTTTTTAATATTTGAAATAAATTCCAATCAAAGGATTTACGATAATCAAGAAATAATATAGTTCATATATTC
|
88
|
+
TAATAAAGTCTTTAATATAATATACCTAGTCTTTAGAAGTAGAATTCAGAAAAGTTTATAAAGAAGAGAAATAGTTTAAA
|
89
|
+
GGAAAGAAAAAATCATTCAAAAATTGTTTTCTTATCATATATTCTTTACATCTTTTGAAGTTTGTGTATAAACCAAATTG
|
90
|
+
TAGAATATATTGAAAATAAAATCACTCCAATAATAATTAACAAAATACATAAAAAATGGCGTGGTTGACAACCGCATTTT
|
91
|
+
CTCTCAATATCGAAGCCACGTTTGCGGCGATTTTTATTTTGAAGAGGAAAATCTGATATTTATTGTATAGATTTAGTTAT
|
92
|
+
TTAATAATTAAATTTCAAAAATTACTTGTCTTATTTAATCTTTTATTTATTTTATTTTTGCATTCAAAAGAAGGAATCTC
|
93
|
+
AAATTTTAAAGTAAATGCTTTCCAAATTGAAATAGCCATATCAAAAGATCCTTCATTTATTAAAGGTTCTTTTCTCACAA
|
94
|
+
TAATATTCTCCATTTCTTTCCAATCAAAATTATTCTTTATTTCAAAACTATTCCAAAAATTATCCAAAATTCCAATTAAA
|
95
|
+
TATTTTTTGTTAAATAAAAGGTTTAAATTAATTATTTGTGCTTTTTCGAATTTTTCATTTAAATCCTTTATTTTTTTGAA
|
96
|
+
ATTATCATAAAGCTCTAATGATGCTTTTTGAATTTTTGAGACATTTTCAATATCAAAATTTGGTCCGGAAAATTTATTTA
|
97
|
+
>MhA1_Contig1040
|
98
|
+
TTAATTAATTTGCCTAGAAAAACAAAGGCATAACATGCTTGCAGTCATCATACGGTAAGAGAGAAACCAACGATATGTTAATAATGTTGATGGGGGAATATCCTCATTAGAATTCTTTTTTGGGTGAATTGAAATTGCCATATTATTAGTATTATTAGAAAATATTAAATTTGTTGATAAAC
|
@@ -0,0 +1,16 @@
|
|
1
|
+
>test01
|
2
|
+
ACGAAGATTTGTATGACTGATTTATCCTGGACAGGCATTGGTCAGATGTCTCCTTCCGTATCGTCGTTTA
|
3
|
+
GTTGCAAATCCGAGTGTTCGGGGGTATTGCTATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACA
|
4
|
+
CCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGAT
|
5
|
+
AATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
|
6
|
+
GCCCAAGAATGCGATCCCAGAAGTCTTGGTTCTAAAGTCGTCGGAAAGATTTGAGGAACTGCCATACAGC
|
7
|
+
CCGTGGGTGAAACTGTCGACATCCATTGTGCGAATAGGCCTGCTAGTGAC
|
8
|
+
>test02
|
9
|
+
ACGAAGATTTGTATGACTGATTTATCCTGGACAGGCATTGGTCAGATGTCTCCTTCCGTATCGTCGTTTA
|
10
|
+
GTTGCAAATCCGAGTGTTCGGGGGTATTGCTATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACA
|
11
|
+
CCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGAT
|
12
|
+
AATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
|
13
|
+
GCCCAAGAATGCGATCCCAGAAGTCTTGGTTCTAAAGTCGTCGGAAAGATTTGAGGAACTGCCATACAGC
|
14
|
+
CCGTGGGTGAAACTGTCGACATCCATTGTGCGAATAGGCCTGCTAGTGAC
|
15
|
+
|
16
|
+
|
@@ -0,0 +1,57 @@
|
|
1
|
+
##gff-version 3
|
2
|
+
## sequence-region Contig1 1 37450
|
3
|
+
Contig1 confirmed transcript 1001 2000 42 + . ID=Transcript:trans-1;Gene=abc-1;Gene=xyz-2;Note=function+unknown
|
4
|
+
Contig1 confirmed exon 1001 1100 . + . ID=Transcript:trans-1
|
5
|
+
Contig1 confirmed exon 1201 1300 . + . ID=Transcript:trans-1
|
6
|
+
Contig1 confirmed exon 1401 1450 . + . ID=Transcript:trans-1
|
7
|
+
Contig1 confirmed CDS 1051 1100 . + 0 ID=Transcript:trans-1
|
8
|
+
Contig1 confirmed CDS 1201 1300 . + 2 ID=Transcript:trans-1
|
9
|
+
Contig1 confirmed CDS 1401 1440 . + 0 ID=Transcript:trans-1
|
10
|
+
Contig1 est similarity 1001 1100 96 . . Target=EST:CEESC13F 1 100 +
|
11
|
+
Contig1 est similarity 1201 1300 99 . . Target=EST:CEESC13F 101 200 +
|
12
|
+
Contig1 est similarity 1401 1450 99 . . Target=EST:CEESC13F 201 250 +
|
13
|
+
Contig1 tc1 transposon 5001 6000 . + . ID=Transposon:c128.1
|
14
|
+
Contig1 tc1 transposon 8001 9000 . - . ID=Transposon:c128.2
|
15
|
+
Contig1 confirmed transcript 30001 31000 . - . ID=Transcript:trans-2;Gene=xyz-2;Note=Terribly+interesting
|
16
|
+
Contig1 confirmed exon 30001 30100 . - . ID=Transcript:trans-2;Gene=abc-1;Note=function+unknown
|
17
|
+
Contig1 confirmed exon 30701 30800 . - . ID=Transcript:trans-2
|
18
|
+
Contig1 confirmed exon 30801 31000 . - . ID=Transcript:trans-2
|
19
|
+
|
20
|
+
## sequence-region Contig2 1 37450
|
21
|
+
Contig2 clone Component 1 2000 . . . Target=Clone:AL12345.1 1 2000 +;Note=Terribly+interesting
|
22
|
+
Contig2 clone Component 2001 5000 . . . Target=Clone:AL11111.1 6000 3001 +
|
23
|
+
Contig2 clone Component 5001 20000 . . . Target=Clone:AC13221.2 1 15000 +
|
24
|
+
Contig2 clone Component 2001 37450 . . . Target=Clone:M7.3 1001 36450 +
|
25
|
+
Contig2 predicted transcript 2501 4500 . + . ID=Transcript:trans-3;Alias=trans-18
|
26
|
+
Contig2 predicted transcript 5001 8001 . - . ID=Transcript:trans-4
|
27
|
+
|
28
|
+
|
29
|
+
#processed_transcript
|
30
|
+
Contig3 clone Component 1 50000 . . . ID=Clone:AL12345.2
|
31
|
+
Contig3 confirmed mRNA 32000 35000 . + . ID=mRNA:trans-8
|
32
|
+
Contig3 confirmed UTR 32000 32100 . + . ID=mRNA:trans-8
|
33
|
+
Contig3 confirmed CDS 32101 33000 . + . ID=mRNA:trans-8
|
34
|
+
Contig3 confirmed CDS 34000 34500 . + . ID=mRNA:trans-8
|
35
|
+
Contig3 confirmed CDS 34600 34900 . + . ID=mRNA:trans-8
|
36
|
+
Contig3 confirmed UTR 34901 35000 . + . ID=mRNA:trans-8
|
37
|
+
|
38
|
+
## preferred group assignments
|
39
|
+
Contig4 clone Component 1 50000 . . . ID=Clone:ABC123
|
40
|
+
Contig4 confirmed gene 32000 35000 . + . ID=Misc:thing1;gene=gene-9
|
41
|
+
Contig4 confirmed mRNA 32000 35000 . + . ID=Misc:thing2;mRNA=trans-9;gene=gene-9
|
42
|
+
Contig4 confirmed CDS 32000 35000 . + . ID=Misc:thing3;mRNA=trans-9
|
43
|
+
|
44
|
+
##gff-version 3
|
45
|
+
##sequence-region test01 1 400
|
46
|
+
test01 RANDOM contig 1 400 . + . ID=test01;Note=this is test
|
47
|
+
test01 . mRNA 3 14 . + . ID=mrna01short;Name=testmRNA;Note=this is test mRNA
|
48
|
+
test01 . mRNA 101 230 . + . ID=mrna01;Name=testmRNA;Note=this is test mRNA
|
49
|
+
test01 . mRNA 101 280 . + . ID=mrna01a;Name=testmRNAalterative;Note=test of alternative splicing variant
|
50
|
+
test01 . exon 101 160 . + . ID=exon01;Name=exon01;Alias=exon 1;Parent=mrna01,mrna01a
|
51
|
+
test01 . exon 201 230 . + . ID=exon02;Name=exon02;Alias=exon 2;Parent=mrna01
|
52
|
+
test01 . exon 251 280 . + . ID=exon02a;Name=exon02a;Alias=exon 2a;Parent=mrna01a
|
53
|
+
test01 . CDS 3 14 . + . ID=cds_short;Name=testmRNA;Parent=mrna01short;Note=this is test mRNA
|
54
|
+
test01 . CDS 192 200 . + . ID=cds1;Parent=mrna01a
|
55
|
+
test01 . CDS 164 190 . + . ID=cds1;Parent=mrna01a
|
56
|
+
test01 . CDS 192 200 . + . ID=cds2;Parent=mrna01a
|
57
|
+
test01 . Match 101 123 . . . ID=match01;Name=match01;Target=EST101 1 21;Gap=M8 D3 M6 I1 M6
|
@@ -0,0 +1,74 @@
|
|
1
|
+
##gff-version 3
|
2
|
+
## sequence-region Contig1 1 37450
|
3
|
+
Contig1 confirmed transcript 1001 2000 42 + . ID=Transcript:trans-1;Gene=abc-1;Gene=xyz-2;Note=function+unknown
|
4
|
+
Contig1 confirmed exon 1001 1100 . + . ID=Transcript:trans-1
|
5
|
+
Contig1 confirmed exon 1201 1300 . + . ID=Transcript:trans-1
|
6
|
+
Contig1 confirmed exon 1401 1450 . + . ID=Transcript:trans-1
|
7
|
+
Contig1 confirmed CDS 1051 1100 . + 0 ID=Transcript:trans-1
|
8
|
+
Contig1 confirmed CDS 1201 1300 . + 2 ID=Transcript:trans-1
|
9
|
+
Contig1 confirmed CDS 1401 1440 . + 0 ID=Transcript:trans-1
|
10
|
+
Contig1 est similarity 1001 1100 96 . . Target=EST:CEESC13F 1 100 +
|
11
|
+
Contig1 est similarity 1201 1300 99 . . Target=EST:CEESC13F 101 200 +
|
12
|
+
Contig1 est similarity 1401 1450 99 . . Target=EST:CEESC13F 201 250 +
|
13
|
+
Contig1 tc1 transposon 5001 6000 . + . ID=Transposon:c128.1
|
14
|
+
Contig1 tc1 transposon 8001 9000 . - . ID=Transposon:c128.2
|
15
|
+
Contig1 confirmed transcript 30001 31000 . - . ID=Transcript:trans-2;Gene=xyz-2;Note=Terribly+interesting
|
16
|
+
Contig1 confirmed exon 30001 30100 . - . ID=Transcript:trans-2;Gene=abc-1;Note=function+unknown
|
17
|
+
Contig1 confirmed exon 30701 30800 . - . ID=Transcript:trans-2
|
18
|
+
Contig1 confirmed exon 30801 31000 . - . ID=Transcript:trans-2
|
19
|
+
|
20
|
+
## sequence-region Contig2 1 37450
|
21
|
+
Contig2 clone Component 1 2000 . . . Target=Clone:AL12345.1 1 2000 +;Note=Terribly+interesting
|
22
|
+
Contig2 clone Component 2001 5000 . . . Target=Clone:AL11111.1 6000 3001 +
|
23
|
+
Contig2 clone Component 5001 20000 . . . Target=Clone:AC13221.2 1 15000 +
|
24
|
+
Contig2 clone Component 2001 37450 . . . Target=Clone:M7.3 1001 36450 +
|
25
|
+
Contig2 predicted transcript 2501 4500 . + . ID=Transcript:trans-3;Alias=trans-18
|
26
|
+
Contig2 predicted transcript 5001 8001 . - . ID=Transcript:trans-4
|
27
|
+
|
28
|
+
|
29
|
+
#processed_transcript
|
30
|
+
Contig3 clone Component 1 50000 . . . ID=Clone:AL12345.2
|
31
|
+
Contig3 confirmed mRNA 32000 35000 . + . ID=mRNA:trans-8
|
32
|
+
Contig3 confirmed UTR 32000 32100 . + . ID=mRNA:trans-8
|
33
|
+
Contig3 confirmed CDS 32101 33000 . + . ID=mRNA:trans-8
|
34
|
+
Contig3 confirmed CDS 34000 34500 . + . ID=mRNA:trans-8
|
35
|
+
Contig3 confirmed CDS 34600 34900 . + . ID=mRNA:trans-8
|
36
|
+
Contig3 confirmed UTR 34901 35000 . + . ID=mRNA:trans-8
|
37
|
+
|
38
|
+
## preferred group assignments
|
39
|
+
Contig4 clone Component 1 50000 . . . ID=Clone:ABC123
|
40
|
+
Contig4 confirmed gene 32000 35000 . + . ID=Misc:thing1;gene=gene-9
|
41
|
+
Contig4 confirmed mRNA 32000 35000 . + . ID=Misc:thing2;mRNA=trans-9;gene=gene-9
|
42
|
+
Contig4 confirmed CDS 32000 35000 . + . ID=Misc:thing3;mRNA=trans-9
|
43
|
+
|
44
|
+
##gff-version 3
|
45
|
+
##sequence-region test01 1 400
|
46
|
+
test01 RANDOM contig 1 400 . + . ID=test01;Note=this is test
|
47
|
+
test01 . mRNA 3 14 . + . ID=mrna01short;Name=testmRNA;Note=this is test mRNA
|
48
|
+
test01 . mRNA 101 230 . + . ID=mrna01;Name=testmRNA;Note=this is test mRNA
|
49
|
+
test01 . mRNA 101 280 . + . ID=mrna01a;Name=testmRNAalterative;Note=test of alternative splicing variant
|
50
|
+
test01 . exon 101 160 . + . ID=exon01;Name=exon01;Alias=exon 1;Parent=mrna01,mrna01a
|
51
|
+
test01 . exon 201 230 . + . ID=exon02;Name=exon02;Alias=exon 2;Parent=mrna01
|
52
|
+
test01 . exon 251 280 . + . ID=exon02a;Name=exon02a;Alias=exon 2a;Parent=mrna01a
|
53
|
+
test01 . CDS 3 14 . + . ID=cds_short;Name=testmRNA;Parent=mrna01short;Note=this is test mRNA
|
54
|
+
test01 . CDS 192 200 . + . ID=cds1;Parent=mrna01a
|
55
|
+
test01 . CDS 164 190 . + . ID=cds1;Parent=mrna01a
|
56
|
+
test01 . CDS 192 200 . + . ID=cds2;Parent=mrna01a
|
57
|
+
test01 . Match 101 123 . . . ID=match01;Name=match01;Target=EST101 1 21;Gap=M8 D3 M6 I1 M6
|
58
|
+
##FASTA
|
59
|
+
>test01
|
60
|
+
ACGAAGATTTGTATGACTGATTTATCCTGGACAGGCATTGGTCAGATGTCTCCTTCCGTATCGTCGTTTA
|
61
|
+
GTTGCAAATCCGAGTGTTCGGGGGTATTGCTATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACA
|
62
|
+
CCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGAT
|
63
|
+
AATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
|
64
|
+
GCCCAAGAATGCGATCCCAGAAGTCTTGGTTCTAAAGTCGTCGGAAAGATTTGAGGAACTGCCATACAGC
|
65
|
+
CCGTGGGTGAAACTGTCGACATCCATTGTGCGAATAGGCCTGCTAGTGAC
|
66
|
+
>test02
|
67
|
+
ACGAAGATTTGTATGACTGATTTATCCTGGACAGGCATTGGTCAGATGTCTCCTTCCGTATCGTCGTTTA
|
68
|
+
GTTGCAAATCCGAGTGTTCGGGGGTATTGCTATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACA
|
69
|
+
CCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGAT
|
70
|
+
AATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
|
71
|
+
GCCCAAGAATGCGATCCCAGAAGTCTTGGTTCTAAAGTCGTCGGAAAGATTTGAGGAACTGCCATACAGC
|
72
|
+
CCGTGGGTGAAACTGTCGACATCCATTGTGCGAATAGGCCTGCTAGTGAC
|
73
|
+
|
74
|
+
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'test/unit'
|
11
|
+
require 'shoulda'
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-gff3'
|
16
|
+
|
17
|
+
class Test::Unit::TestCase
|
18
|
+
end
|
metadata
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-gff3
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 6
|
8
|
+
- 0
|
9
|
+
version: 0.6.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Pjotr Prins
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-12-29 00:00:00 +01:00
|
18
|
+
default_executable: gff3-fetch
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: shoulda
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
+
none: false
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
version: "0"
|
30
|
+
type: :development
|
31
|
+
prerelease: false
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: bundler
|
35
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
36
|
+
none: false
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
segments:
|
41
|
+
- 1
|
42
|
+
- 0
|
43
|
+
- 0
|
44
|
+
version: 1.0.0
|
45
|
+
type: :development
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *id002
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: jeweler
|
50
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ~>
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
segments:
|
56
|
+
- 1
|
57
|
+
- 5
|
58
|
+
- 2
|
59
|
+
version: 1.5.2
|
60
|
+
type: :development
|
61
|
+
prerelease: false
|
62
|
+
version_requirements: *id003
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: rcov
|
65
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
segments:
|
71
|
+
- 0
|
72
|
+
version: "0"
|
73
|
+
type: :development
|
74
|
+
prerelease: false
|
75
|
+
version_requirements: *id004
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: bio
|
78
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
79
|
+
none: false
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
segments:
|
84
|
+
- 1
|
85
|
+
- 4
|
86
|
+
- 1
|
87
|
+
version: 1.4.1
|
88
|
+
type: :development
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *id005
|
91
|
+
description: |
|
92
|
+
GFF3 (genome browser) information and digest mRNA and CDS sequences.
|
93
|
+
Options for low memory use and caching of records.
|
94
|
+
Support for external FASTA files.
|
95
|
+
|
96
|
+
email: pjotr.prins@thebird.nl
|
97
|
+
executables:
|
98
|
+
- gff3-fetch
|
99
|
+
extensions: []
|
100
|
+
|
101
|
+
extra_rdoc_files:
|
102
|
+
- LICENSE.txt
|
103
|
+
- README
|
104
|
+
- README.rdoc
|
105
|
+
files:
|
106
|
+
- Gemfile
|
107
|
+
- Gemfile.lock
|
108
|
+
- LICENSE.txt
|
109
|
+
- README
|
110
|
+
- README.rdoc
|
111
|
+
- Rakefile
|
112
|
+
- VERSION
|
113
|
+
- bin/gff3-fetch
|
114
|
+
- bio-gff3.gemspec
|
115
|
+
- lib/bio-gff3.rb
|
116
|
+
- lib/bio/db/gff/gffassemble.rb
|
117
|
+
- lib/bio/db/gff/gffdb.rb
|
118
|
+
- lib/bio/db/gff/gfffasta.rb
|
119
|
+
- lib/bio/db/gff/gfffileiterator.rb
|
120
|
+
- lib/bio/db/gff/gffinmemory.rb
|
121
|
+
- lib/bio/db/gff/gffnocache.rb
|
122
|
+
- lib/bio/db/gff/gffparser.rb
|
123
|
+
- lib/bio/system/lruhash.rb
|
124
|
+
- spec/gff3_assemble2_spec.rb
|
125
|
+
- spec/gff3_assemble3_spec.rb
|
126
|
+
- spec/gff3_assemble_spec.rb
|
127
|
+
- spec/gff3_fileiterator_spec.rb
|
128
|
+
- spec/gffdb_spec.rb
|
129
|
+
- test/data/gff/MhA1_Contig1133.fa
|
130
|
+
- test/data/gff/MhA1_Contig1133.gff3
|
131
|
+
- test/data/gff/MhA1_Contig125.fa
|
132
|
+
- test/data/gff/MhA1_Contig125.gff3
|
133
|
+
- test/data/gff/standard.gff3
|
134
|
+
- test/data/gff/test-cds.gff3
|
135
|
+
- test/data/gff/test-ext-fasta.fa
|
136
|
+
- test/data/gff/test-ext-fasta.gff3
|
137
|
+
- test/data/gff/test.gff3
|
138
|
+
- test/helper.rb
|
139
|
+
- test/test_bio-gff3.rb
|
140
|
+
has_rdoc: true
|
141
|
+
homepage: http://github.com/pjotrp/bioruby-gff3
|
142
|
+
licenses:
|
143
|
+
- MIT
|
144
|
+
post_install_message:
|
145
|
+
rdoc_options: []
|
146
|
+
|
147
|
+
require_paths:
|
148
|
+
- lib
|
149
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
150
|
+
none: false
|
151
|
+
requirements:
|
152
|
+
- - ">="
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
hash: -266764915
|
155
|
+
segments:
|
156
|
+
- 0
|
157
|
+
version: "0"
|
158
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
159
|
+
none: false
|
160
|
+
requirements:
|
161
|
+
- - ">="
|
162
|
+
- !ruby/object:Gem::Version
|
163
|
+
segments:
|
164
|
+
- 0
|
165
|
+
version: "0"
|
166
|
+
requirements: []
|
167
|
+
|
168
|
+
rubyforge_project:
|
169
|
+
rubygems_version: 1.3.7
|
170
|
+
signing_key:
|
171
|
+
specification_version: 3
|
172
|
+
summary: BioRuby GFF3 plugin for big data
|
173
|
+
test_files:
|
174
|
+
- spec/gff3_assemble2_spec.rb
|
175
|
+
- spec/gff3_assemble3_spec.rb
|
176
|
+
- spec/gff3_assemble_spec.rb
|
177
|
+
- spec/gff3_fileiterator_spec.rb
|
178
|
+
- spec/gffdb_spec.rb
|
179
|
+
- test/helper.rb
|
180
|
+
- test/test_bio-gff3.rb
|