reubypathdb 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Ben J Woodcroft
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,17 @@
1
+ = eupathdb
2
+
3
+ Classes to help parsing EuPathDB data files.
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2010 Ben J Woodcroft. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,53 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it cannot be loaded only a
# hint is printed and the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "reubypathdb"
    gem.summary = %Q{Classes to help parsing EuPathDB data files}
    gem.description = %Q{Classes to help parsing EuPathDB data files}
    gem.email = "donttrustben near gmail.com"
    gem.homepage = "http://github.com/wwood/reubypathdb"
    gem.authors = ["Ben J Woodcroft"]
    gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task. When rcov is missing, a stub :rcov task explains how to get it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this file (presumably it is
# provided by the Jeweler tasks above - confirm). Only hook it in when it
# exists, otherwise `rake test` would abort whenever Jeweler failed to load.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# API documentation. Prefer the task class shipped with RDoc and fall back to
# the legacy Rake one, which newer Rake releases no longer provide.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file's trailing newline must not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "reubypathdb #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,103 @@
1
+
2
# Streaming reader for EuPathDB gene information table dumps.
#
# A record, as consumed by #next_gene, looks like this:
#
# * one or more "Key: value" lines, ended by a blank line
# * zero or more table sections, each made of a "TABLE: <name>" line, a
#   tab-separated "[header]" line and tab-separated data rows; a blank line
#   ends a section
# * a line of 60 dashes that ends the record
#
# The object is Enumerable; each element is a EuPathDBGeneInformation.
class EuPathDBGeneInformationTable
  include Enumerable

  # Raised when a line in the "Key: value" part of a record cannot be parsed.
  # It is a StandardError so that a plain +rescue+ catches it.
  class ParseError < StandardError; end

  # The line that terminates a gene record.
  RECORD_SEPARATOR = ('-' * 60).freeze

  # io:: an object responding to +readline+ and +eof?+ (e.g. File, StringIO)
  def initialize(io)
    @io = io
  end

  # Yields every remaining gene record read from the IO. Without a block an
  # Enumerator is returned instead.
  def each
    return enum_for(:each) unless block_given?

    while (gene = next_gene)
      yield gene
    end
  end

  # Returns a EuPathDBGeneInformation object with all the data you could
  # possibly want, or nil when there are no more records.
  #
  # Reaching the end of the input in the middle of a record ends that record
  # (whatever was read so far is returned) rather than raising EOFError.
  #
  # Raises ParseError if a line in the key/value section is not of the form
  # "Key: value".
  def next_gene
    # skip the blank lines that separate records
    line = next_line
    line = next_line while line && line.empty?
    return nil if line.nil?

    info = EuPathDBGeneInformation.new

    # first, read the key/value section, which should start with the ID line
    until line.nil? || line.empty?
      matches = line.match(/^(.*?)\: (.*)$/)
      unless matches
        raise ParseError, "EuPathDBGeneInformationTable Couldn't parse this line: #{line}"
      end
      info.add_information(matches[1], matches[2])
      line = next_line
    end

    # now read each of the tables, which should start with the
    # 'TABLE: <name>' entry
    table_name = nil
    headers = nil
    data = []
    while (line = next_line) && line != RECORD_SEPARATOR
      if line.empty?
        # a blank line closes the current table, if one has been started
        info.add_table(table_name, headers, data) unless table_name.nil?
        table_name = nil
        headers = nil
        data = []
      elsif (matches = line.match(/^TABLE\: (.*)$/))
        # name of a table
        table_name = matches[1]
      elsif line.match(/^\[.*\]/)
        # headings of the table, with the surrounding brackets removed
        headers = line.split("\t").collect do |header|
          header.sub(/^\[/, '').sub(/\]$/, '')
        end
      else
        # a proper data row
        data.push(line.split("\t"))
      end
    end

    # a table that runs straight into the separator (or the end of the input)
    # without a closing blank line must not be lost
    info.add_table(table_name, headers, data) unless table_name.nil?

    info
  end

  private

  # The next line of the IO with surrounding whitespace stripped, or nil once
  # the IO is exhausted.
  def next_line
    @io.eof? ? nil : @io.readline.strip
  end
end
71
+
72
# Everything recorded for a single gene: a hash of simple "key => value"
# facts plus any number of named tables, each stored as an array of
# "header => cell" hashes (one hash per row).
class EuPathDBGeneInformation
  # Hash of the key/value facts added so far.
  attr_reader :info

  # Both stores exist from the start so the readers are safe to call on an
  # object that has had nothing added to it yet.
  def initialize
    @info = {}
    @tables = {}
  end

  # Returns the value stored under +key+, or nil if there is none.
  # Also available as +[]+.
  def get_info(key)
    @info[key]
  end
  alias_method :[], :get_info

  # Returns the rows of the table called +table_name+ as an array of hashes,
  # or nil if no such table has been added.
  def get_table(table_name)
    @tables[table_name]
  end

  # Stores +value+ under +key+, replacing any earlier value.
  # Returns a short description of the change (kept for compatibility).
  def add_information(key, value)
    @info[key] = value
    "Added info #{key}, now is #{@info[key]}"
  end

  # Stores a table under +name+, replacing any earlier table of that name.
  #
  # name::    name of the table
  # headers:: array of column names; cell i of a row is keyed by headers[i]
  # data::    array of rows, each row an array of cells
  #
  # Returns +data+. Raises ArgumentError if there are rows but +headers+ is
  # nil, since the cells could not be keyed.
  def add_table(name, headers, data)
    if headers.nil? && !data.empty?
      raise ArgumentError, "table #{name.inspect} has data rows but no headers"
    end

    @tables[name] = data.collect do |row|
      record = {}
      row.each_with_index { |cell, i| record[headers[i]] = cell }
      record
    end
    data
  end
end
@@ -0,0 +1,278 @@
1
+ ID: TGME49_000010
2
+ Gene Group: TG_9133
3
+ Data Source: T.gondii ME49 contigs from Genbank
4
+ Genomic Sequence ID: TGME49_chrXII
5
+ Chromosome: XII
6
+ Genomic Location: TGME49_chrXII: 2,230,843 - 2,234,577 (-)
7
+ Gene Strand: reverse
8
+ Gene Type: protein coding
9
+ # Exons: 2
10
+ Transcript Length: 3391
11
+ CDS Length: 1242
12
+ Product Description: hypothetical protein
13
+ Protein Length: 413
14
+ # TM Domains: 0
15
+ Molecular Weight: 45197
16
+ Isoelectric Point: 7.92
17
+ EC Numbers: null
18
+ Ortholog count: 3
19
+ Paralog count: 2
20
+ Temporary Ortholog Group: OG3_85809
21
+ Total SNPs All Strains: 5
22
+ Nonsynonymous SNPs All Strains: 1
23
+ Synonymous SNPs All Strains: 0
24
+ Nonsyn/Syn SNP Ratio All Strains: 0
25
+ SignalP Scores: null
26
+ SignalP Peptide: null
27
+ Annotated GO Function: null
28
+ Annotated GO Process: null
29
+ Annotated GO Component: null
30
+ Predicted GO Function: null
31
+ Predicted GO Process: null
32
+ Predicted GO Component: null
33
+ TypeIII / TypeI fold-change: -11.6288769802844
34
+ TypeIII / TypeII fold-change: -7.03490049704923
35
+ TypeII / TypeI fold-change: -1.65302650480445
36
+ Organism: Toxoplasma gondii ME49
37
+ Release4 IDs: 1.m00014
38
+ = Release4 Protein?: Yes
39
+ Predicted Protein Sequence: MHSRNCVGGSFYDGAVCCSPPSRAFGNMHNLVLVLLAFLSSWEDNETRRGDVVEQQNPFRLAIHGKTERDRDGNCTQHGVRSNDFRFVDEGDMSAEVGRGLALQKHVRVPLQASIARRLSDRESEKRSQGSDNGNGTTPPLPPRRTPSPRPSGASGVVQQGAAGVGTPTMLGTGGQHLSDQGATGSVAPSVQSSSSSAVSVEGALPLIQFFGSRSSSPEVPLCHACHTVEAAVGVLQSIRHRRRAGAREAAERIAALESQHALLVSSSTQGEHSQGARPSDILVSHRALQRARREATRQEETLRGEEASLEAFIRVASEKREREGQRSLEVLRQRVENQTAELAVLRVEAALERQREMVELERRRGDLLALALVAEDDEEYLRLSAEEEAVQRQIEQLAEETRRPRRGRRKPA
40
+ Predicted RNA/mRNA Sequence (introns spliced out): TCTCACGTGAACCGGCCCCAACTGAGGAACACATACGACGGTGGAACCAGGGTGCAGTCGGCACGTCGCTAGGGTTCGTTATTCTGCACCACTGTCTGATTTGGCTGTGCGCCCGAATCTGTGGAACACGTGCACTCGCGAATATTCATGAGAACACATTGTTTTGGAATCAACTAGCCAAATCCTTCCCTCTGCCACACATGCTGAAAGACGAGCTTGGGGGTGAAAGCTCTACTTCAACAAACGCACGGGCAGACGGTTCTCACATGTGCGACGTCGCTAGACGAGTTCGAAGTCGTAGACATCATACTTTTGGGTTGTGTGTGAAGTTAACAACTAACCACTAGATACGTGTTCTGAACAACCGGGAACACCGGACAGGCGCCCACTGCTTTCTGGATAACAAACGAATGGAGGAAGCTCAGCCGCACAGCTTCTTCTGGAGGACTGGGAGAGAAGTGGTCCGACAGAGGCTCAGCTTCGAAACGGTTTTGTGGGCTTTAGTTCTGAACTGAGCTTTTTTTGTTCGACTCATCACCCCAAAATCAAGACTCGGTTGATACATCAGCCGAAGTTGGGGCGGGCTGACGACACCTTAAACGTTCACAGAGACGAGAGAACCCGTTCTCCCGGTGTTGCAATTCATGCTGGGAATCCCCGATGTATGGGGATACAGCTTCAGAGCGAGCCACCACCACCAAGGTTGCCGTTGATGCGGCAGTGACGCGAAGTAGAAGAGCGAAAGAGTGTGCCAATACTTTTGTCTGTTTCACGTGTTTTAAATTGAAGCAACACCGTTCACTCATGCATTTCGCGCCACAGAAGCAGGTAGCAGAGAACCAAAGTTTATTCGCTAACAATTGAACAACACAAGGATACAATCGAGGATCATCGCCACGCAAGTTGGGATCACGGGCCGACTTCAAACAAACGAGGGGTGTACGTGCGAGGGCCCCTGTTCAGCGACGCGTAGTTCCTGCCATGTACACACTTCAGTACTCGTATGAATCCGTTCACGTTGCCGCTGTTTTGTGATTTAGCGTACAATTTTCAAACAGACGAAAGCTACAAATGCTGTACAGGGATAGACATGATGCGGTGTCTTTCCTATCTCTGTCTTGGGTCGGACCTATCCTTTCATTCGGCGTTGCTCTGCCAGCGTGTGACGTTTGAACAGCTGCCATGGAACGCATGAAGTTTGAGAATGGACGCATCCTTGTTTCACAGGGAGAGCTGAGTGAAGGAACTTGCTGCAGAAGTTCAGTCAATCGTCGCTCCTGCCTGCGAAGATTTGCGTATTTGCGCTACAAGTGTGACCAGCCTGCTAAACTGACTGGCGAGCTGTGGCGTTAACGTCGTGGCGAAACGCGTAGCTAAGGGCAGAGAGCCAACAAGCGTACTTTGAATATCAATTACGAGCTCGTTGTTCGTACTTCATCATTCTGGGTTCGCAGAAAAGGTGACGCTTACCCCGTCGTGTGGAGTTTAGTACCTTCAGGAAAGTCACCCTGAGGATGCATAGCCGGAACTGCGTCGGAGGATCGTTTTATGACGGGGCCGTGTGTTGTTCGCCTCCATCGCGTGCTTTCGGCAATATGCACAACCTAGTCCTGGTGTTGCTGGCGTTTCTGTCTTCGTGGGAAGACAACGAAACCAGGCGCGGCGATGTGGTGGAACAGCAGAATCCTTTTCGCCTAGCGATTCACGGCAAAACTGAGAGAGACCGTGACGGGAATTGCACACAACACGGCGTGAGATCAAATGATTTCCGTTTCGTGGATGAAGGTGACATGTCTGCTGAAGTAGGTCGTGGTCTGGCTTTACAGAAACATGTCCGAGTTCCGCTGCAGGCTTCGATTGCTCGGCGGCTTTCTGACAGGGAGTCGGAAAAGAGGAGCCAAGGTTCCGACAACGGAAACGGCACTACCCCACCTTTGCCACCACGGC
GGACCCCGTCTCCGCGGCCTTCAGGAGCCAGTGGAGTTGTCCAACAAGGAGCTGCCGGTGTGGGCACGCCTACGATGTTGGGGACAGGTGGCCAACATCTCTCTGACCAGGGAGCTACAGGTTCAGTCGCACCATCAGTGCAATCTTCCTCATCTAGTGCTGTCAGTGTTGAGGGTGCATTGCCTCTAATCCAGTTTTTCGGAAGCCGTAGCAGTTCTCCTGAAGTCCCTCTATGTCATGCGTGCCACACTGTAGAGGCAGCTGTAGGCGTGCTTCAAAGCATAAGACACCGCAGACGGGCTGGCGCGCGTGAGGCAGCTGAGCGAATAGCGGCACTGGAGTCCCAACATGCTCTTCTTGTCAGTTCCTCCACCCAGGGAGAGCACAGTCAAGGTGCACGGCCAAGTGATATCCTTGTGAGTCACCGGGCGCTACAACGGGCGCGCCGAGAGGCCACCCGACAGGAAGAAACGCTACGGGGAGAAGAAGCCTCCCTGGAAGCGTTTATACGCGTGGCCTCTGAAAAGCGTGAAAGAGAAGGGCAGAGGTCTCTCGAGGTGCTCCGCCAGCGCGTGGAAAATCAGACTGCAGAACTTGCTGTGTTACGCGTCGAGGCAGCACTAGAGCGGCAACGTGAAATGGTGGAGCTCGAGAGGAGGCGCGGAGACTTACTGGCCCTGGCCTTAGTCGCAGAAGACGACGAAGAATATCTTCGGCTGAGTGCAGAAGAAGAAGCAGTCCAACGGCAAATCGAGCAATTGGCGGAGGAAACGAGGAGACCGCGCCGTGGCCGTAGAAAGCCCGCGTGATGCAGAAGTGAACAGTGATTCCAAGTGTCTTAAGCACAGTTATTTGAAAAAACAACAATGCATTCTGAATGCGAAACGGTGGTATCGGAGGTATAATGTAGGTAGGGGAACGGGGCACCCCCCGATCTTATTTTGTCAGTCCTGTTACGGAAAGGAAACTGCCTTGTCGGGAGCAGAGGTAAATCCCATTAGGCGCGAGGGTAGCAGAAACCCGCTTCGTGCTGCGTTATGCTTTTGATACTCGCACGCTATTTTGATTACCTCGCAAGCACCTTTGGCGACACTGTGAAAATGCTTTTCTGACAAAGCGATACACGATTGCGGATTTCCCAGTTGGTGTCGTGGTGCGCCTGTCGCCGAATCAAGAGGACATCCTAATTGCACTTCATATGTGAATCCGGCACATACGTTCGTTACCATTTTCTCAGCAACAGGCTTCTGGAGAAACCGAAACTGGTTTTCCAGTGTAGATGGCAATGTCACTCGTGGGGATAACGAAACCCTGAAGATTGCCCTGGCGGGTTCGAAAGTATACGTTCACAGGCGTCCACCGAACATATCCTATTCATGATGTGAACAGCCTATTGATGATGTCTCTACTGAACACACTAGTTGAAAGACACCTGCGAGAGCGG
41
+ Coding Sequence: ATGCATAGCCGGAACTGCGTCGGAGGATCGTTTTATGACGGGGCCGTGTGTTGTTCGCCTCCATCGCGTGCTTTCGGCAATATGCACAACCTAGTCCTGGTGTTGCTGGCGTTTCTGTCTTCGTGGGAAGACAACGAAACCAGGCGCGGCGATGTGGTGGAACAGCAGAATCCTTTTCGCCTAGCGATTCACGGCAAAACTGAGAGAGACCGTGACGGGAATTGCACACAACACGGCGTGAGATCAAATGATTTCCGTTTCGTGGATGAAGGTGACATGTCTGCTGAAGTAGGTCGTGGTCTGGCTTTACAGAAACATGTCCGAGTTCCGCTGCAGGCTTCGATTGCTCGGCGGCTTTCTGACAGGGAGTCGGAAAAGAGGAGCCAAGGTTCCGACAACGGAAACGGCACTACCCCACCTTTGCCACCACGGCGGACCCCGTCTCCGCGGCCTTCAGGAGCCAGTGGAGTTGTCCAACAAGGAGCTGCCGGTGTGGGCACGCCTACGATGTTGGGGACAGGTGGCCAACATCTCTCTGACCAGGGAGCTACAGGTTCAGTCGCACCATCAGTGCAATCTTCCTCATCTAGTGCTGTCAGTGTTGAGGGTGCATTGCCTCTAATCCAGTTTTTCGGAAGCCGTAGCAGTTCTCCTGAAGTCCCTCTATGTCATGCGTGCCACACTGTAGAGGCAGCTGTAGGCGTGCTTCAAAGCATAAGACACCGCAGACGGGCTGGCGCGCGTGAGGCAGCTGAGCGAATAGCGGCACTGGAGTCCCAACATGCTCTTCTTGTCAGTTCCTCCACCCAGGGAGAGCACAGTCAAGGTGCACGGCCAAGTGATATCCTTGTGAGTCACCGGGCGCTACAACGGGCGCGCCGAGAGGCCACCCGACAGGAAGAAACGCTACGGGGAGAAGAAGCCTCCCTGGAAGCGTTTATACGCGTGGCCTCTGAAAAGCGTGAAAGAGAAGGGCAGAGGTCTCTCGAGGTGCTCCGCCAGCGCGTGGAAAATCAGACTGCAGAACTTGCTGTGTTACGCGTCGAGGCAGCACTAGAGCGGCAACGTGAAATGGTGGAGCTCGAGAGGAGGCGCGGAGACTTACTGGCCCTGGCCTTAGTCGCAGAAGACGACGAAGAATATCTTCGGCTGAGTGCAGAAGAAGAAGCAGTCCAACGGCAAATCGAGCAATTGGCGGAGGAAACGAGGAGACCGCGCCGTGGCCGTAGAAAGCCCGCGTGA
42
+
43
+ TABLE: SNPs Summary
44
+ [strain_a_query] [strain_b_query] [Strain A] [Strain B] [gene_source_id] [CDS SNP Density] [Non-synonymous] [Synonymous] [Nonsense] [Non-coding] [Total]
45
+ RH (type I) VEG (type III) RH VEG TGME49_000010 .81 1 0 0 4 5
46
+ ME49 (type II) RH (type I) ME49 RH TGME49_000010 .81 1 0 0 4 5
47
+
48
+ TABLE: Gene Model
49
+ [Type] [Start] [End]
50
+ exon 2230843 2232576
51
+ intron 2232577 2232920
52
+ exon 2232921 2234577
53
+
54
+ TABLE: ME49 Microarray Expression Data
55
+ [ME49 Gene Model]
56
+ TGME49_000010
57
+ TGME49_045430
58
+
59
+ TABLE: Three archetypal T. gondii lineages - Percentiles
60
+ [Strain] [Percentile]
61
+ VEG 11.5
62
+ CTG 23.2
63
+ Prugniaud 71.9
64
+ RH 78.7
65
+ GT1 78.2
66
+ ME49 82.7
67
+
68
+ TABLE: InterPro Domains
69
+ [Name] [Primary ID] [Secondary ID] [Start Min] [End Min] [E-value]
70
+
71
+ TABLE: Transmembrane Domains
72
+ [Name] [Parent ID] [Start Min] [End Max] [Sequence] [Topology]
73
+
74
+ TABLE: Low Complexity Regions
75
+ [Name] [Parent ID] [Start Min] [End Max]
76
+ 263625 89354 403 410
77
+ 263623 89354 341 354
78
+ 263622 89354 287 304
79
+ 263620 89354 186 201
80
+ 263619 89354 136 166
81
+ 263621 89354 239 256
82
+ 263624 89354 367 382
83
+
84
+ TABLE: Signal Peptide
85
+ [Name] [NN Conclusion Score] [NN D-Score] [HMM Signal Probabability] [Start Min] [End Max]
86
+
87
+ TABLE: BlastP Hits
88
+ [Feature ID] [Name] [Parent ID] [Score] [Start] [End] [Description] [Expect Value] [Percent Identity] [Percent Positive]
89
+ 728519 TGME49_000010 89354 147 240 400 E() = 9.9e-6, 30% identity, gi|183598034|ref|ZP_02959527.1| hypothetical protein PROSTU_01390 [Providencia stuartii ATCC 25827]gi|188022813|gb|EDU60853.1| hypothetical protein PROSTU_01390 [Providencia stuartii ATCC 25827] 9.9e-6 0 0
90
+
91
+ TABLE: Gene Location
92
+ [Sequence Level] [Genomic Sequence] [Location] [start] [end] [is_reversed] [start_context] [end_context] [Feature ID]
93
+ Chromosome TGME49_chrXII 2230843 - 2234577 (-) 2230843 2234577 - 2225843 2239577 TGME49_000010
94
+ Scaffold DS984812 1393 - 5127 (+) 1393 5127 + -3607 10127 TGME49_000010
95
+
96
+ TABLE: Release 4 Genes
97
+ [Gene ID] [Product] [Sequence ID] [Location] [=Release5 protein?] [cyc_gene_id] [start] [end] [strand] [context_start] [context_end]
98
+ 1.m00014 hypothetical protein TGG_995340 1393 - 5127 (+) Yes 1.M00014 1393 5127 + 0 10127
99
+
100
+ TABLE: Notes
101
+ [Note]
102
+ encoded by transcript TGME49_000010A
103
+
104
+ TABLE: User Comments
105
+ [Comment ID] [stable_id] [comment_target_id] [Headline] [PubMed ID(s)] [# Related Genes] [# Uploaded Files] [Made by] [Date]
106
+
107
+ TABLE: EC Number
108
+ [EC Number] [Source] [EC Description]
109
+
110
+ TABLE: Mass Spec.-based Expression Evidence
111
+ [Experiment Name] [Sequences] [Sequence Count] [Spectrum Count]
112
+
113
+ TABLE: GO Terms
114
+ [GO ID] [Ontology] [GO Term Name] [Source] [Evidence Code] [lowercase_evidence_code] [Is Not]
115
+
116
+ TABLE: Orthologs and Paralogs within ToxoDB
117
+ [Gene] [Species] [Product]
118
+ TGME49_087740 Toxoplasma gondii ME49 hypothetical protein
119
+
120
+ TABLE: Metabolic Pathways
121
+ [pathway_id] [Pathway] [activity]
122
+
123
+ TABLE: Strains summary
124
+ [Gene] [Strain] [Product] [Protein Length] [# Exons] [# TM] [Signal Peptide] [Proteomics?] [User Comment?]
125
+ TGME49_000010 ME49 hypothetical protein 413 2 0 No No Yes
126
+
127
+ TABLE: Toxoplasma Genome Sequencing Project Annotation
128
+ [TASK Annotation]
129
+ Unsure of gene model. There is difference between the manatee and the Gbrowse gene models.<br/><b>Reviewed by</b>: DS
130
+
131
+ TABLE: Antibody Reagents
132
+ [remark] [Reagent ID] [Reagent Type]
133
+
134
+ TABLE: External Links
135
+ [Database] [source_id] [link_url]
136
+
137
+ TABLE: Epitopes from IEDB
138
+ [Epitope] [Sequence] [Location on Protein] [Strain] [Confidence]
139
+
140
+ TABLE: Product
141
+ [Product]
142
+ hypothetical protein
143
+
144
+
145
+ ------------------------------------------------------------
146
+
147
+ ID: TGME49_000110
148
+ Gene Group: TG_16806
149
+ Data Source: T.gondii ME49 contigs from Genbank
150
+ Genomic Sequence ID: TGME49_chrIX
151
+ Chromosome: IX
152
+ Genomic Location: TGME49_chrIX: 106,477 - 106,854 (+)
153
+ Gene Strand: forward
154
+ Gene Type: protein coding
155
+ # Exons: 1
156
+ Transcript Length: 378
157
+ CDS Length: 378
158
+ Product Description: hypothetical protein, conserved
159
+ Protein Length: 125
160
+ # TM Domains: 0
161
+ Molecular Weight: 13492
162
+ Isoelectric Point: 5.63
163
+ EC Numbers: null
164
+ Ortholog count: 1
165
+ Paralog count: 0
166
+ Temporary Ortholog Group: ORTH_16806.tmp
167
+ Total SNPs All Strains: 11
168
+ Nonsynonymous SNPs All Strains: 7
169
+ Synonymous SNPs All Strains: 4
170
+ Nonsyn/Syn SNP Ratio All Strains: 1.75
171
+ SignalP Scores: null
172
+ SignalP Peptide: null
173
+ Annotated GO Function: null
174
+ Annotated GO Process: null
175
+ Annotated GO Component: null
176
+ Predicted GO Function: null
177
+ Predicted GO Process: null
178
+ Predicted GO Component: null
179
+ TypeIII / TypeI fold-change: 1.01844018466976
180
+ TypeIII / TypeII fold-change: -2.02099469638244
181
+ TypeII / TypeI fold-change: 2.05826221180034
182
+ Organism: Toxoplasma gondii ME49
183
+ Release4 IDs: 2.m00067
184
+ = Release4 Protein?: Yes
185
+ Predicted Protein Sequence: MVLKENLRVKQVHQGEDPMDIGYQPGNVWSVGSEVDVSCCVFVERAVKPAEISGTVRECRHAPAGNSCNADCLSSPTVHVLIFLTTTTEGGVCQMAKAKYVGRRQEAICLQAALGEMLSEITNPL
186
+ Predicted RNA/mRNA Sequence (introns spliced out): ATGGTATTAAAAGAGAATTTGCGCGTGAAGCAGGTACACCAGGGGGAGGATCCCATGGATATTGGTTACCAGCCGGGGAATGTTTGGTCCGTCGGCTCGGAGGTGGATGTGAGCTGTTGTGTATTTGTGGAACGAGCGGTGAAGCCTGCCGAAATTAGCGGAACCGTCAGAGAATGCCGCCACGCGCCTGCGGGAAACAGTTGCAACGCCGATTGTTTGTCGTCACCGACTGTGCACGTCCTTATTTTCCTCACAACGACCACCGAGGGAGGTGTTTGCCAGATGGCGAAGGCGAAGTACGTTGGCCGCCGCCAAGAAGCGATCTGTCTGCAGGCCGCCCTCGGAGAAATGCTGAGCGAGATCACAAATCCGCTATGA
187
+ Coding Sequence: ATGGTATTAAAAGAGAATTTGCGCGTGAAGCAGGTACACCAGGGGGAGGATCCCATGGATATTGGTTACCAGCCGGGGAATGTTTGGTCCGTCGGCTCGGAGGTGGATGTGAGCTGTTGTGTATTTGTGGAACGAGCGGTGAAGCCTGCCGAAATTAGCGGAACCGTCAGAGAATGCCGCCACGCGCCTGCGGGAAACAGTTGCAACGCCGATTGTTTGTCGTCACCGACTGTGCACGTCCTTATTTTCCTCACAACGACCACCGAGGGAGGTGTTTGCCAGATGGCGAAGGCGAAGTACGTTGGCCGCCGCCAAGAAGCGATCTGTCTGCAGGCCGCCCTCGGAGAAATGCTGAGCGAGATCACAAATCCGCTATGA
188
+
189
+ TABLE: SNPs Summary
190
+ [strain_a_query] [strain_b_query] [Strain A] [Strain B] [gene_source_id] [CDS SNP Density] [Non-synonymous] [Synonymous] [Nonsense] [Non-coding] [Total]
191
+ 454 Me49 454 Me49 TGME49_000110 29.1 7 4 1 0 11
192
+
193
+ TABLE: Gene Model
194
+ [Type] [Start] [End]
195
+ exon 106477 106854
196
+
197
+ TABLE: ME49 Microarray Expression Data
198
+ [ME49 Gene Model]
199
+ TGME49_000110
200
+
201
+ TABLE: Three archetypal T. gondii lineages - Percentiles
202
+ [Strain] [Percentile]
203
+ VEG 31
204
+ CTG 41
205
+ ME49 51.2
206
+ RH 32.7
207
+ GT1 37.8
208
+ Prugniaud 63.8
209
+
210
+ TABLE: InterPro Domains
211
+ [Name] [Primary ID] [Secondary ID] [Start Min] [End Min] [E-value]
212
+
213
+ TABLE: Transmembrane Domains
214
+ [Name] [Parent ID] [Start Min] [End Max] [Sequence] [Topology]
215
+
216
+ TABLE: Low Complexity Regions
217
+ [Name] [Parent ID] [Start Min] [End Max]
218
+
219
+ TABLE: Signal Peptide
220
+ [Name] [NN Conclusion Score] [NN D-Score] [HMM Signal Probabability] [Start Min] [End Max]
221
+
222
+ TABLE: BlastP Hits
223
+ [Feature ID] [Name] [Parent ID] [Score] [Start] [End] [Description] [Expect Value] [Percent Identity] [Percent Positive]
224
+
225
+ TABLE: Gene Location
226
+ [Sequence Level] [Genomic Sequence] [Location] [start] [end] [is_reversed] [start_context] [end_context] [Feature ID]
227
+ Chromosome TGME49_chrIX 106477 - 106854 (+) 106477 106854 + 101477 111854 TGME49_000110
228
+ Scaffold DS984780 3292 - 3669 (-) 3292 3669 - -1708 8669 TGME49_000110
229
+
230
+ TABLE: Release 4 Genes
231
+ [Gene ID] [Product] [Sequence ID] [Location] [=Release5 protein?] [cyc_gene_id] [start] [end] [strand] [context_start] [context_end]
232
+ 2.m00067 conserved hypothetical protein IX 106377 - 106754 (+) Yes 2.M00067 106377 106754 + 101377 111754
233
+
234
+ TABLE: Notes
235
+ [Note]
236
+ encoded by transcript TGME49_000110A
237
+
238
+ TABLE: User Comments
239
+ [Comment ID] [stable_id] [comment_target_id] [Headline] [PubMed ID(s)] [# Related Genes] [# Uploaded Files] [Made by] [Date]
240
+
241
+ TABLE: EC Number
242
+ [EC Number] [Source] [EC Description]
243
+
244
+ TABLE: Mass Spec.-based Expression Evidence
245
+ [Experiment Name] [Sequences] [Sequence Count] [Spectrum Count]
246
+
247
+ TABLE: GO Terms
248
+ [GO ID] [Ontology] [GO Term Name] [Source] [Evidence Code] [lowercase_evidence_code] [Is Not]
249
+
250
+ TABLE: Orthologs and Paralogs within ToxoDB
251
+ [Gene] [Species] [Product] [is syntenic] [has comments]
252
+
253
+ TABLE: Metabolic Pathways
254
+ [pathway_id] [Pathway] [activity]
255
+
256
+ TABLE: Strains summary
257
+ [Gene] [Strain] [Product] [Protein Length] [# Exons] [# TM] [Signal Peptide] [Proteomics?] [User Comment?]
258
+ TGME49_000110 ME49 hypothetical protein, conserved 125 1 0 No No No
259
+
260
+ TABLE: Toxoplasma Genome Sequencing Project Annotation
261
+ [TASK Annotation]
262
+
263
+ TABLE: Antibody Reagents
264
+ [remark] [Reagent ID] [Reagent Type]
265
+
266
+ TABLE: External Links
267
+ [Database] [source_id] [link_url]
268
+
269
+ TABLE: Epitopes from IEDB
270
+ [Epitope] [Sequence] [Location on Protein] [Strain] [Confidence]
271
+
272
+ TABLE: Product
273
+ [Product]
274
+ hypothetical protein, conserved
275
+
276
+
277
+ ------------------------------------------------------------
278
+
data/test/helper.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+
8
+ class Test::Unit::TestCase
9
+ end
@@ -0,0 +1,60 @@
1
+
2
+ require 'helper'
3
+ require 'eupathdb_gene_information_table'
4
+
5
# Checks EuPathDBGeneInformationTable against the fixture file
# test/data/eupathGeneInformation.txt, which holds two gene records.
class EuPathDBGeneInformationTableTest < Test::Unit::TestCase
  FIXTURE_PATH = File.join(File.dirname(__FILE__), 'data', 'eupathGeneInformation.txt')

  # IDs of the genes in the fixture, in file order.
  GENE_IDS = %w(TGME49_000010 TGME49_000110)

  # Records come out one per gene and in file order.
  def test_gene_splitting
    assert_equal GENE_IDS, parsed_genes.collect { |info| info.get_info('ID') }
  end

  # A table is exposed as one header => cell hash per data row.
  def test_table
    genes = [
      {
        'Type' => 'exon',
        'Start' => '2230843',
        'End' => '2232576'
      },
      {
        'Type' => 'intron',
        'Start' => '2232577',
        'End' => '2232920'
      },
      {
        'Type' => 'exon',
        'Start' => '2232921',
        'End' => '2234577'
      },
    ]
    assert_equal genes, parsed_genes[0].get_table('Gene Model')
  end

  # The final table of each record (the one just before the separator) is kept.
  def test_last_entry
    first, last = parsed_genes
    assert_equal [{'Product' => 'hypothetical protein, conserved'}],
      last.get_table('Product')
    assert_equal [{'Product' => 'hypothetical protein'}],
      first.get_table('Product')
  end

  # info[key] behaves like info.get_info(key).
  def test_alias_brackets
    assert_equal GENE_IDS, parsed_genes.collect { |info| info['ID'] }
  end

  private

  # Parses the whole fixture and returns the records as an array. The block
  # form of File.open closes the file handle once parsing is finished.
  def parsed_genes
    File.open(FIXTURE_PATH, 'r') do |io|
      EuPathDBGeneInformationTable.new(io).to_a
    end
  end
end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: reubypathdb
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Ben J Woodcroft
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-11-01 00:00:00 +11:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: thoughtbot-shoulda
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :development
34
+ version_requirements: *id001
35
+ description: Classes to help parsing EuPathDB data files
36
+ email: donttrustben near gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ - README.rdoc
44
+ files:
45
+ - .document
46
+ - .gitignore
47
+ - LICENSE
48
+ - README.rdoc
49
+ - Rakefile
50
+ - VERSION
51
+ - lib/eupathdb_gene_information_table.rb
52
+ - test/data/eupathGeneInformation.txt
53
+ - test/helper.rb
54
+ - test/test_eupathdb_gene_information_table.rb
55
+ has_rdoc: true
56
+ homepage: http://github.com/wwood/reubypathdb
57
+ licenses: []
58
+
59
+ post_install_message:
60
+ rdoc_options:
61
+ - --charset=UTF-8
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ hash: 3
70
+ segments:
71
+ - 0
72
+ version: "0"
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ hash: 3
79
+ segments:
80
+ - 0
81
+ version: "0"
82
+ requirements: []
83
+
84
+ rubyforge_project:
85
+ rubygems_version: 1.3.7
86
+ signing_key:
87
+ specification_version: 3
88
+ summary: Classes to help parsing EuPathDB data files
89
+ test_files:
90
+ - test/helper.rb
91
+ - test/test_eupathdb_gene_information_table.rb