bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,73 @@
1
+ #
2
+ # bio/db/genbank/genpept.rb - GenPept database class
3
+ #
4
+ # Copyright (C) 2002-2004 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: genpept.rb,v 1.10 2005/10/23 07:20:37 k Exp $
21
+ #
22
+
23
+ require 'bio/db/genbank/common'
24
+ require 'bio/db/genbank/genbank'
25
+
26
+ module Bio
27
# GenPept database entry class.
#
# NOTE(review): +get+, +origin+ and the <tt>@data</tt> cache are not defined
# in this file; presumably they come from NCBIDB / Bio::NCBIDB::Common --
# confirm against those sources.
class GenPept < NCBIDB

  include Bio::NCBIDB::Common

  # LOCUS
  #
  # Fixed-column view of a LOCUS line.
  class Locus
    attr_accessor :entry_id, :length, :circular, :division, :date

    # locus_line:: text of the LOCUS line. Columns that lie beyond the end
    #              of the line yield an empty String (0 for +length+)
    #              instead of raising NoMethodError on nil.
    def initialize(locus_line)
      @entry_id = column(locus_line, 12..27)
      @length   = column(locus_line, 29..39).to_i
      @circular = column(locus_line, 55..62)   # always linear
      @division = column(locus_line, 63..66)
      @date     = column(locus_line, 68..78)
    end

    private

    # Returns the stripped text found in +range+ of +line+, or "" when the
    # line is nil or too short to contain that range.
    def column(line, range)
      line.to_s[range].to_s.strip
    end
  end

  # Returns the Locus object for this entry, built on first use from
  # get('LOCUS') and cached in @data['LOCUS'].
  def locus
    @data['LOCUS'] ||= Locus.new(get('LOCUS'))
  end

  # Shortcuts delegating to the Locus object.
  def entry_id; locus.entry_id; end
  def length;   locus.length;   end
  def circular; locus.circular; end
  def division; locus.division; end
  def date;     locus.date;     end


  # ORIGIN
  #
  # Returns a new Bio::Sequence::AA built from @data['SEQUENCE'].
  # +origin+ is called first when that slot is still empty (presumably it
  # fills @data['SEQUENCE'] -- confirm in Bio::NCBIDB::Common).
  def seq
    origin unless @data['SEQUENCE']
    Bio::Sequence::AA.new(@data['SEQUENCE'])
  end
  alias aaseq seq
  alias aalen length

  # Returns the length of the sequence returned by +seq+.
  def seq_len
    seq.length
  end

  # DBSOURCE
  #
  # Returns the result of get('DBSOURCE').
  def dbsource
    get('DBSOURCE')
  end

end # GenPept
73
+ end # Bio
@@ -0,0 +1,31 @@
1
+ #
2
+ # bio/db/genbank/refseq.rb - RefSeq database class
3
+ #
4
+ # Copyright (C) 2000-2004 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: refseq.rb,v 1.6 2004/08/23 23:40:35 k Exp $
21
+ #
22
+
23
+ require 'bio/db/genbank/genbank'
24
+
25
+ module Bio
26
+
27
# RefSeq entries use exactly the same flat-file format as GenBank, so this
# subclass adds no behaviour of its own.
class RefSeq < GenBank; end
30
+
31
+ end # Bio
data/lib/bio/db/gff.rb ADDED
@@ -0,0 +1,106 @@
1
+ #
2
+ # = bio/db/gff.rb - GFF format class
3
+ #
4
+ # Copyright:: Copyright (C) 2003, 2005
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: gff.rb,v 1.5 2005/12/18 15:58:41 k Exp $
9
+ #
10
+ # == Description
11
+ #
12
+ #
13
+ # == Example
14
+ #
15
+ #
16
+ # == References
17
+ #
18
+ # * http://www.sanger.ac.uk/Software/formats/GFF/
19
+ #
20
+ #--
21
+ #
22
+ # This library is free software; you can redistribute it and/or
23
+ # modify it under the terms of the GNU Lesser General Public
24
+ # License as published by the Free Software Foundation; either
25
+ # version 2 of the License, or (at your option) any later version.
26
+ #
27
+ # This library is distributed in the hope that it will be useful,
28
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
29
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30
+ # Lesser General Public License for more details.
31
+ #
32
+ # You should have received a copy of the GNU Lesser General Public
33
+ # License along with this library; if not, write to the Free Software
34
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
35
+ #
36
+ #++
37
+ #
38
+
39
+ module Bio
40
+
41
# Container for GFF formatted text: one Record per input line.
class GFF

  # Array of Bio::GFF::Record objects, in input order.
  attr_accessor :records

  # str:: GFF text; every line (comment lines included) becomes a Record.
  def initialize(str = '')
    @records = []
    str.each_line do |line|
      @records << Record.new(line)
    end
  end

  # A single line of a GFF file.
  class Record

    attr_accessor :seqname
    attr_accessor :source
    attr_accessor :feature
    attr_accessor :start
    attr_accessor :end
    attr_accessor :score
    attr_accessor :strand
    attr_accessor :frame
    attr_accessor :attributes
    attr_accessor :comments

    # str:: one line of GFF text.
    #
    # +comments+ receives everything from the first '#' to the end of the
    # line (nil when there is none). A line that starts with '#' sets only
    # +comments+. Otherwise the line is split on TAB into the eight fixed
    # columns (kept as Strings) plus the attribute column, which is parsed
    # into a Hash when present.
    def initialize(str)
      line = str.chomp
      @comments = line[/#.*/]
      return if line =~ /^#/
      @seqname, @source, @feature, @start, @end, @score, @strand, @frame,
        attributes, = line.split("\t")
      @attributes = parse_attributes(attributes) if attributes
    end

    private

    # Splits the attribute column on semicolons that are not escaped with a
    # backslash and returns a Hash of key => value Strings.
    #
    # The text is scanned for runs of "escaped pair or non-semicolon"
    # characters rather than split on /[^\\];/, because that pattern also
    # swallowed the character in front of each ';' and so truncated every
    # value but the last. Fragments without a key (e.g. after a trailing
    # ';') are skipped.
    def parse_attributes(attributes)
      hash = {}
      attributes.scan(/(?:\\.|[^;])+/).each do |atr|
        key, value = atr.split(' ', 2)
        next unless key
        hash[key] = value
      end
      hash
    end
  end

  # GFF version 2
  class GFF2 < GFF
    VERSION = 2
  end

  # GFF version 3
  class GFF3 < GFF
    VERSION = 3
  end

end # class GFF
94
+
95
+ end # module Bio
96
+
97
+
98
# Command-line driver: when this file is run directly, parse the GFF text
# read from ARGF and print the resulting Bio::GFF object.
if $0 == __FILE__
  begin
    require 'pp'
    alias p pp   # pretty-print when the pp library can be loaded
  rescue LoadError
    # pp is not available; keep the built-in p
  end

  p Bio::GFF.new(ARGF.read)
end
data/lib/bio/db/go.rb ADDED
@@ -0,0 +1,497 @@
1
+ #
2
+ # = bio/db/go.rb - Classes for Gene Ontology
3
+ #
4
+ # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: go.rb,v 1.9 2005/10/31 18:32:36 nakao Exp $
8
+ #
9
+ # == Gene Ontology
10
+ #
11
+ # == Example
12
+ #
13
+ # == References
14
+ #--
15
+ #
16
+ # This library is free software; you can redistribute it and/or
17
+ # modify it under the terms of the GNU Lesser General Public
18
+ # License as published by the Free Software Foundation; either
19
+ # version 2 of the License, or (at your option) any later version.
20
+ #
21
+ # This library is distributed in the hope that it will be useful,
22
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
23
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24
+ # Lesser General Public License for more details.
25
+ #
26
+ # You should have received a copy of the GNU Lesser General Public
27
+ # License along with this library; if not, write to the Free Software
28
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29
+ #
30
+ #++
31
+ #
32
+
33
+ require 'bio/pathway'
34
+
35
+ module Bio
36
+
37
+ # = Bio::GO
38
+ # Classes for Gene Ontology http://www.geneontology.org
39
+ class GO
40
+
41
# = Bio::GO::Ontology
#
# Container class for ontologies in the DAG Edit format.
#
# == Example
#
#   c_data = File.open('component.ontology').read
#   go_c = Bio::GO::Ontology.new(c_data)
#   p go_c.bfs_shortest_path('0003673','0005632')
class Ontology < Bio::Pathway

  # Bio::GO::Ontology.parse_goids(line)
  #
  # Parsing GOID line in the DAGEdit format
  #   GO:ID[ ; GO:ID...]
  #
  # Returns an Array of 7-digit GO ID Strings (without the "GO:" prefix).
  def self.parse_goids(line)
    goids = []
    loop {
      if /^ *[$%<]\S.+?;/ =~ line
        # drop the leading "<relation><term> ;" part
        endpoint = line.index(';') + 1
        line = line[endpoint..line.size]
      elsif /^,* GO:(\d{7}),*/ =~ line
        goids << $1.dup
        endpoint = line.index(goids.last) + goids.last.size
        line = line[endpoint..line.size]
      else
        break
      end
    }
    return goids
  end

  # Returns a Hash instance of the header lines in ontology flatfile.
  attr_reader :header_lines

  # Hash mapping a GO ID to its term text.
  attr_reader :id2term

  # Hash mapping each GO ID of an entry line to the first GO ID on that line.
  attr_reader :id2id


  # Bio::GO::Ontology.new(str)
  # The DAG Edit format ontology data parser.
  #
  # The relations parsed from +str+ are handed to the superclass
  # constructor.
  def initialize(str)
    @id2term      = {}
    @header_lines = {}
    @id2id        = {}
    adj_list = dag_edit_format_parser(str)
    super(adj_list)
  end


  # Returns a GO_Term corresponding with the given GO_ID, falling back to
  # the term registered for the ID that +goid+ maps to in +id2id+.
  def goid2term(goid)
    id2term[goid] || id2term[id2id[goid]]
  end

  private

  # Constructs the adjacency list (an Array of Bio::Relation) for the given
  # ontology text, filling @header_lines, @id2term and @id2id on the way.
  def dag_edit_format_parser(str)
    stack    = []   # stack[depth] = first GO ID seen at that indent depth
    adj_list = []

    # String#each is gone in Ruby 1.9+; use each_line when available and
    # fall back to each for inputs (e.g. Arrays of lines) that lack it.
    iterator = str.respond_to?(:each_line) ? :each_line : :each

    str.__send__(iterator) do |line|
      if /^!(.+?):\s+(\S.+)$/ =~ line   # Parsing head lines
        tag   = $1.tr('-', '_')
        value = $2
        # Plain assignment: the value is stored verbatim instead of being
        # spliced into evaluated code, which broke on quote characters.
        @header_lines[tag] = value unless tag == 'type'
        next
      end

      # GO Term ; GO:ID
      next unless /^( *)([$<%])(.+?) ; GO:(\d{7})(\n*)/ =~ line
      depth = $1.length
      rel   = $2
      term  = $3
      goid  = $4
      goids = parse_goids(line)   # GO:ID[ ; GO:ID...]
      stack[depth] = goids.first
      @id2term[goid] = term

      next if depth == 0

      goids.each do |id|
        @id2term[id] = term
        @id2id[id]   = goids.first
        adj_list << Bio::Relation.new(stack[depth - 1], id, rel)
      end

      # NOTE(review): the original followed this with a loop that re-tested
      # the same, never-changing line; it could only exit on its first pass
      # without effect or spin forever, so it has been removed. If it was
      # meant to pick up further relations on the same line, that still
      # needs to be implemented.
    end
    return adj_list
  end


  # Returns an ary of GO IDs by parsing an entry line in the DAG Edit
  # format.
  def parse_goids(line)
    Ontology.parse_goids(line)
  end

  # Bio::GO::Ontology#parse_synonyms(line)
  #
  # Returns an ary of the "synonym:" values found in an entry line.
  def parse_synonyms(line)
    synonyms = []
    loop {
      if / ; synonym:(\S.+?) *[;<%\n]/ =~ line
        synonyms << $1.dup
        endpoint = line.index(synonyms.last) + synonyms.last.size
        line = line[endpoint..line.size]
      else
        break
      end
    }
    return synonyms
  end

end # class Ontology
187
+
188
+
189
+
190
# = Bio::GO::GeneAssociation
# $CVSROOT/go/gene-associations/gene_association.*
#
# Data parser for the gene_association go annotation.
# See also the file format http://www.geneontology.org/doc/GO.annotation.html#file
#
# == Example
#
#   mgi_data = File.open('gene_association.mgi').read
#   mgi = Bio::GO::GeneAssociation.parser(mgi_data)
#
#   Bio::GO::GeneAssociation.parser(mgi_data) do |entry|
#     p [entry.entry_id, entry.evidence, entry.goid]
#   end
#
class GeneAssociation # < Bio::DB

  # Delimiter
  DELIMITER = "\n"

  # Delimiter
  RS = DELIMITER

  # Returns an Array of parsed gene_association flatfile entries.
  # When a block is given, each entry is yielded instead and the return
  # value is whatever the underlying line iteration returns.
  #
  # Lines starting with "!" and blank lines are skipped.
  def self.parser(str)
    if block_given?
      each_entry(str) { |ga| yield ga }
    else
      galist = []
      each_entry(str) { |ga| galist << ga }
      galist
    end
  end

  # Yields one GeneAssociation per data line of +str+.
  # each_line is used because String#each no longer exists in Ruby 1.9+.
  def self.each_entry(str)
    str.each_line(DELIMITER) do |line|
      next if /^!/ =~ line
      next if line.strip.empty?
      yield GeneAssociation.new(line)
    end
  end
  private_class_method :each_entry

  # Returns DB variable (column 1).
  attr_reader :db # -> aStr

  # Returns Db_Object_Id variable (column 2). Alias to entry_id.
  attr_reader :db_object_id # -> aStr

  # Returns Db_Object_Symbol variable (column 3).
  attr_reader :db_object_symbol

  # Returns Qualifier variable (column 4).
  attr_reader :qualifier

  # Returns Db_Reference variable (column 6), split on "|".
  attr_reader :db_reference # -> []

  # Returns Evidence code variable (column 7).
  attr_reader :evidence

  # Returns the entry is associated with this value (column 8), split on "|".
  attr_reader :with # -> []

  # Returns Aspect variable (column 9).
  attr_reader :aspect

  # Returns Db_Object_Name variable (column 10).
  attr_reader :db_object_name

  # Returns Db_Object_Synonym variable (column 11), split on "|".
  attr_reader :db_object_synonym # -> []

  # Returns Db_Object_Type variable (column 12).
  attr_reader :db_object_type

  # Returns Taxon variable (column 13).
  attr_reader :taxon

  # Returns Date variable (column 14).
  attr_reader :date

  # Returns Assigned_by variable (column 15).
  attr_reader :assigned_by

  alias entry_id db_object_id


  # Parsing an entry (in a line) in the gene_association flatfile.
  #
  # The line is split on TAB. Missing columns are left nil, except the
  # three "|"-separated columns, which become empty Arrays.
  def initialize(entry)
    tmp = entry.chomp.split(/\t/)
    @db                = tmp[0]
    @db_object_id      = tmp[1]
    @db_object_symbol  = tmp[2]
    @qualifier         = tmp[3]
    @goid              = tmp[4]
    @db_reference      = split_multi(tmp[5])
    @evidence          = tmp[6]
    @with              = split_multi(tmp[7])
    @aspect            = tmp[8]
    @db_object_name    = tmp[9]
    @db_object_synonym = split_multi(tmp[10])
    @db_object_type    = tmp[11]
    @taxon             = tmp[12] # taxon:4932
    @date              = tmp[13] # 20010118
    @assigned_by       = tmp[14]
  end


  # Returns GO_ID in /\d{7}/ format. Giving not nil arg, returns
  # /GO:\d{7}/ style.
  #
  # * Bio::GO::GeneAssociation#goid       -> "0001234"
  # * Bio::GO::GeneAssociation#goid(true) -> "GO:0001234"
  def goid(org = nil)
    if org
      @goid
    else
      @goid.sub('GO:','')
    end
  end

  # Bio::GO::GeneAssociation#to_str -> a line of gene_association file.
  #
  # Columns are emitted in the order they were parsed; column 4 is the
  # qualifier and column 6 the "|"-joined DB references.
  def to_str
    return [@db, @db_object_id, @db_object_symbol, @qualifier, @goid,
            @db_reference.join("|"), @evidence, @with.join("|"), @aspect,
            @db_object_name, @db_object_synonym.join("|"), @db_object_type,
            @taxon, @date, @assigned_by].join("\t")
  end

  private

  # Splits a "|"-separated column into an Array; nil becomes [].
  def split_multi(field)
    field.to_s.split(/\|/)
  end

end # class GeneAssociation
319
+
320
+
321
+
322
# = Container class for files in geneontology.org/go/external2go/*2go.
#
# The line syntax is:
#
#   database:<identifier> > GO:<term> ; GO:<GO_id>
#
# == Example
#
#   spkw2go = Bio::GO::External2go.parser(File.read("spkw2go"))
#   spkw2go.size
#   spkw2go.each do |relation|
#     relation # -> {:db => "", :db_id => "", :go_term => "", :go_id => ""}
#   end
#   spkw2go.dbs
#
# == SAMPLE
#   !date: 2005/02/08 18:02:54
#   !Mapping of SWISS-PROT KEYWORDS to GO terms.
#   !Evelyn Camon, SWISS-PROT.
#   !
#   SP_KW:ATP synthesis > GO:ATP biosynthesis ; GO:0006754
#   ...
#
class External2go < Array

  # Returns aHash of the external2go header information
  # ({:date => aStr, :desc => anArray of Strings}).
  attr_reader :header

  # Constructor from parsing external2go file.
  #
  # "!date: ..." sets header[:date]; any other "!" line is appended to
  # header[:desc]; mapping lines are appended as relation Hashes. Blank
  # lines are ignored. Any other line raises RuntimeError.
  def self.parser(str)
    e2g = self.new
    str.each_line do |raw|
      line = raw.chomp
      next if line.strip.empty?
      if line =~ /^\!date: (.+)/
        e2g.header[:date] = $1
      elsif line =~ /^\!(.*)/
        e2g.header[:desc] << $1
      elsif ary = line.scan(/^(.+?):(.+) > GO:(.+) ; (GO:\d{7})/).first
        e2g << {:db_id => ary[1], :db => ary[0], :go_term => ary[2], :go_id => ary[3]}
      else
        raise("Invalid Format Line: \n #{line.inspect}\n")
      end
    end
    return e2g
  end


  # Constructor. Creates an empty container with an empty header; use
  # External2go.parser to build one from file contents.
  # relation := {:db => aStr, :db_id => aStr, :go_term => aStr, :go_id => aStr}
  def initialize
    @header = {:date => '', :desc => []}
    super
  end


  # Bio::GO::External2go#set_date(value)
  def set_date(value)
    @header[:date] = value
  end


  # Bio::GO::External2go#set_desc(ary)
  def set_desc(ary)
    @header[:desc] = ary
  end


  # Bio::GO::External2go#to_str
  # Returns the contents in the external2go format.
  #
  # The lines are collected into one flat Array before joining: joining
  # nested Arrays directly left an empty line wherever a section was empty,
  # and such a line was then rejected by External2go.parser.
  def to_str
    lines = ["!date: #{@header[:date]}"]
    lines.concat(@header[:desc].map {|e| "!#{e}" })
    lines.concat(self.map {|e|
      [e[:db], ':', e[:db_id], ' > GO:', e[:go_term], ' ; ', e[:go_id]].join
    })
    lines.join("\n")
  end


  # Returns ary of databases.
  def dbs
    self.map {|rel| rel[:db] }.uniq
  end


  # Returns ary of database IDs.
  def db_ids
    self.map {|rel| rel[:db_id] }.uniq
  end

  # Returns ary of GO Terms.
  def go_terms
    self.map {|rel| rel[:go_term] }.uniq
  end

  # Returns ary of GO IDs.
  def go_ids
    self.map {|rel| rel[:go_id] }.uniq
  end

end # class External2go
421
+
422
+ end # class GO
423
+
424
+ end # module Bio
425
+
426
+
427
+
428
+
429
+
430
# Demo script: when this file is run directly, download sample data from
# geneontology.org and exercise Bio::GO::Ontology, Bio::GO::External2go and
# Bio::GO::GeneAssociation.
if __FILE__ == $0

  require 'net/http'
  require 'zlib'
  require 'stringio'

  # Fetches +url+ (http only) and returns the response body.
  # Raises ArgumentError when +url+ is not of the form http://host/path.
  def wget(url)
    if /http:\/\/(.+?)\// =~ url
      host = $1
      path = url[(url.index(host) + host.size)..url.size]
    else
      raise ArgumentError, "Invalid URL\n#{url}"
    end

    Net::HTTP.new(host).get(path).body
  end


  go_c_url = 'http://www.geneontology.org/ontology/component.ontology'
  ga_url = 'http://www.geneontology.org/gene-associations/gene_association.sgd.gz'
  e2g_url = 'http://www.geneontology.org/external2go/spkw2go'


  puts "\n #==> Bio::GO::Ontology"
  p go_c_url
  component_ontology = wget(go_c_url)
  comp = Bio::GO::Ontology.new(component_ontology)

  [['0003673', '0005632'],
   ['0003673', '0005619'],
   ['0003673', '0004649']].each {|pair|
    puts
    p pair
    p [:pair, pair.map {|i| [comp.id2term[i], comp.goid2term(i)] }]
    puts "\n #==> comp.bfs_shortest_path(pair[0], pair[1])"
    p comp.bfs_shortest_path(pair[0], pair[1])
  }


  puts "\n #==> Bio::GO::External2go"
  p e2g_url
  # External2go.new takes no arguments; the file parser is External2go.parser.
  spkw2go = Bio::GO::External2go.parser(wget(e2g_url))

  puts "\n #==> spkw2go.dbs"
  p spkw2go.dbs

  puts "\n #==> spkw2go[1]"
  p spkw2go[1]


  puts "\n #==> Bio::GO::GeneAssociation"
  p ga_url
  # The .gz file uses gzip framing, which Zlib::Inflate.inflate rejects;
  # decompress it with GzipReader instead.
  ga = Zlib::GzipReader.new(StringIO.new(wget(ga_url))).read
  ga = Bio::GO::GeneAssociation.parser(ga)

  puts "\n #==> ga.size"
  p ga.size

  puts "\n #==> ga[100]"
  p ga[100]

end