bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,73 @@
1
+ #
2
+ # bio/db/genbank/genpept.rb - GenPept database class
3
+ #
4
+ # Copyright (C) 2002-2004 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: genpept.rb,v 1.10 2005/10/23 07:20:37 k Exp $
21
+ #
22
+
23
+ require 'bio/db/genbank/common'
24
+ require 'bio/db/genbank/genbank'
25
+
26
+ module Bio
27
+ class GenPept < NCBIDB
28
+
29
+ include Bio::NCBIDB::Common
30
+
31
+ # LOCUS
32
# Parsed form of the fixed-column LOCUS line of a GenPept entry.
#
# Every field is cut from a fixed column range of the line.  When the line
# is too short to contain a field, that field becomes an empty String (or 0
# for +length+) instead of raising NoMethodError on nil.
class Locus
  attr_accessor :entry_id, :length, :circular, :division, :date

  # locus_line:: the raw LOCUS line (String)
  def initialize(locus_line)
    @entry_id = column(locus_line, 12..27)
    @length   = locus_line[29..39].to_i
    @circular = column(locus_line, 55..62) # always linear
    @division = column(locus_line, 63..66)
    @date     = column(locus_line, 68..78)
  end

  private

  # Returns the stripped text found in +range+ of +line+; String#[] yields
  # nil when the range starts past the end of the line, hence the to_s.
  def column(line, range)
    line[range].to_s.strip
  end
end
42
+
43
# Returns the Locus object for this entry.  It is built from the text
# returned by get('LOCUS') on first use and cached in @data['LOCUS'].
def locus
  cached = @data['LOCUS']
  return cached if cached
  @data['LOCUS'] = Locus.new(get('LOCUS'))
end
46
# The readers below simply forward to the Locus object returned by #locus.

# Entry identifier taken from the LOCUS line.
def entry_id
  locus.entry_id
end

# Length field of the LOCUS line.
def length
  locus.length
end

# Topology field of the LOCUS line.
def circular
  locus.circular
end

# Division field of the LOCUS line.
def division
  locus.division
end

# Date field of the LOCUS line.
def date
  locus.date
end
51
+
52
+
53
+ # ORIGIN
54
# Returns the sequence as a new Bio::Sequence::AA built from
# @data['SEQUENCE'].  When that slot is still empty, #origin is called
# first (it is expected to fill @data['SEQUENCE']).
def seq
  origin unless @data['SEQUENCE']
  Bio::Sequence::AA.new(@data['SEQUENCE'])
end
60
+ alias aaseq seq
61
+ alias aalen length
62
+
63
# Returns the length of the sequence object produced by #seq.
def seq_len
seq.length
end
66
+
67
+ # DBSOURCE
68
# Returns the DBSOURCE field of the entry, exactly as get('DBSOURCE')
# hands it back.
def dbsource
get('DBSOURCE')
end
71
+
72
+ end # GenPept
73
+ end # Bio
@@ -0,0 +1,31 @@
1
+ #
2
+ # bio/db/genbank/refseq.rb - RefSeq database class
3
+ #
4
+ # Copyright (C) 2000-2004 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: refseq.rb,v 1.6 2004/08/23 23:40:35 k Exp $
21
+ #
22
+
23
+ require 'bio/db/genbank/genbank'
24
+
25
+ module Bio
26
+
27
# RefSeq entry class.  It adds no behavior of its own: everything is
# inherited from GenBank.
class RefSeq < GenBank
# Nothing to do (the RefSeq database format is exactly the same as GenBank)
end
30
+
31
+ end # Bio
data/lib/bio/db/gff.rb ADDED
@@ -0,0 +1,106 @@
1
+ #
2
+ # = bio/db/gff.rb - GFF format class
3
+ #
4
+ # Copyright:: Copyright (C) 2003, 2005
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: gff.rb,v 1.5 2005/12/18 15:58:41 k Exp $
9
+ #
10
+ # == Description
11
+ #
12
+ #
13
+ # == Example
14
+ #
15
+ #
16
+ # == References
17
+ #
18
+ # * http://www.sanger.ac.uk/Software/formats/GFF/
19
+ #
20
+ #--
21
+ #
22
+ # This library is free software; you can redistribute it and/or
23
+ # modify it under the terms of the GNU Lesser General Public
24
+ # License as published by the Free Software Foundation; either
25
+ # version 2 of the License, or (at your option) any later version.
26
+ #
27
+ # This library is distributed in the hope that it will be useful,
28
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
29
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30
+ # Lesser General Public License for more details.
31
+ #
32
+ # You should have received a copy of the GNU Lesser General Public
33
+ # License along with this library; if not, write to the Free Software
34
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
35
+ #
36
+ #++
37
+ #
38
+
39
+ module Bio
40
+
41
# = Bio::GFF
#
# Container for the records of a GFF text.  Every input line, comment
# lines included, becomes one Bio::GFF::Record.
class GFF

  # Array of Bio::GFF::Record objects in input order.
  attr_accessor :records

  # str:: GFF text; each line is parsed into a Record.
  def initialize(str = '')
    @records = Array.new
    str.each_line do |line|
      @records << Record.new(line)
    end
  end

  # One line of a GFF text.  The column accessors hold the tab-separated
  # fields as Strings, in the order they appear on the line.
  class Record

    attr_accessor :seqname
    attr_accessor :source
    attr_accessor :feature
    attr_accessor :start
    attr_accessor :end
    attr_accessor :score
    attr_accessor :strand
    attr_accessor :frame
    # Hash of attribute key => value parsed from the ninth column.
    attr_accessor :attributes
    # Text from the first '#' to the end of the line, or nil.
    attr_accessor :comments

    # str:: a single line of GFF text.  Lines starting with '#' only set
    # #comments; all column accessors stay nil for them.
    def initialize(str)
      @comments = str.chomp[/#.*/]
      return if /^#/.match(str)
      @seqname, @source, @feature, @start, @end, @score, @strand, @frame,
        attributes, = str.chomp.split("\t")
      @attributes = parse_attributes(attributes) if attributes
    end

    private

    # Splits the attributes column on unescaped ';' and returns a Hash of
    # key => value, where the key is the first whitespace-delimited word of
    # each segment and the value is the rest.
    #
    # The segments are collected with scan instead of split(/[^\\];/):
    # that pattern also consumed the character in front of every ';', so
    # each value lost its last character.  Blank segments (for example
    # after a trailing ';') are ignored.
    def parse_attributes(attributes)
      hash = Hash.new
      attributes.scan(/(?:\\.|[^;])+/).each do |atr|
        key, value = atr.strip.split(' ', 2)
        next unless key
        hash[key] = value
      end
      return hash
    end
  end

  class GFF2 < GFF
    VERSION = 2
  end

  class GFF3 < GFF
    VERSION = 3
  end

end # class GFF
94
+
95
+ end # module Bio
96
+
97
+
98
# Script entry point: when this file is executed directly, parse the GFF
# text read from ARGF and print the resulting Bio::GFF object.
if __FILE__ == $0
begin
# Use pretty-printed output when the pp library can be loaded.
require 'pp'
alias p pp
rescue LoadError
# pp is not available; keep the built-in p.
end

p Bio::GFF.new(ARGF.read)
end
data/lib/bio/db/go.rb ADDED
@@ -0,0 +1,497 @@
1
+ #
2
+ # = bio/db/go.rb - Classes for Gene Ontology
3
+ #
4
+ # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # License::
6
+ #
7
+ # $Id: go.rb,v 1.9 2005/10/31 18:32:36 nakao Exp $
8
+ #
9
+ # == Gene Ontology
10
+ #
11
+ # == Example
12
+ #
13
+ # == References
14
+ #--
15
+ #
16
+ # This library is free software; you can redistribute it and/or
17
+ # modify it under the terms of the GNU Lesser General Public
18
+ # License as published by the Free Software Foundation; either
19
+ # version 2 of the License, or (at your option) any later version.
20
+ #
21
+ # This library is distributed in the hope that it will be useful,
22
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
23
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24
+ # Lesser General Public License for more details.
25
+ #
26
+ # You should have received a copy of the GNU Lesser General Public
27
+ # License along with this library; if not, write to the Free Software
28
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29
+ #
30
+ #++
31
+ #
32
+
33
+ require 'bio/pathway'
34
+
35
+ module Bio
36
+
37
+ # = Bio::GO
38
+ # Classes for Gene Ontology http://www.geneontology.org
39
+ class GO
40
+
41
+ # = Bio::GO::Ontology
42
+ #
43
+ # Container class for ontologies in the DAG Edit format.
44
+ #
45
+ # == Example
46
+ #
47
+ # c_data = File.open('component.ontology').read
48
+ # go_c = Bio::GO::Ontology.new(c_data)
49
+ # p go_c.bfs_shortest_path('0003673','0005632')
50
+ class Ontology < Bio::Pathway
51
+
52
+ # Bio::GO::Ontology.parse_goids(line)
53
+ #
54
+ # Parsing GOID line in the DAGEdit format
55
+ # GO:ID[ ; GO:ID...]
56
# Collects the 7-digit GO IDs that follow the term name on an entry line.
#
# The leading "<relation><term> ;" part is dropped first; after that every
# directly following " GO:ddddddd" item (optionally wrapped in commas) is
# collected until the remaining text starts with something else.
#
# Returns an Array of ID Strings without the "GO:" prefix.
def self.parse_goids(line)
  found = []
  rest = line
  loop do
    if rest =~ /^ *[$%<]\S.+?;/
      # skip everything up to and including the first ';'
      rest = rest[(rest.index(';') + 1)..rest.size]
    elsif rest =~ /^,* GO:(\d{7}),*/
      id = $1.clone
      found << id
      # continue right behind the ID that was just collected
      rest = rest[(rest.index(id) + id.size)..rest.size]
    else
      break
    end
  end
  found
end
72
+
73
+ # Returns a Hash instance of the header lines in ontology flatfile.
74
attr_reader :header_lines

# Hash mapping a GO ID (String of 7 digits) to the term name found on
# its entry line.
attr_reader :id2term

# Hash mapping each GO ID listed on an entry line to the first GO ID of
# that line (only filled for entries nested below the root level).
attr_reader :id2id
81
+
82
+
83
+ # Bio::GO::Ontology.new(str)
84
+ # The DAG Edit format ontology data parser.
85
# str:: ontology text in the DAG Edit format.
#
# Resets the lookup tables, parses +str+ into a list of relations and
# passes that list to the superclass constructor.
def initialize(str)
  @header_lines = {}
  @id2term = {}
  @id2id = {}
  super(dag_edit_format_parser(str))
end
92
+
93
+
94
+ # Returns the GO_Term corresponding to the given GO_ID.
95
# Looks +goid+ up in id2term.  When it is not found there, the ID is first
# translated through id2id and the lookup is repeated with the result.
def goid2term(goid)
  direct = id2term[goid]
  return direct unless direct.nil?
  id2term[id2id[goid]]
end
100
+
101
+ private
102
+
103
+ # Constructs the adjacency list for the given ontology.
104
# Parses ontology text in the DAG Edit format and returns the relations
# found in it as an Array of Bio::Relation objects.
#
# Side effects:
# * "!tag: value" header lines are stored in @header_lines ('-' in the tag
#   is replaced by '_'; the 'type' tag is skipped).
# * @id2term and @id2id are filled from the entry lines.
#
# The nesting depth of an entry is the number of leading spaces; the entry
# at depth - 1 that was seen last is taken as its parent.  Additional
# parents listed on the same line ("... % parent ; GO:ddddddd") are added
# as relations too.
def dag_edit_format_parser(str)
  stack = []
  adj_list = []

  # each_line instead of String#each, which no longer exists on Ruby >= 1.9.
  str.each_line do |line|
    if /^!(.+?):\s+(\S.+)$/ =~ line # Parsing head lines
      tag, value = $1, $2
      tag = tag.tr('-', '_')
      # Store the value directly.  Evaluating a string built from the file
      # content would execute whatever the file contains and fails on any
      # value holding a single quote.
      @header_lines[tag] = value unless tag == 'type'
      next
    end

    entry = /^( *)([$<%])(.+?) ; GO:(\d{7})/.match(line) # GO Term ; GO:ID
    next unless entry

    depth = entry[1].length
    rel   = entry[2]
    term  = entry[3]
    goid  = entry[4]
    rest  = entry.post_match          # text behind the first GO ID
    goids = parse_goids(line)         # GO:ID[ ; GO:ID...]

    stack[depth] = goids.first
    @id2term[goid] = term

    next if depth == 0

    goids.each do |id|
      @id2term[id] = term
      @id2id[id] = goids.first
      adj_list << Bio::Relation.new(stack[depth - 1], id, rel)
    end

    # Further parents on the same line: "<relation> <term> ; GO:<id>".
    # The former loop re-tested the unchanged line, so it either stopped at
    # once or never terminated; scanning the remainder visits each parent
    # exactly once.
    rest.scan(/ ([<%]) (.+?) ; GO:(\d{7})/) do |rel1, term1, goid1|
      @id2term[goid1] = term1
      goids.each do |id|
        adj_list << Bio::Relation.new(goid1, id, rel1)
      end
    end
  end

  return adj_list
end
163
+
164
+
165
+ # Returns an ary of GO IDs by parsing an entry line in the DAG Edit
166
+ # format.
167
# Instance-level shortcut: forwards +line+ to Ontology.parse_goids and
# returns its result.
def parse_goids(line)
Ontology.parse_goids(line)
end
170
+
171
+ # Bio::GO::Ontology#parse_synonyms(line)
172
# Returns an Array with the text of every " ; synonym:<text>" item found
# in +line+, in order of appearance.  A synonym ends before optional
# spaces followed by ';', '<', '%' or a newline.
#
# The items are collected with one scan over the line.  Searching for the
# position of the synonym text itself (as was done before) finds the first
# occurrence anywhere in the line, so a synonym that also appeared in the
# term name was reported twice.
def parse_synonyms(line)
  # The lookahead keeps the terminator unconsumed, so the " ; " that
  # introduces the next synonym is still available to the next match.
  line.scan(/ ; synonym:(\S.+?)(?= *[;<%\n])/).flatten
end
185
+
186
+ end # class Ontology
187
+
188
+
189
+
190
+ # = Bio::GO::GeneAssociation
191
+ # $CVSROOT/go/gene-associations/gene_association.*
192
+ #
193
+ # Data parser for the gene_association go annotation.
194
+ # See also the file format http://www.geneontology.org/doc/GO.annotation.html#file
195
+ #
196
+ # == Example
197
+ #
198
+ # mgi_data = File.open('gene_association.mgi').read
199
+ # mgi = Bio::GO::GeneAssociation.parser(mgi_data)
200
+ #
201
+ # Bio::GO::GeneAssociation.parser(mgi_data) do |entry|
202
+ # p [entry.entry_id, entry.evidence, entry.goid]
203
+ # end
204
+ #
205
# One entry (line) of a gene_association file.  The line is split on tabs
# and the columns are exposed, in file order, through the readers below.
class GeneAssociation # < Bio::DB

  # Delimiter
  DELIMITER = "\n"

  # Delimiter
  RS = DELIMITER

  # Parses gene_association text.
  #
  # Lines starting with '!' and blank lines are skipped.  With a block,
  # every parsed entry is yielded; without one, an Array of the entries is
  # returned.
  def self.parser(str)
    # each_line instead of String#each, which no longer exists on
    # Ruby >= 1.9.
    if block_given?
      str.each_line(DELIMITER) {|line|
        next if /^!/ =~ line || line.strip.empty?
        yield GeneAssociation.new(line)
      }
    else
      galist = []
      str.each_line(DELIMITER) {|line|
        next if /^!/ =~ line || line.strip.empty?
        galist << GeneAssociation.new(line)
      }
      return galist
    end
  end

  # Returns the DB column (column 1).
  attr_reader :db # -> aStr

  # Returns the DB_Object_ID column (column 2).  Alias to entry_id.
  attr_reader :db_object_id # -> aStr

  # Returns the DB_Object_Symbol column (column 3).
  attr_reader :db_object_symbol

  # Returns the Qualifier column (column 4).
  attr_reader :qualifier

  # Returns the DB:Reference column (column 6), split on '|'.
  attr_reader :db_reference # -> []

  # Returns the Evidence code column (column 7).
  attr_reader :evidence

  # Returns the With column (column 8), split on '|'.
  attr_reader :with # -> []

  # Returns the Aspect column (column 9).
  attr_reader :aspect

  # Returns the DB_Object_Name column (column 10).
  attr_reader :db_object_name

  # Returns the DB_Object_Synonym column (column 11), split on '|'.
  attr_reader :db_object_synonym # -> []

  # Returns the DB_Object_Type column (column 12).
  attr_reader :db_object_type

  # Returns the Taxon column (column 13), e.g. "taxon:4932".
  attr_reader :taxon

  # Returns the Date column (column 14), e.g. "20010118".
  attr_reader :date

  # Returns the Assigned_by column (column 15).
  attr_reader :assigned_by

  alias entry_id db_object_id


  # Parsing an entry (in a line) in the gene_association flatfile.
  # Columns missing from a short line are nil; the multi-valued columns
  # are empty Arrays in that case.
  def initialize(entry)
    tmp = entry.chomp.split(/\t/)
    @db                = tmp[0]
    @db_object_id      = tmp[1]
    @db_object_symbol  = tmp[2]
    @qualifier         = tmp[3]
    @goid              = tmp[4]
    @db_reference      = split_values(tmp[5])
    @evidence          = tmp[6]
    @with              = split_values(tmp[7])
    @aspect            = tmp[8]
    @db_object_name    = tmp[9]
    @db_object_synonym = split_values(tmp[10])
    @db_object_type    = tmp[11]
    @taxon             = tmp[12] # taxon:4932
    @date              = tmp[13] # 20010118
    @assigned_by       = tmp[14]
  end


  # Returns GO_ID in /\d{7}/ format. Giving not nil arg, returns
  # /GO:\d{7}/ style.
  #
  # * Bio::GO::GeneAssociation#goid       -> "0001234"
  # * Bio::GO::GeneAssociation#goid(true) -> "GO:0001234"
  def goid(org = nil)
    if org
      @goid
    else
      @goid.sub('GO:','')
    end
  end

  # Bio::GO::GeneAssociation#to_str -> a line of gene_association file.
  #
  # The columns are written in the order they were read: the qualifier in
  # column 4 and the '|'-joined references in column 6.
  def to_str
    return [@db, @db_object_id, @db_object_symbol, @qualifier, @goid,
            @db_reference.join("|"), @evidence, @with.join("|"), @aspect,
            @db_object_name, @db_object_synonym.join("|"), @db_object_type,
            @taxon, @date, @assigned_by].join("\t")
  end

  private

  # Splits a '|'-separated column into an Array; a missing column (nil)
  # gives an empty Array.
  def split_values(field)
    field.to_s.split(/\|/)
  end

end # class GeneAssociation
319
+
320
+
321
+
322
+ # = Container class for files in geneontology.org/go/external2go/*2go.
323
+ #
324
+ # The line syntax is:
325
+ #
326
+ # database:<identifier> > GO:<term> ; GO:<GO_id>
327
+ #
328
+ # == Example
329
+ #
330
+ # spkw2go = Bio::GO::External2go.parser(File.read("spkw2go"))
331
+ # spkw2go.size
332
+ # spkw2go.each do |relation|
333
+ # relation # -> {:db => "", :db_id => "", :go_term => "", :go_id => ""}
334
+ # end
335
+ # spkw2go.dbs
336
+ #
337
+ # == SAMPLE
338
+ # !date: 2005/02/08 18:02:54
339
+ # !Mapping of SWISS-PROT KEYWORDS to GO terms.
340
+ # !Evelyn Camon, SWISS-PROT.
341
+ # !
342
+ # SP_KW:ATP synthesis > GO:ATP biosynthesis ; GO:0006754
343
+ # ...
344
+ #
345
# Array of relations read from an external2go mapping file.  Each element
# is a Hash of the form
#   {:db => aStr, :db_id => aStr, :go_term => aStr, :go_id => aStr}
class External2go < Array

  # Returns aHash of the external2go header information
  # (:date => aStr, :desc => anArray of aStr).
  attr_reader :header

  # Constructor.  Creates an empty list with an empty header.
  def initialize
    @header = {:date => '', :desc => []}
    super
  end

  # Builds an External2go from the text of an external2go file.
  #
  # "!date: ..." sets the header date, any other "!..." line is appended to
  # the header description and every mapping line becomes one relation.
  # Any other line raises a RuntimeError.
  def self.parser(str)
    mapping = new
    str.each_line do |raw|
      line = raw.chomp
      if (m = /^\!date: (.+)/.match(line))
        mapping.header[:date] = m[1]
      elsif (m = /^\!(.*)/.match(line))
        mapping.header[:desc] << m[1]
      elsif (m = /^(.+?):(.+) > GO:(.+) ; (GO:\d{7})/.match(line))
        mapping << {:db_id => m[2], :db => m[1], :go_term => m[3], :go_id => m[4]}
      else
        raise("Invalid Format Line: \n #{line.inspect}\n")
      end
    end
    mapping
  end

  # Bio::GO::External2go#set_date(value)
  # Replaces the header date.
  def set_date(value)
    @header[:date] = value
  end

  # Bio::GO::External2go#set_desc(ary)
  # Replaces the header description lines.
  def set_desc(ary)
    @header[:desc] = ary
  end

  # Bio::GO::External2go#to_str
  # Returns the contents in the external2go format.
  def to_str
    date_line   = "!date: #{@header[:date]}"
    desc_lines  = @header[:desc].map {|text| "!#{text}" }
    entry_lines = map do |rel|
      [rel[:db], ':', rel[:db_id], ' > GO:', rel[:go_term], ' ; ', rel[:go_id]].join
    end
    [date_line, desc_lines, entry_lines].join("\n")
  end

  # Returns ary of databases.
  def dbs
    distinct(:db)
  end

  # Returns ary of database IDs.
  def db_ids
    distinct(:db_id)
  end

  # Returns ary of GO Terms.
  def go_terms
    distinct(:go_term)
  end

  # Returns ary of GO IDs.
  def go_ids
    distinct(:go_id)
  end

  private

  # Distinct values stored under +key+ across all relations, in order of
  # first appearance.
  def distinct(key)
    map {|rel| rel[key] }.uniq
  end

end # class External2go
421
+
422
+ end # class GO
423
+
424
+ end # module Bio
425
+
426
+
427
+
428
+
429
+
430
# Demo, run only when this file is executed directly.  It downloads an
# ontology file, an external2go mapping and a gzip-compressed
# gene_association file over HTTP and prints a few values parsed from each.
if __FILE__ == $0

  require 'net/http'
  require 'zlib'
  require 'stringio'

  # Fetches +url+ with an HTTP GET and returns the response body.
  # Raises ArgumentError unless +url+ looks like "http://host/...".
  def wget(url)
    unless /http:\/\/(.+?)\// =~ url
      raise ArgumentError, "Invalid URL\n#{url}"
    end
    host = $1
    # everything behind the host name is the request path
    path = url[(url.index(host) + host.size)..url.size]

    Net::HTTP.new(host).get(path).body
  end


  go_c_url = 'http://www.geneontology.org/ontology/component.ontology'
  ga_url = 'http://www.geneontology.org/gene-associations/gene_association.sgd.gz'
  e2g_url = 'http://www.geneontology.org/external2go/spkw2go'


  puts "\n #==> Bio::GO::Ontology"
  p go_c_url
  component_ontology = wget(go_c_url)
  comp = Bio::GO::Ontology.new(component_ontology)

  [['0003673', '0005632'],
   ['0003673', '0005619'],
   ['0003673', '0004649']].each {|pair|
    puts
    p pair
    p [:pair, pair.map {|i| [comp.id2term[i], comp.goid2term(i)] }]
    puts "\n #==> comp.bfs_shortest_path(pair[0], pair[1])"
    p comp.bfs_shortest_path(pair[0], pair[1])
  }


  puts "\n #==> Bio::GO::External2go"
  p e2g_url
  # External2go.new takes no argument; the text is parsed by .parser.
  spkw2go = Bio::GO::External2go.parser(wget(e2g_url))

  puts "\n #==> spkw2go.dbs"
  p spkw2go.dbs

  puts "\n #==> spkw2go[1]"
  p spkw2go[1]


  puts "\n #==> Bio::GO::GeneAssociation"
  p ga_url
  # The file is gzip-compressed; Zlib::Inflate only understands raw zlib
  # streams, so it is read through Zlib::GzipReader.
  ga = Zlib::GzipReader.new(StringIO.new(wget(ga_url))).read
  ga = Bio::GO::GeneAssociation.parser(ga)

  puts "\n #==> ga.size"
  p ga.size

  puts "\n #==> ga[100]"
  p ga[100]

end