bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,991 @@
1
+ #
2
+ # bio/pathway.rb - Binary relations and Graph algorithms
3
+ #
4
+ # Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
5
+ # KAWASHIMA Shuichi <s@bioruby.org>
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: pathway.rb,v 1.34 2005/12/18 16:50:56 k Exp $
22
+ #
23
+
24
+ require 'matrix'
25
+
26
+ module Bio
27
+
28
# General graph object built from a list of relation objects (edges).
#
# The graph is stored as an adjacency list in @graph, a Hash of Hashes of
# the form { from => { to => relation } }.  The list of relations the graph
# was built from is kept redundantly in @relations, and an adjacency matrix
# can be generated on demand with to_matrix.
class Pathway

  # Initial graph (adjacency list) generation from the list of Relation.
  #
  # relations::  Array of objects responding to +from+, +to+ and +relation+.
  #              The Array itself is stored (not copied) as @relations.
  # undirected:: any true value makes every edge be registered in both
  #              directions.
  def initialize(relations, undirected = false)
    @undirected = undirected
    @relations = relations
    @graph = {}         # adjacency list expression of the graph
    @index = {}         # numbering each node in matrix (filled by to_matrix)
    @label = {}         # additional information on each node
    to_list             # generate adjacency list
  end
  attr_reader :relations, :graph, :index
  attr_accessor :label

  # Returns true when the graph is directed, false otherwise.
  def directed?
    @undirected ? false : true
  end

  # Returns true when the graph is undirected, false otherwise.
  def undirected?
    @undirected ? true : false
  end

  # Switches an undirected graph to directed and rebuilds the adjacency
  # list from @relations.  Does nothing (returns nil) if already directed.
  def directed
    return unless undirected?
    @undirected = false
    to_list
  end

  # Switches a directed graph to undirected and rebuilds the adjacency
  # list from @relations.  Does nothing (returns nil) if already undirected.
  def undirected
    return unless directed?
    @undirected = true
    to_list
  end

  # clear @relations to reduce the memory usage
  def clear_relations!
    @relations.clear
  end

  # reconstruct @relations from the adjacency list @graph
  #
  # The existing content of @relations is discarded first.  Returns the
  # rebuilt @relations Array.
  def to_relations
    @relations.clear
    @graph.each_key do |from|
      @graph[from].each do |to, w|
        @relations << Relation.new(from, to, w)
      end
    end
    @relations
  end

  # Graph (adjacency list) generation from the Relations
  def to_list
    @graph.clear
    @relations.each do |rel|
      append(rel, false)  # append to @graph without push to @relations
    end
  end

  # Adds the edge described by +rel+ to @graph.
  #
  # rel::     object responding to +from+, +to+ and +relation+
  # add_rel:: when true (default) +rel+ is also pushed onto @relations
  #
  # Both end nodes are registered as keys of @graph even if they have no
  # outgoing edge.  In an undirected graph the reverse edge is stored too.
  def append(rel, add_rel = true)
    @relations.push(rel) if add_rel
    @graph[rel.from] = {} if @graph[rel.from].nil?
    @graph[rel.to] = {} if @graph[rel.to].nil?
    @graph[rel.from][rel.to] = rel.relation
    @graph[rel.to][rel.from] = rel.relation if @undirected
  end

  # Removes the edge described by +rel+ from @relations (every entry for
  # which <tt>entry === rel</tt>) and from @graph.  The end nodes stay in
  # the graph.  Deleting an edge whose nodes are unknown is a no-op.
  def delete(rel)
    @relations.delete_if { |x| x === rel }
    @graph[rel.from].delete(rel.to) if @graph[rel.from]
    @graph[rel.to].delete(rel.from) if @undirected && @graph[rel.to]
  end

  # Returns the number of nodes in the graph.
  def nodes
    @graph.keys.length
  end

  # Returns the number of entries in the adjacency list (an edge of an
  # undirected graph is stored, and therefore counted, in both directions).
  def edges
    @graph.values.inject(0) { |sum, adjacent| sum + adjacent.size }
  end

  # Convert adjacency list to adjacency matrix
  #
  # default_value::  value of the cells without an edge
  # diagonal_value:: if given, value of the diagonal cells (an explicit
  #                  self-loop edge still overwrites it)
  #
  # As a side effect @index is updated with the row/column number assigned
  # to each node.  Returns a Matrix.
  def to_matrix(default_value = nil, diagonal_value = nil)
    size = nodes

    # Note: Array.new(size, Array.new(size)) would fill the outer Array
    # with references to the same inner Array object, so create a new
    # Array object for each row.
    matrix = Array.new(size) { Array.new(size, default_value) }

    if diagonal_value
      size.times { |i| matrix[i][i] = diagonal_value }
    end

    # assign index number for each node
    @graph.keys.each_with_index { |k, i| @index[k] = i }

    if @relations.empty?                # only used after clear_relations!
      @graph.each do |from, hash|
        hash.each do |to, relation|
          matrix[@index[from]][@index[to]] = relation
        end
      end
    else
      @relations.each do |rel|
        x = @index[rel.from]
        y = @index[rel.to]
        matrix[x][y] = rel.relation
        matrix[y][x] = rel.relation if @undirected
      end
    end
    Matrix[*matrix]
  end

  # pretty printer of the adjacency matrix
  #
  # Accepts the same arguments as to_matrix; returns a String.
  def dump_matrix(*arg)
    matrix = to_matrix(*arg)
    sorted = @index.sort { |a, b| a[1] <=> b[1] }
    "[# " + sorted.collect { |x| x[0] }.join(", ") + "\n" +
      matrix.to_a.collect { |row| ' ' + row.inspect }.join(",\n") + "\n]"
  end

  # pretty printer of the adjacency list
  #
  # Returns a String with one "from => to (relation), ..." line per node.
  def dump_list
    @graph.collect { |from, hash|
      targets = hash.collect { |to, relation| "#{to} (#{relation})" }
      "#{from} => " + targets.join(", ") + "\n"
    }.join
  end

  # Select labeled nodes and generate subgraph
  #
  # list:: optional Array of nodes.  When given, @label is cleared and
  #        replaced by a mark for each listed node; otherwise the current
  #        @label is used to select the nodes.
  #
  # Returns a new Pathway containing only the edges whose both ends are
  # selected.
  def subgraph(list = nil)
    if list
      @label.clear
      list.each { |node| @label[node] = true }
    end
    sub_graph = Pathway.new([], @undirected)
    @graph.each do |from, hash|
      next unless @label[from]
      hash.each do |to, relation|
        next unless @label[to]
        sub_graph.append(Relation.new(from, to, relation))
      end
    end
    sub_graph
  end

  # Not implemented yet.
  def common_subgraph(graph)
    raise NotImplementedError
  end

  # Not implemented yet.
  def clique
    raise NotImplementedError
  end

  # Returns completeness of the edge density among the surrounded nodes
  #
  # The neighbors of +node+ are extracted with subgraph (which overwrites
  # @label) and the number of edges among them is divided by the number of
  # edges of a complete graph on the same nodes.  Returns 0.0 when there is
  # no edge among the neighbors.
  def cliquishness(node)
    neighbors = @graph[node].keys
    sg = subgraph(neighbors)
    return 0.0 if sg.graph.size == 0
    edge_count = sg.edges / 2.0
    node_count = sg.nodes
    complete = (node_count * (node_count - 1)) / 2.0
    edge_count / complete
  end

  # Returns frequency of the nodes having same number of edges as hash
  def small_world
    freq = Hash.new(0)
    @graph.each_value { |v| freq[v.size] += 1 }
    freq
  end

  # Breadth first search solves steps and path to the each node and forms
  # a tree contains all reachable vertices from the root node.
  #
  # Returns two Hashes: the number of steps from +root+ to each reachable
  # node, and the predecessor of each reachable node in the search tree.
  # Edge weights are not considered.
  def breadth_first_search(root)
    visited = { root => true }
    distance = { root => 0 }
    predecessor = { root => nil }

    queue = [ root ]

    while from = queue.shift
      next unless @graph[from]
      @graph[from].each_key do |to|
        next if visited[to]
        visited[to] = true
        distance[to] = distance[from] + 1
        predecessor[to] = from
        queue.push(to)
      end
    end
    return distance, predecessor
  end
  alias bfs breadth_first_search

  # Returns the number of steps and the path (Array of nodes) from +node1+
  # to +node2+ found by breadth_first_search.  When +node2+ is not
  # reachable the step is nil and the path contains +node2+ only.
  def bfs_shortest_path(node1, node2)
    distance, route = breadth_first_search(node1)
    step = distance[node2]
    node = node2
    path = [ node2 ]
    while node != node1 && route[node]
      node = route[node]
      path.unshift(node)
    end
    return step, path
  end

  # Depth first search yields much information about the structure of the
  # graph especially on the classification of the edges.
  #
  # Returns five Hashes: the [discovered, finished] timestamps of each
  # node, and the tree, back, cross and forward edges.  Tree edges are
  # stored as { to => from }, the others as { from => to } (one entry per
  # key, so a later edge from the same node replaces an earlier one).
  def depth_first_search
    visited = {}
    timestamp = {}
    tree_edges = {}
    back_edges = {}
    cross_edges = {}
    forward_edges = {}
    count = 0

    dfs_visit = Proc.new { |from|
      visited[from] = true
      timestamp[from] = [count += 1]
      @graph[from].each_key do |to|
        if !visited[to]
          # tree edge (white)
          p "#{from} -> #{to} : tree edge" if $DEBUG
          tree_edges[to] = from
          dfs_visit.call(to)
        elsif timestamp[to].size <= 1
          # back edge (gray): 'to' is discovered but not finished yet
          p "#{from} -> #{to} : back edge" if $DEBUG
          back_edges[from] = to
        elsif timestamp[from].first < timestamp[to].first
          # forward edge (black)
          p "#{from} -> #{to} : forward edge" if $DEBUG
          forward_edges[from] = to
        else
          # cross edge (black)
          p "#{from} -> #{to} : cross edge" if $DEBUG
          cross_edges[from] = to
        end
      end
      timestamp[from].push(count += 1)
    }

    @graph.each_key do |node|
      dfs_visit.call(node) unless visited[node]
    end
    return timestamp, tree_edges, back_edges, cross_edges, forward_edges
  end
  alias dfs depth_first_search

  # Returns the nodes sorted topologically by using depth_first_search.
  def dfs_topological_sort
    # sorted by finished time reversely and collect node names only
    timestamp, = depth_first_search
    timestamp.sort { |a, b| b[1][1] <=> a[1][1] }.collect { |x| x.first }
  end

  # Dijkstra method to solve the shortest path problem in the weighted graph.
  #
  # Edge weights must be non-negative numbers.  Returns two Hashes: the
  # distance from +root+ to each node (infinity when unreachable) and the
  # predecessor of each node on its shortest path.
  def dijkstra(root)
    distance, predecessor = initialize_single_source(root)
    queue = distance.dup        # nodes not settled yet (values are unused)

    until queue.empty?
      # extract a node having minimal distance; the *current* distance has
      # to be consulted here because it is shortened by the relaxation
      # below while the node is still waiting in the queue
      u = queue.keys.min_by { |node| distance[node] }
      queue.delete(u)
      @graph[u].each do |k, v|
        # relaxing procedure of root -> 'u' -> 'k'
        if distance[k] > distance[u] + v
          distance[k] = distance[u] + v
          predecessor[k] = u
        end
      end
    end
    return distance, predecessor
  end

  # Bellman-Ford method for solving the single-source shortest-paths
  # problem in the graph in which edge weights can be negative.
  #
  # Returns the distance and predecessor Hashes like dijkstra, or false
  # when a distance can still be shortened after the relaxation rounds
  # (the negative cyclic loop check).
  def bellman_ford(root)
    distance, predecessor = initialize_single_source(root)
    (nodes - 1).times do
      @graph.each_key do |u|
        @graph[u].each do |v, w|
          # relaxing procedure of root -> 'u' -> 'v'
          if distance[v] > distance[u] + w
            distance[v] = distance[u] + w
            predecessor[v] = u
          end
        end
      end
    end
    # negative cyclic loop check
    @graph.each_key do |u|
      @graph[u].each do |v, w|
        return false if distance[v] > distance[u] + w
      end
    end
    return distance, predecessor
  end

  # Floyd-Warshall algorithm for solving the all-pairs shortest-paths
  # problem on a directed graph G = (V, E).
  #
  # Returns a Matrix of the shortest distances, indexed by the numbers
  # stored in @index (infinity where no path exists).
  def floyd_warshall
    inf = 1 / 0.0

    # Work on plain row Arrays: Matrix#[]= is not public in every Ruby
    # version, and Matrix#to_a returns rows that are safe to modify.
    d = to_matrix(inf, 0).to_a
    n = nodes
    n.times do |k|
      n.times do |i|
        n.times do |j|
          via_k = d[i][k] + d[k][j]
          d[i][j] = via_k if d[i][j] > via_k
        end
      end
    end
    Matrix[*d]
  end
  alias floyd floyd_warshall

  # Kruskal method for finding minimum spanning trees
  #
  # Note that @relations is regenerated from @graph (see to_relations).
  # Returns a new Pathway built from the selected relations.
  def kruskal
    # initialize
    rel = to_relations.sort { |a, b| a <=> b }

    # positions of the relations that are == to an earlier one; each
    # position is removed once, highest first, so that the remaining
    # positions stay valid
    index = []
    0.upto(rel.size - 1) do |i|
      (i + 1).upto(rel.size - 1) do |j|
        index << j if rel[i] == rel[j]
      end
    end
    index.uniq.sort { |x, y| y <=> x }.each do |pos|
      rel.delete_at(pos)
    end

    mst = []
    # seen[node] is nil for an untouched node, otherwise the number of the
    # group (connected component) the node currently belongs to
    seen = {}
    @graph.each_key { |x| seen[x] = nil }
    group = 1
    # initialize end

    rel.each do |r|
      a = r.node[0]
      b = r.node[1]
      seen[a] = 0 if seen[a] == nil
      seen[b] = 0 if seen[b] == nil
      if seen[a] == seen[b] && seen[a] == 0
        # both ends are new: start a new group
        mst << r
        seen[a] = group
        seen[b] = group
      elsif seen[a] != seen[b]
        # the edge joins two different groups: merge them
        mst << r
        v1 = seen[a]
        v2 = seen[b]
        seen.each do |k, v|
          seen[k] = group if v == v1 || v == v2
        end
      end
      group += 1
    end
    Pathway.new(mst)
  end

  private

  # Prepares the distance and predecessor Hashes shared by the
  # single-source shortest path methods: every node starts at an infinite
  # distance without predecessor, and +root+ at distance 0.
  def initialize_single_source(root)
    inf = 1 / 0.0       # inf.infinite? -> true

    distance = {}
    predecessor = {}

    @graph.each_key do |k|
      distance[k] = inf
      predecessor[k] = nil
    end
    distance[root] = 0
    return distance, predecessor
  end

end
489
+
490
+
491
+
492
# Binary relation (edge) between two nodes, used as the building block of
# a Pathway graph.
class Relation

  # node1:: node the edge starts from
  # node2:: node the edge points to
  # edge::  the relation itself (e.g. a weight) attached to the pair
  def initialize(node1, node2, edge)
    @edge = edge
    @node = [node1, node2]
  end
  attr_accessor :node, :edge

  # Returns the first node of the pair.
  def from
    @node.first
  end

  # Returns the second node of the pair.
  def to
    @node.last
  end

  # Returns the edge object (same value as +edge+).
  def relation
    @edge
  end

  # Hash value that ignores the order of the two nodes, so that it agrees
  # with eql? (the nodes have to be sortable).
  def hash
    (@node.sort << @edge).hash
  end

  # Returns true when +rel+ has an equal edge and connects the same two
  # nodes, in either direction; false otherwise.
  def ===(rel)
    return false unless self.edge == rel.edge
    return true if self.node[0] == rel.node[0] && self.node[1] == rel.node[1]
    return true if self.node[0] == rel.node[1] && self.node[1] == rel.node[0]
    false
  end
  alias eql? ===

  # Compares two relations by their edges.  Returns 1, -1 or 0 (nil when
  # none of >, < and == holds); raises RuntimeError when the edge of the
  # receiver is not Comparable.
  def <=>(rel)
    raise "[Error] edges are not comparable" unless self.edge.kind_of?(Comparable)
    return 1 if self.edge > rel.edge
    return -1 if self.edge < rel.edge
    return 0 if self.edge == rel.edge
    nil
  end

end
545
+
546
+ end
547
+
548
+
549
+
550
# Demonstration / self test, executed only when this file is run directly.
if __FILE__ == $0

  puts "--- Test === method true/false"
  r1, r2, r3, r4 = [
    ['a', 'b', 1],
    ['b', 'a', 1],
    ['b', 'a', 2],
    ['a', 'b', 1],
  ].collect { |args| Bio::Relation.new(*args) }
  [r2, r3, r4].each { |other| p(r1 === other) }
  p [ r1, r2, r3, r4 ].uniq
  p r1.eql?(r2)
  p r3.eql?(r2)

  # Sample graph: directed edges, all of weight 1, written as "from to"
  # pairs:
  #
  #   q->s  q->t  q->w  r->u  r->y  s->v  t->x
  #   t->y  u->y  v->w  w->s  x->z  y->q  z->x
  data = %w[qs qt qw ru ry sv tx ty uy vw ws xz yq zx].collect do |pair|
    pair.split('') + [1]
  end

  puts "--- List of relations"
  ary = data.collect { |x| Bio::Relation.new(*x) }
  p ary

  puts "--- Generate graph from list of relations"
  graph = Bio::Pathway.new(ary)
  p graph

  puts "--- Test to_matrix method"
  p graph.to_matrix

  puts "--- Test dump_matrix method"
  puts graph.dump_matrix(0)

  puts "--- Test dump_list method"
  puts graph.dump_list

  puts "--- Labeling some nodes"
  hash = { 'q' => "L1", 's' => "L2", 'v' => "L3", 'w' => "L4" }
  graph.label = hash
  p graph

  puts "--- Extract subgraph by label"
  p graph.subgraph

  puts "--- Extract subgraph by list"
  p graph.subgraph(['q', 't', 'x', 'y', 'z'])

  puts "--- Test cliquishness of the node 'q'"
  p graph.cliquishness('q')

  puts "--- Test cliquishness of the node 'q' (undirected)"
  u_graph = Bio::Pathway.new(ary, 'undirected')
  p u_graph.cliquishness('q')

  puts "--- Test small_world histgram"
  p graph.small_world

  puts "--- Test breadth_first_search method"
  graph.breadth_first_search('q').each { |result| p result }

  puts "--- Test bfs_shortest_path method"
  graph.bfs_shortest_path('y', 'w').each { |result| p result }

  puts "--- Test depth_first_search method"
  timestamp, *edge_sets = graph.depth_first_search
  p timestamp
  ["tree edges : ", "back edges : ", "cross edges : ", "forward edges : "].
    zip(edge_sets).each do |title, edge_set|
    print title
    p edge_set
  end

  puts "--- Test dfs_topological_sort method"
  #
  # Professor Bumstead topologically sorts his clothing when getting dressed.
  # Each pair below is a "put this on before that" edge; the "watch" has no
  # ordering constraint and is entered as an edge to itself.
  #
  dag = Bio::Pathway.new([
    ["undeershorts", "pants"],
    ["undeershorts", "shoes"],
    ["socks", "shoes"],
    ["watch", "watch"],
    ["pants", "belt"],
    ["pants", "shoes"],
    ["shirt", "belt"],
    ["shirt", "tie"],
    ["tie", "jacket"],
    ["belt", "jacket"],
  ].collect { |before, after| Bio::Relation.new(before, after, true) })
  p dag.dfs_topological_sort

  puts "--- Test dijkstra method"
  graph.dijkstra('q').each { |result| p result }

  puts "--- Test dijkstra method by weighted graph"
  #
  # Weighted edges: a->b (1), a->c (5), b->c (3)
  #
  r1 = Bio::Relation.new('a', 'b', 1)
  r2 = Bio::Relation.new('a', 'c', 5)
  r3 = Bio::Relation.new('b', 'c', 3)
  w_graph = Bio::Pathway.new([r1, r2, r3])
  p w_graph
  p w_graph.dijkstra('a')

  puts "--- Test bellman_ford method by negative weighted graph"
  #
  # Same graph with two more edges: a->d (2), d->c (-5)
  #
  r4 = Bio::Relation.new('a', 'd', 2)
  r5 = Bio::Relation.new('d', 'c', -5)
  [r4, r5].each { |extra| w_graph.append(extra) }
  p w_graph.bellman_ford('a')
  p graph.bellman_ford('q')

end
718
+
719
+
720
+ =begin
721
+
722
+ = Bio::Pathway
723
+
724
+ Bio::Pathway is a general graph object initially constructed by the list of
725
+ the ((<Bio::Relation>)) objects. The basic concept of the Bio::Pathway object
726
+ is to store a graph as an adjacency list (in the instance variable @graph),
727
+ and converting the list into an adjacency matrix by calling to_matrix method
728
+ on demand. However, in some cases, it is convenient to have the original list
729
+ of the ((<Bio::Relation>))s, Bio::Pathway object also stores the list (as the
730
+ instance variable @relations) redundantly.
731
+
732
+ Note: you can clear the @relations list by calling clear_relations! method to
733
+ reduce the memory usage, and the content of the @relations can be re-generated
734
+ from the @graph by to_relations method.
735
+
736
+ --- Bio::Pathway.new(list, undirected = false)
737
+
738
+ Generate Bio::Pathway object from the list of Bio::Relation objects.
739
+ If the second argument is true, undirected graph is generated.
740
+
741
+ r1 = Bio::Relation.new('a', 'b', 1)
742
+ r2 = Bio::Relation.new('a', 'c', 5)
743
+ r3 = Bio::Relation.new('b', 'c', 3)
744
+ list = [ r1, r2, r3 ]
745
+ g = Bio::Pathway.new(list, 'undirected')
746
+
747
+ --- Bio::Pathway#relations
748
+
749
+ Read-only accessor for the internal list of the Bio::Relation objects
750
+ '@relations'.
751
+
752
+ --- Bio::Pathway#graph
753
+
754
+ Read-only accessor for the adjacency list of the graph.
755
+
756
+ --- Bio::Pathway#index
757
+
758
+ Read-only accessor for the row/column index (@index) of the adjacency
759
+ matrix. Contents of the hash @index is created by calling to_matrix
760
+ method.
761
+
762
+ --- Bio::Pathway#label
763
+
764
+ Accessor for the hash of the label assigned to the each node. You can
765
+ label some of the nodes in the graph by passing a hash to the label
766
+ and select subgraphs which contain labeled nodes only by subgraph method.
767
+
768
+ hash = { 1 => 'red', 2 => 'green', 5 => 'black' }
769
+ g.label = hash
770
+ g.label
771
+ g.subgraph # => new graph consists of the node 1, 2, 5 only
772
+
773
+ --- Bio::Pathway#directed?
774
+ --- Bio::Pathway#undirected?
775
+
776
+ Returns true or false according to the internal state of the graph.
777
+
778
+ --- Bio::Pathway#directed
779
+ --- Bio::Pathway#undirected
780
+
781
+ Changes the internal state of the graph between 'directed' and
782
+ 'undirected' and re-generate adjacency list. The undirected graph
783
+ can be converted to directed graph, however, the edge between two
784
+ nodes will be simply doubled to both ends.
785
+ Note that these methods cannot be used without the list of the
786
+ Bio::Relation objects (internally stored in @relations variable).
787
+ Thus if you already called clear_relations! method, call
788
+ to_relations first.
789
+
790
+ --- Bio::Pathway#clear_relations!
791
+ --- Bio::Pathway#to_relations
792
+
793
+ Clear @relations array and re-generate @relations from @graph.
794
+ Useful when you want to reduce the memory usage of the object.
795
+
796
+ --- Bio::Pathway#to_list
797
+
798
+ Generate the adjacency list @graph from @relations (called by
799
+ initialize and in some other cases when @relations has been changed).
800
+
801
+ --- Bio::Pathway#append(rel, add_rel = true)
802
+
803
+ Add a Bio::Relation object 'rel' to the @graph and @relations.
804
+ If the second argument is false, @relations is not modified (only
805
+ useful when generating @graph from @relations internally).
806
+
807
+ --- Bio::Pathway#delete(rel)
808
+
809
+ Remove an edge indicated by the Bio::Relation object 'rel' from the
810
+ @graph and the @relations.
811
+
812
+ --- Bio::Pathway#nodes
813
+ --- Bio::Pathway#edges
814
+
815
+ Returns the number of the nodes or edges in the graph.
816
+
817
+ --- Bio::Pathway#to_matrix(default_value = nil, diagonal_value = nil)
818
+
819
+ Returns the adjacency matrix expression of the graph as a Matrix object.
820
+ If the first argument was assigned, the matrix will be filled with
821
+ the given value. The second argument indicates the value of the
822
+ diagonal constituents of the matrix besides the above.
823
+
824
+ --- Bio::Pathway#dump_matrix(default_value = nil, diagonal_value = nil)
825
+ --- Bio::Pathway#dump_list
826
+
827
+ These are pretty printers of the graph. The dump_matrix method
828
+ accepts the same arguments as to_matrix. Useful when you want to
829
+ check the internal state of the adjacency list or the matrix (for
830
+ the debug etc.) easily.
831
+
832
+ --- Bio::Pathway#subgraph(list = nil)
833
+
834
+ This method selects some nodes and returns a new Bio::Pathway object
835
+ consisting of the selected nodes only.
836
+ If the list of the nodes (as Array) is assigned as the argument,
837
+ use the list to select the nodes from the graph. If no argument
838
+ is assigned, internal property of the graph @label is used to select
839
+ the nodes.
840
+
841
+ hash = { 'a' => 'secret', 'b' => 'important', 'c' => 'important' }
842
+ g.label = hash
843
+ g.subgraph
844
+
845
+ list = [ 'a', 'b', 'c' ]
846
+ g.subgraph(list)
847
+
848
+ --- Bio::Pathway#common_subgraph(graph)
849
+
850
+ Not implemented yet.
851
+
852
+ --- Bio::Pathway#clique
853
+
854
+ Not implemented yet.
855
+
856
+ --- Bio::Pathway#cliquishness(node)
857
+
858
+ Calculates the value of cliquishness around the 'node'. This value
859
+ indicates completeness of the edge density among the surrounded nodes.
860
+
861
+ --- Bio::Pathway#small_world
862
+
863
+ Calculates the frequency of the nodes having the same number of edges
864
+ and returns the value as Hash.
865
+
866
+ --- Bio::Pathway#breadth_first_search(root)
867
+
868
+ Breadth first search solves the steps and path to each node and forms
869
+ a tree containing all vertices reachable from the root node. This method
870
+ returns the result in 2 hashes - 1st one shows the steps from root node
871
+ and 2nd hash shows the structure of the tree.
872
+
873
+ The weights of the edges are not considered in this method.
874
+
875
+ --- Bio::Pathway#bfs(root)
876
+
877
+ Alias for the breadth_first_search method.
878
+
879
+ --- Bio::Pathway#bfs_shortest_path(node1, node2)
880
+
881
+ Calculates the shortest path between two nodes by using
882
+ breadth_first_search method and returns steps and the path as Array.
883
+
884
+ --- Bio::Pathway#depth_first_search
885
+
886
+ Depth first search yields much information about the structure of the
887
+ graph especially on the classification of the edges. This method returns
888
+ 5 hashes - 1st one shows the timestamps of each node containing the first
889
+ discovered time and the search finished time in an array. The 2nd, 3rd,
890
+ 4th, and 5th hashes contain 'tree edges', 'back edges', 'cross edges',
891
+ 'forward edges' respectively.
892
+
893
+ If $DEBUG is true (e.g. ruby -d), this method prints the progression
894
+ of the search.
895
+
896
+ The weights of the edges are not considered in this method.
897
+
898
+ --- Bio::Pathway#dfs
899
+
900
+ Alias for the depth_first_search method.
901
+
902
+ --- Bio::Pathway#dfs_topological_sort
903
+
904
+ Topological sort of the directed acyclic graphs ("dags") by using
905
+ depth_first_search.
906
+
907
+ --- Bio::Pathway#dijkstra(root)
908
+
909
+ Dijkstra method solves the shortest path problem in the weighted graph.
910
+
911
+ --- Bio::Pathway#bellman_ford(root)
912
+
913
+ Bellman-Ford method solves the single-source shortest-paths problem
914
+ in the graph in which the edge weights can be negative.
915
+
916
+ --- Bio::Pathway#floyd_warshall
917
+
918
+ Floyd-Warshall algorithm solves the all-pairs shortest-paths problem
919
+ on a directed graph G = (V, E).
920
+
921
+ --- Bio::Pathway#floyd
922
+
923
+ Alias for the floyd_warshall method.
924
+
925
+ --- Bio::Pathway#kruskal
926
+
927
+ Kruskal method calculates the minimum spanning tree.
928
+
929
+ --- Bio::Pathway#initialize_single_source(root)
930
+
931
+ Private method used to initialize the distance by 'Infinity' and the
932
+ path to the parent node by 'nil'.
933
+
934
+
935
+ = Bio::Relation
936
+
937
+ Bio::Relation is a simple object storing two nodes and the relation of them.
938
+ The nodes and the edge (relation) can be any Ruby object. You can also
939
+ compare Bio::Relation objects if the edges have Comparable property.
940
+
941
+ --- Bio::Relation.new(node1, node2, edge)
942
+
943
+ Create a new binary relation object consisting of the two objects 'node1'
944
+ and 'node2' with the 'edge' object as the relation of them.
945
+
946
+ --- Bio::Relation#node
947
+
948
+ Accessor for the @node.
949
+
950
+ --- Bio::Relation#edge
951
+
952
+ Accessor for the @edge.
953
+
954
+ --- Bio::Relation#from
955
+
956
+ Returns one node.
957
+
958
+ --- Bio::Relation#to
959
+
960
+ Returns another node.
961
+
962
+ --- Bio::Relation#relation
963
+
964
+ Returns the edge.
965
+
966
+ --- Bio::Relation#===(rel)
967
+
968
+ Compare with another Bio::Relation object whether it has the same edges
969
+ and same nodes. The == method compares Bio::Relation object's id,
970
+ however this case equality === method compares the internal property
971
+ of the Bio::Relation object.
972
+
973
+ --- Bio::Relation#eql?(rel)
974
+ --- Bio::Relation#hash
975
+
976
+ Method eql? is an alias of the === method and is used with hash method
977
+ to make a unique array of the Bio::Relation objects.
978
+
979
+ a1 = Bio::Relation.new('a', 'b', 1)
980
+ a2 = Bio::Relation.new('b', 'a', 1)
981
+ a3 = Bio::Relation.new('b', 'c', 1)
982
+ p [ a1, a2, a3 ].uniq
983
+
984
+ --- Bio::Relation#<=>(rel)
985
+
986
+ Used by the each method to compare with another Bio::Relation object.
987
+ This method is only usable when the edge objects have the property of
988
+ the module Comparable.
989
+
990
+ =end
991
+