bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,991 @@
1
+ #
2
+ # bio/pathway.rb - Binary relations and Graph algorithms
3
+ #
4
+ # Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
5
+ # KAWASHIMA Shuichi <s@bioruby.org>
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: pathway.rb,v 1.34 2005/12/18 16:50:56 k Exp $
22
+ #
23
+
24
+ require 'matrix'
25
+
26
+ module Bio
27
+
28
class Pathway

  # Initial graph (adjacency list) generation from the list of Relation.
  #
  # relations  :: Array of Relation objects.  The array is kept by reference
  #               in @relations, so clear_relations! also empties the
  #               caller's array.
  # undirected :: any true value makes every edge be stored in both
  #               directions.
  def initialize(relations, undirected = false)
    @undirected = undirected
    @relations = relations
    @graph = {}   # adjacency list: { from => { to => relation } }
    @index = {}   # node => row/column number, filled in by to_matrix
    @label = {}   # additional information on each node
    to_list       # generate adjacency list
  end
  attr_reader :relations, :graph, :index
  attr_accessor :label

  # Returns true when the edges are stored in one direction only.
  def directed?
    @undirected ? false : true
  end

  # Returns true when every edge is stored in both directions.
  def undirected?
    @undirected ? true : false
  end

  # Switches to the directed representation and rebuilds the adjacency
  # list from @relations.  Does nothing if the graph is already directed.
  def directed
    if undirected?
      @undirected = false
      to_list
    end
  end

  # Switches to the undirected representation and rebuilds the adjacency
  # list from @relations.  Does nothing if the graph is already undirected.
  def undirected
    if directed?
      @undirected = true
      to_list
    end
  end

  # clear @relations to reduce the memory usage
  def clear_relations!
    @relations.clear
  end

  # reconstruct @relations from the adjacency list @graph
  # (the previous content of @relations is discarded)
  def to_relations
    @relations.clear
    @graph.each do |from, targets|
      targets.each do |to, w|
        @relations << Relation.new(from, to, w)
      end
    end
    @relations
  end

  # Graph (adjacency list) generation from the Relations
  def to_list
    @graph.clear
    @relations.each do |rel|
      append(rel, false)  # append to @graph without push to @relations
    end
  end

  # Adds the edge described by rel to @graph, creating both end nodes when
  # needed.  rel is also pushed to @relations unless add_rel is false.
  def append(rel, add_rel = true)
    @relations.push(rel) if add_rel
    @graph[rel.from] ||= {}
    @graph[rel.to] ||= {}
    @graph[rel.from][rel.to] = rel.relation
    @graph[rel.to][rel.from] = rel.relation if @undirected
  end

  # Removes the edge described by rel from @relations and @graph.
  #
  # In a directed graph only relations with the same direction are removed
  # from @relations, so that @relations stays consistent with @graph (the
  # reverse edge is a different edge).  Nodes that are not part of the
  # graph are ignored instead of raising NoMethodError.
  def delete(rel)
    @relations.delete_if do |x|
      if @undirected
        x === rel
      else
        x.from == rel.from && x.to == rel.to && x.relation == rel.relation
      end
    end
    @graph[rel.from].delete(rel.to) if @graph[rel.from]
    @graph[rel.to].delete(rel.from) if @undirected && @graph[rel.to]
  end

  # Returns the number of nodes in the graph.
  def nodes
    @graph.size
  end

  # Returns the number of entries in the adjacency list (an edge of an
  # undirected graph is therefore counted once per direction).
  def edges
    @graph.values.inject(0) { |sum, targets| sum + targets.size }
  end

  # Convert adjacency list to adjacency matrix
  #
  # default_value  :: value of the cells which have no edge
  # diagonal_value :: value of the diagonal cells (applied only when it is
  #                   neither nil nor false); edges are written afterwards
  #                   and so overwrite it for self-loops
  #
  # As a side effect @index is rebuilt so that it maps each node to its
  # row/column number.  Returns a Matrix.
  def to_matrix(default_value = nil, diagonal_value = nil)
    size = nodes

    # Array.new(size, Array.new(size)) would share one inner Array among
    # all the rows, so a new Array object is created for each row.
    matrix = Array.new(size) { Array.new(size, default_value) }

    if diagonal_value
      size.times { |i| matrix[i][i] = diagonal_value }
    end

    # assign index number for each node (dropping numbers of nodes which
    # are no longer part of the graph)
    @index.clear
    @graph.keys.each_with_index do |k, i|
      @index[k] = i
    end

    # @graph already contains the mirrored edges of an undirected graph
    # and is still available after clear_relations!
    @graph.each do |from, targets|
      targets.each do |to, relation|
        matrix[@index[from]][@index[to]] = relation
      end
    end
    Matrix[*matrix]
  end

  # pretty printer of the adjacency matrix
  # (accepts the same arguments as to_matrix)
  def dump_matrix(*arg)
    matrix = to_matrix(*arg)
    sorted = @index.sort { |a, b| a[1] <=> b[1] }
    "[# " + sorted.collect { |x| x[0] }.join(", ") + "\n" +
      matrix.to_a.collect { |row| ' ' + row.inspect }.join(",\n") + "\n]"
  end

  # pretty printer of the adjacency list
  def dump_list
    @graph.collect { |from, targets|
      "#{from} => " +
        targets.collect { |to, relation| "#{to} (#{relation})" }.join(", ") +
        "\n"
    }.join
  end

  # Select labeled nodes and generate subgraph
  #
  # list :: Array of nodes to keep.  When omitted, the nodes having a true
  #         value in @label are kept.
  #
  # Only the edges whose both ends are selected are copied into the new
  # Pathway.  The labels stored in @label are left untouched even when a
  # list is given.
  def subgraph(list = nil)
    if list
      selected = {}
      list.each { |node| selected[node] = true }
    else
      selected = @label
    end
    sub_graph = Pathway.new([], @undirected)
    @graph.each do |from, targets|
      next unless selected[from]
      targets.each do |to, relation|
        next unless selected[to]
        sub_graph.append(Relation.new(from, to, relation))
      end
    end
    sub_graph
  end

  # Not implemented yet.
  def common_subgraph(graph)
    raise NotImplementedError
  end

  # Not implemented yet.
  def clique
    raise NotImplementedError
  end

  # Returns completeness of the edge density among the surrounded nodes
  #
  # The value is (e / 2) / (n * (n - 1) / 2) where e and n are the numbers
  # of edges and nodes of the subgraph made of the neighbors of node.
  # 0.0 is returned when that subgraph is empty or has a single node.
  def cliquishness(node)
    neighbors = @graph[node].keys
    sg = subgraph(neighbors)
    return 0.0 if sg.graph.size == 0

    edge_count = sg.edges / 2.0
    node_count = sg.nodes
    complete = (node_count * (node_count - 1)) / 2.0
    return 0.0 if complete == 0   # a lone neighbor: no pair to connect
    edge_count / complete
  end

  # Returns frequency of the nodes having same number of edges as hash
  # ({ number of outgoing edges => number of nodes })
  def small_world
    freq = Hash.new(0)
    @graph.each_value do |targets|
      freq[targets.size] += 1
    end
    freq
  end

  # Breadth first search solves steps and path to the each node and forms
  # a tree contains all reachable vertices from the root node.
  #
  # Returns two hashes: the number of steps from root to each reachable
  # node, and the predecessor of each reachable node in the search tree.
  # The weights of the edges are not considered.
  def breadth_first_search(root)
    visited = { root => true }
    distance = { root => 0 }
    predecessor = { root => nil }

    queue = [ root ]
    until queue.empty?
      from = queue.shift
      next unless @graph[from]
      @graph[from].each_key do |to|
        next if visited[to]
        visited[to] = true
        distance[to] = distance[from] + 1
        predecessor[to] = from
        queue.push(to)
      end
    end
    return distance, predecessor
  end
  alias bfs breadth_first_search

  # Returns the number of steps and the path (Array of nodes) from node1
  # to node2 found by breadth_first_search.  The number of steps is nil
  # when node2 can not be reached.
  def bfs_shortest_path(node1, node2)
    distance, route = breadth_first_search(node1)
    step = distance[node2]
    node = node2
    path = [ node2 ]
    while node != node1 && route[node]
      node = route[node]
      path.unshift(node)
    end
    return step, path
  end

  # Depth first search yields much information about the structure of the
  # graph especially on the classification of the edges.
  #
  # Returns five hashes: the [discovered, finished] timestamps of each
  # node, then the tree, back, cross and forward edges.  The tree edges
  # are stored as { to => from }, the others as { from => to }.
  def depth_first_search
    visited = {}
    timestamp = {}
    tree_edges = {}
    back_edges = {}
    cross_edges = {}
    forward_edges = {}
    count = 0

    dfs_visit = lambda { |from|
      visited[from] = true
      timestamp[from] = [count += 1]
      @graph[from].each_key do |to|
        if !visited[to]
          # tree edge (white)
          p "#{from} -> #{to} : tree edge" if $DEBUG
          tree_edges[to] = from
          dfs_visit.call(to)
        elsif timestamp[to].size <= 1
          # back edge (gray): 'to' is discovered but not finished yet
          p "#{from} -> #{to} : back edge" if $DEBUG
          back_edges[from] = to
        elsif timestamp[from].first < timestamp[to].first
          # forward edge (black)
          p "#{from} -> #{to} : forward edge" if $DEBUG
          forward_edges[from] = to
        else
          # cross edge (black)
          p "#{from} -> #{to} : cross edge" if $DEBUG
          cross_edges[from] = to
        end
      end
      timestamp[from].push(count += 1)
    }

    @graph.each_key do |node|
      dfs_visit.call(node) unless visited[node]
    end
    return timestamp, tree_edges, back_edges, cross_edges, forward_edges
  end
  alias dfs depth_first_search

  # Returns the nodes sorted topologically by the depth first search.
  def dfs_topological_sort
    # sorted by finished time reversely and collect node names only
    timestamp, = depth_first_search
    timestamp.sort { |a, b| b[1][1] <=> a[1][1] }.collect { |x| x.first }
  end

  # Dijkstra method to solve the shortest path problem in the weighted graph.
  #
  # Returns two hashes: the distance from root to each node (infinity for
  # the unreachable nodes) and the predecessor of each node on its
  # shortest path.  The weights of the edges must not be negative.
  def dijkstra(root)
    distance, predecessor = initialize_single_source(root)
    unvisited = @graph.keys

    until unvisited.empty?
      # extract a node having minimal distance; the *current* distances
      # are compared, so that relaxed values are taken into account
      u = unvisited.min { |a, b| distance[a] <=> distance[b] }
      unvisited.delete(u)
      @graph[u].each do |k, v|
        # relaxing procedure of root -> 'u' -> 'k'
        if distance[k] > distance[u] + v
          distance[k] = distance[u] + v
          predecessor[k] = u
        end
      end
    end
    return distance, predecessor
  end

  # Bellman-Ford method for solving the single-source shortest-paths
  # problem in the graph in which edge weights can be negative.
  #
  # Returns the distance and predecessor hashes, or false when a negative
  # cycle is detected.
  def bellman_ford(root)
    distance, predecessor = initialize_single_source(root)
    (nodes - 1).times do
      @graph.each do |u, targets|
        targets.each do |v, w|
          # relaxing procedure of root -> 'u' -> 'v'
          if distance[v] > distance[u] + w
            distance[v] = distance[u] + w
            predecessor[v] = u
          end
        end
      end
    end
    # negative cyclic loop check
    @graph.each do |u, targets|
      targets.each do |v, w|
        return false if distance[v] > distance[u] + w
      end
    end
    return distance, predecessor
  end

  # Floyd-Warshall algorithm for solving the all-pairs shortest-paths
  # problem on a directed graph G = (V, E).
  #
  # Returns a Matrix of the shortest distances, indexed by @index.
  def floyd_warshall
    inf = 1 / 0.0

    # work on plain nested Arrays: Matrix#[]= is not public in every
    # version of the matrix library
    d = to_matrix(inf, 0).to_a
    n = d.size
    n.times do |k|
      n.times do |i|
        n.times do |j|
          via_k = d[i][k] + d[k][j]
          d[i][j] = via_k if d[i][j] > via_k
        end
      end
    end
    Matrix[*d]
  end
  alias floyd floyd_warshall

  # Kruskal method for finding minimum spanning trees
  #
  # Returns a new (directed) Pathway made of the selected relations.
  # Note that @relations is regenerated from @graph by to_relations.
  def kruskal
    sorted = to_relations.sort

    component = {}  # node => id of the tree the node currently belongs to
    next_id = 0
    mst = []

    sorted.each do |r|
      a = r.from
      b = r.to
      next if a == b              # a self-loop never belongs to a tree
      ca = component[a]
      cb = component[b]
      next if ca && ca == cb      # both ends are already in the same tree

      mst << r
      next_id += 1
      # merge the trees of both ends under a fresh id
      component.each do |node, id|
        component[node] = next_id if id == ca || id == cb
      end
      component[a] = next_id
      component[b] = next_id
    end
    Pathway.new(mst)
  end

  private

  # Returns the initial distance (infinity, 0 for root) and predecessor
  # (nil) hashes shared by the single-source shortest path methods.
  def initialize_single_source(root)
    inf = 1 / 0.0  # inf.infinite? -> true

    distance = {}
    predecessor = {}

    @graph.each_key do |k|
      distance[k] = inf
      predecessor[k] = nil
    end
    distance[root] = 0
    return distance, predecessor
  end

end
489
+
490
+
491
+
492
class Relation

  # node1, node2 :: the two nodes connected by this relation (from, to)
  # edge         :: the relation between them (e.g. a weight)
  def initialize(node1, node2, edge)
    @node = [node1, node2]
    @edge = edge
  end
  attr_accessor :node, :edge

  # Returns the first (source) node.
  def from
    @node[0]
  end

  # Returns the second (target) node.
  def to
    @node[1]
  end

  # Returns the edge (same as the edge accessor).
  def relation
    @edge
  end

  # Hash value which does not depend on the order of the two nodes, so
  # that it agrees with eql?.  The nodes are not sorted, thus they do not
  # have to be comparable with each other.
  def hash
    [@node[0].hash + @node[1].hash, @edge].hash
  end

  # Returns true when both relations have an equal edge and connect the
  # same two nodes, regardless of the direction.
  def ===(rel)
    return false unless edge == rel.edge

    mine_from, mine_to = node
    other_from, other_to = rel.node
    if mine_from == other_from && mine_to == other_to
      true
    elsif mine_from == other_to && mine_to == other_from
      true
    else
      false
    end
  end
  alias eql? ===

  # Compares two relations by their edges.  Returns 1, -1 or 0 (nil when
  # none of >, < and == holds).  Raises a RuntimeError when the edge of the
  # receiver is not Comparable.
  def <=>(rel)
    unless edge.kind_of?(Comparable)
      raise "[Error] edges are not comparable"
    end
    if edge > rel.edge
      1
    elsif edge < rel.edge
      -1
    elsif edge == rel.edge
      0
    end
  end

end
545
+
546
+ end
547
+
548
+
549
+
550
if __FILE__ == $0

  # Relations are regarded as equal regardless of the direction as long as
  # their edges are equal.
  puts "--- Test === method true/false"
  r1 = Bio::Relation.new('a', 'b', 1)
  r2 = Bio::Relation.new('b', 'a', 1)
  r3 = Bio::Relation.new('b', 'a', 2)
  r4 = Bio::Relation.new('a', 'b', 1)
  [ r2, r3, r4 ].each do |other|
    p r1 === other
  end
  p [ r1, r2, r3, r4 ].uniq
  p r1.eql?(r2)
  p r3.eql?(r2)

  # Sample Graph :
  #                  +----------------+
  #                  |                |
  #                  v                |
  #       +---------(q)-->(t)------->(y)<----(r)
  #       |          |     |          ^       |
  #       v          |     v          |       |
  #   +--(s)<--+     |    (x)<---+   (u)<-----+
  #   |        |     |     |     |
  #   v        |     |     v     |
  #  (v)----->(w)<---+    (z)----+

  # every edge of the sample graph has the weight 1
  data = [
    %w(q s), %w(q t), %w(q w),
    %w(r u), %w(r y),
    %w(s v),
    %w(t x), %w(t y),
    %w(u y),
    %w(v w),
    %w(w s),
    %w(x z),
    %w(y q),
    %w(z x),
  ].collect { |from, to| [ from, to, 1 ] }

  puts "--- List of relations"
  ary = data.collect { |x| Bio::Relation.new(*x) }
  p ary

  puts "--- Generate graph from list of relations"
  graph = Bio::Pathway.new(ary)
  p graph

  puts "--- Test to_matrix method"
  p graph.to_matrix

  puts "--- Test dump_matrix method"
  puts graph.dump_matrix(0)

  puts "--- Test dump_list method"
  puts graph.dump_list

  puts "--- Labeling some nodes"
  graph.label = { 'q' => "L1", 's' => "L2", 'v' => "L3", 'w' => "L4" }
  p graph

  puts "--- Extract subgraph by label"
  p graph.subgraph

  puts "--- Extract subgraph by list"
  p graph.subgraph(['q', 't', 'x', 'y', 'z'])

  puts "--- Test cliquishness of the node 'q'"
  p graph.cliquishness('q')

  puts "--- Test cliquishness of the node 'q' (undirected)"
  u_graph = Bio::Pathway.new(ary, 'undirected')
  p u_graph.cliquishness('q')

  puts "--- Test small_world histgram"
  p graph.small_world

  puts "--- Test breadth_first_search method"
  distance, predecessor = graph.breadth_first_search('q')
  p distance
  p predecessor

  puts "--- Test bfs_shortest_path method"
  step, path = graph.bfs_shortest_path('y', 'w')
  p step
  p path

  puts "--- Test depth_first_search method"
  timestamp, tree, back, cross, forward = graph.depth_first_search
  p timestamp
  print "tree edges : "; p tree
  print "back edges : "; p back
  print "cross edges : "; p cross
  print "forward edges : "; p forward

  puts "--- Test dfs_topological_sort method"
  #
  # Professor Bumstead topologically sorts his clothing when getting dressed.
  #
  #   "undershorts"     "socks"
  #      |      |          |
  #      v      |          v           "watch"
  #   "pants" --+-------> "shoes"
  #      |
  #      v
  #   "belt" <----- "shirt" ----> "tie" ----> "jacket"
  #      |                                       ^
  #      `---------------------------------------'
  #
  # ("watch" is related to itself so that the isolated node gets into the
  # graph)
  clothing = [
    [ "undeershorts", "pants" ],
    [ "undeershorts", "shoes" ],
    [ "socks", "shoes" ],
    [ "watch", "watch" ],
    [ "pants", "belt" ],
    [ "pants", "shoes" ],
    [ "shirt", "belt" ],
    [ "shirt", "tie" ],
    [ "tie", "jacket" ],
    [ "belt", "jacket" ],
  ]
  dag = Bio::Pathway.new(clothing.collect { |first, later|
    Bio::Relation.new(first, later, true)
  })
  p dag.dfs_topological_sort

  puts "--- Test dijkstra method"
  distance, predecessor = graph.dijkstra('q')
  p distance
  p predecessor

  puts "--- Test dijkstra method by weighted graph"
  #
  #   'a' --> 'b'
  #    |   1   | 3
  #    |5      v
  #    `----> 'c'
  #
  r1 = Bio::Relation.new('a', 'b', 1)
  r2 = Bio::Relation.new('a', 'c', 5)
  r3 = Bio::Relation.new('b', 'c', 3)
  w_graph = Bio::Pathway.new([r1, r2, r3])
  p w_graph
  p w_graph.dijkstra('a')

  puts "--- Test bellman_ford method by negative weighted graph"
  #
  #   ,-- 'a' --> 'b'
  #   |    |   1   | 3
  #   |    |5      v
  #   |    `----> 'c'
  #   |            ^
  #   |2           | -5
  #   `--> 'd' ----'
  #
  r4 = Bio::Relation.new('a', 'd', 2)
  r5 = Bio::Relation.new('d', 'c', -5)
  [ r4, r5 ].each { |rel| w_graph.append(rel) }
  p w_graph.bellman_ford('a')
  p graph.bellman_ford('q')

end
718
+
719
+
720
+ =begin
721
+
722
+ = Bio::Pathway
723
+
724
+ Bio::Pathway is a general graph object initially constructed by the list of
725
+ the ((<Bio::Relation>)) objects. The basic concept of the Bio::Pathway object
726
+ is to store a graph as an adjacency list (in the instance variable @graph),
727
+ and converting the list into an adjacency matrix by calling to_matrix method
728
+ on demand. However, in some cases, it is convenient to have the original list
729
+ of the ((<Bio::Relation>))s, Bio::Pathway object also stores the list (as the
730
+ instance variable @relations) redundantly.
731
+
732
+ Note: you can clear the @relations list by calling clear_relations! method to
733
+ reduce the memory usage, and the content of the @relations can be re-generated
734
+ from the @graph by to_relations method.
735
+
736
+ --- Bio::Pathway.new(list, undirected = false)
737
+
738
+ Generate Bio::Pathway object from the list of Bio::Relation objects.
739
+ If the second argument is true, undirected graph is generated.
740
+
741
+ r1 = Bio::Relation.new('a', 'b', 1)
742
+ r2 = Bio::Relation.new('a', 'c', 5)
743
+ r3 = Bio::Relation.new('b', 'c', 3)
744
+ list = [ r1, r2, r3 ]
745
+ g = Bio::Pathway.new(list, 'undirected')
746
+
747
+ --- Bio::Pathway#relations
748
+
749
+ Read-only accessor for the internal list of the Bio::Relation objects
750
+ '@relations'.
751
+
752
+ --- Bio::Pathway#graph
753
+
754
+ Read-only accessor for the adjacency list of the graph.
755
+
756
+ --- Bio::Pathway#index
757
+
758
+ Read-only accessor for the row/column index (@index) of the adjacency
759
+ matrix. Contents of the hash @index is created by calling to_matrix
760
+ method.
761
+
762
+ --- Bio::Pathway#label
763
+
764
+ Accessor for the hash of the label assigned to the each node. You can
765
+ label some of the nodes in the graph by passing a hash to the label
766
+ and select subgraphs which contain labeled nodes only by subgraph method.
767
+
768
+ hash = { 1 => 'red', 2 => 'green', 5 => 'black' }
769
+ g.label = hash
770
+ g.label
771
+ g.subgraph # => new graph consists of the node 1, 2, 5 only
772
+
773
+ --- Bio::Pathway#directed?
774
+ --- Bio::Pathway#undirected?
775
+
776
+ Returns true or false according to the internal state of the graph.
777
+
778
+ --- Bio::Pathway#directed
779
+ --- Bio::Pathway#undirected
780
+
781
+ Changes the internal state of the graph between 'directed' and
782
+ 'undirected' and re-generate adjacency list. The undirected graph
783
+ can be converted to directed graph, however, the edge between two
784
+ nodes will be simply doubled to both ends.
785
+ Note that these methods cannot be used without the list of the
786
+ Bio::Relation objects (internally stored in @relations variable).
787
+ Thus if you already called clear_relations! method, call
788
+ to_relations first.
789
+
790
+ --- Bio::Pathway#clear_relations!
791
+ --- Bio::Pathway#to_relations
792
+
793
+ Clear @relations array and re-generate @relations from @graph.
794
+ Useful when you want to reduce the memory usage of the object.
795
+
796
+ --- Bio::Pathway#to_list
797
+
798
+ Generate the adjacency list @graph from @relations (called by
799
+ initialize and in some other cases when @relations has been changed).
800
+
801
+ --- Bio::Pathway#append(rel, add_rel = true)
802
+
803
+ Add a Bio::Relation object 'rel' to the @graph and @relations.
804
+ If the second argument is false, @relations is not modified (only
805
+ useful when generating @graph from @relations internally).
806
+
807
+ --- Bio::Pathway#delete(rel)
808
+
809
+ Remove an edge indicated by the Bio::Relation object 'rel' from the
810
+ @graph and the @relations.
811
+
812
+ --- Bio::Pathway#nodes
813
+ --- Bio::Pathway#edges
814
+
815
+ Returns the number of the nodes or edges in the graph.
816
+
817
+ --- Bio::Pathway#to_matrix(default_value = nil, diagonal_value = nil)
818
+
819
+ Returns the adjacency matrix expression of the graph as a Matrix object.
820
+ If the first argument was assigned, the matrix will be filled with
821
+ the given value. The second argument indicates the value of the
822
+ diagonal constituents of the matrix besides the above.
823
+
824
+ --- Bio::Pathway#dump_matrix(default_value = nil, diagonal_value = nil)
825
+ --- Bio::Pathway#dump_list
826
+
827
+ These are pretty printers of the graph. The dump_matrix method
828
+ accepts the same arguments as to_matrix. Useful when you want to
829
+ check the internal state of the adjacency list or the matrix (for
830
+ the debug etc.) easily.
831
+
832
+ --- Bio::Pathway#subgraph(list = nil)
833
+
834
+ This method select some nodes and returns new Bio::Pathway object
835
+ consisting of the selected nodes only.
836
+ If the list of the nodes (as Array) is assigned as the argument,
837
+ use the list to select the nodes from the graph. If no argument
838
+ is assigned, internal property of the graph @label is used to select
839
+ the nodes.
840
+
841
+ hash = { 'a' => 'secret', 'b' => 'important', 'c' => 'important' }
842
+ g.label = hash
843
+ g.subgraph
844
+
845
+ list = [ 'a', 'b', 'c' ]
846
+ g.subgraph(list)
847
+
848
+ --- Bio::Pathway#common_subgraph(graph)
849
+
850
+ Not implemented yet.
851
+
852
+ --- Bio::Pathway#clique
853
+
854
+ Not implemented yet.
855
+
856
+ --- Bio::Pathway#cliquishness(node)
857
+
858
+ Calculates the value of cliquishness around the 'node'. This value
859
+ indicates completeness of the edge density among the surrounded nodes.
860
+
861
+ --- Bio::Pathway#small_world
862
+
863
+ Calculates the frequency of the nodes having the same number of edges
864
+ and returns the value as Hash.
865
+
866
+ --- Bio::Pathway#breadth_first_search(root)
867
+
868
+ Breadth first search solves steps and path to each node and forms
869
+ a tree containing all reachable vertices from the root node. This method
870
+ returns the result in 2 hashes - 1st one shows the steps from root node
871
+ and 2nd hash shows the structure of the tree.
872
+
873
+ The weights of the edges are not considered in this method.
874
+
875
+ --- Bio::Pathway#bfs(root)
876
+
877
+ Alias for the breadth_first_search method.
878
+
879
+ --- Bio::Pathway#bfs_shortest_path(node1, node2)
880
+
881
+ Calculates the shortest path between two nodes by using
882
+ breadth_first_search method and returns steps and the path as Array.
883
+
884
+ --- Bio::Pathway#depth_first_search
885
+
886
+ Depth first search yields much information about the structure of the
887
+ graph especially on the classification of the edges. This method returns
888
+ 5 hashes - 1st one shows the timestamps of each node containing the first
889
+ discovered time and the search finished time in an array. The 2nd, 3rd,
890
+ 4th, and 5th hashes contain 'tree edges', 'back edges', 'cross edges',
891
+ 'forward edges' respectively.
892
+
893
+ If $DEBUG is true (e.g. ruby -d), this method prints the progression
894
+ of the search.
895
+
896
+ The weights of the edges are not considered in this method.
897
+
898
+ --- Bio::Pathway#dfs
899
+
900
+ Alias for the depth_first_search method.
901
+
902
+ --- Bio::Pathway#dfs_topological_sort
903
+
904
+ Topological sort of the directed acyclic graphs ("dags") by using
905
+ depth_first_search.
906
+
907
+ --- Bio::Pathway#dijkstra(root)
908
+
909
+ Dijkstra method solves the shortest path problem in the weighted graph.
910
+
911
+ --- Bio::Pathway#bellman_ford(root)
912
+
913
+ Bellman-Ford method solves the single-source shortest-paths problem
914
+ in the graph in which the edge weights can be negative.
915
+
916
+ --- Bio::Pathway#floyd_warshall
917
+
918
+ Floyd-Warshall algorithm solves the all-pairs shortest-paths problem
919
+ on a directed graph G = (V, E).
920
+
921
+ --- Bio::Pathway#floyd
922
+
923
+ Alias for the floyd_warshall method.
924
+
925
+ --- Bio::Pathway#kruskal
926
+
927
+ Kruskal method calculates the minimum spanning trees.
928
+
929
+ --- Bio::Pathway#initialize_single_source(root)
930
+
931
+ Private method used to initialize the distance by 'Infinity' and the
932
+ path to the parent node by 'nil'.
933
+
934
+
935
+ = Bio::Relation
936
+
937
+ Bio::Relation is a simple object storing two nodes and the relation of them.
938
+ The nodes and the edge (relation) can be any Ruby object. You can also
939
+ compare Bio::Relation objects if the edges have Comparable property.
940
+
941
+ --- Bio::Relation.new(node1, node2, edge)
942
+
943
+ Create a new binary relation object consisting of the two objects 'node1'
944
+ and 'node2' with the 'edge' object as the relation of them.
945
+
946
+ --- Bio::Relation#node
947
+
948
+ Accessor for the @node.
949
+
950
+ --- Bio::Relation#edge
951
+
952
+ Accessor for the @edge.
953
+
954
+ --- Bio::Relation#from
955
+
956
+ Returns one node.
957
+
958
+ --- Bio::Relation#to
959
+
960
+ Returns another node.
961
+
962
+ --- Bio::Relation#relation
963
+
964
+ Returns the edge.
965
+
966
+ --- Bio::Relation#===(rel)
967
+
968
+ Compare with another Bio::Relation object whether having the same edges
969
+ and same nodes. The == method compares Bio::Relation object's id,
970
+ however this case equality === method compares the internal property
971
+ of the Bio::Relation object.
972
+
973
+ --- Bio::Relation#eql?(rel)
974
+ --- Bio::Relation#hash
975
+
976
+ Method eql? is an alias of the === method and is used with hash method
977
+ to make a unique array of the Bio::Relation objects.
978
+
979
+ a1 = Bio::Relation.new('a', 'b', 1)
980
+ a2 = Bio::Relation.new('b', 'a', 1)
981
+ a3 = Bio::Relation.new('b', 'c', 1)
982
+ p [ a1, a2, a3 ].uniq
983
+
984
+ --- Bio::Relation#<=>(rel)
985
+
986
+ Used by the each method to compare with another Bio::Relation object.
987
+ This method is only usable when the edge objects have the property of
988
+ the module Comparable.
989
+
990
+ =end
991
+