chemruby 0.9.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (241) hide show
  1. data/README +120 -0
  2. data/Rakefile +195 -0
  3. data/ext/extconf.rb +4 -0
  4. data/ext/subcomp.c +416 -0
  5. data/lib/chem.rb +130 -0
  6. data/lib/chem/appl.rb +1 -0
  7. data/lib/chem/appl/chem3dole.rb +36 -0
  8. data/lib/chem/appl/tinker/nucleic.rb +40 -0
  9. data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
  10. data/lib/chem/data.rb +4 -0
  11. data/lib/chem/data/atomic_weight.rb +124 -0
  12. data/lib/chem/data/character.rb +2 -0
  13. data/lib/chem/data/electronegativity.rb +14 -0
  14. data/lib/chem/data/periodic_table.rb +6 -0
  15. data/lib/chem/data/prime_numbers.rb +1 -0
  16. data/lib/chem/data/vdw_radii.rb +1 -0
  17. data/lib/chem/db.rb +64 -0
  18. data/lib/chem/db/cansmi.rb +234 -0
  19. data/lib/chem/db/cdx.rb +1525 -0
  20. data/lib/chem/db/eps.rb +164 -0
  21. data/lib/chem/db/g98.rb +909 -0
  22. data/lib/chem/db/gspan.rb +130 -0
  23. data/lib/chem/db/iupac.rb +5 -0
  24. data/lib/chem/db/iupac/a_1.rb +46 -0
  25. data/lib/chem/db/iupac/iuparser.rb +226 -0
  26. data/lib/chem/db/iupac/iuparser.ry +97 -0
  27. data/lib/chem/db/iupac/postfix.rb +2 -0
  28. data/lib/chem/db/kcf.rb +390 -0
  29. data/lib/chem/db/kcf_glycan.rb +19 -0
  30. data/lib/chem/db/kegg.rb +516 -0
  31. data/lib/chem/db/linucs/linparser.rb +144 -0
  32. data/lib/chem/db/linucs/linucs.ry +53 -0
  33. data/lib/chem/db/mdl.rb +379 -0
  34. data/lib/chem/db/molconnz.rb +12 -0
  35. data/lib/chem/db/mopac.rb +88 -0
  36. data/lib/chem/db/msi.rb +107 -0
  37. data/lib/chem/db/pdb_dic.rb +115 -0
  38. data/lib/chem/db/pdf.rb +131 -0
  39. data/lib/chem/db/pubchem.rb +113 -0
  40. data/lib/chem/db/rmagick.rb +70 -0
  41. data/lib/chem/db/sdf.rb +37 -0
  42. data/lib/chem/db/smbl.rb +88 -0
  43. data/lib/chem/db/smiles.rb +2 -0
  44. data/lib/chem/db/smiles/smiles.ry +203 -0
  45. data/lib/chem/db/smiles/smiparser.rb +375 -0
  46. data/lib/chem/db/swf.rb +74 -0
  47. data/lib/chem/db/sybyl.rb +150 -0
  48. data/lib/chem/db/tinker.rb +77 -0
  49. data/lib/chem/db/types/type_cansmi.rb +9 -0
  50. data/lib/chem/db/types/type_cdx.rb +24 -0
  51. data/lib/chem/db/types/type_gspan.rb +31 -0
  52. data/lib/chem/db/types/type_kcf.rb +28 -0
  53. data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
  54. data/lib/chem/db/types/type_kegg.rb +92 -0
  55. data/lib/chem/db/types/type_mdl.rb +31 -0
  56. data/lib/chem/db/types/type_pdf.rb +33 -0
  57. data/lib/chem/db/types/type_png.rb +31 -0
  58. data/lib/chem/db/types/type_rxn.rb +25 -0
  59. data/lib/chem/db/types/type_sdf.rb +25 -0
  60. data/lib/chem/db/types/type_sybyl.rb +30 -0
  61. data/lib/chem/db/types/type_xyz.rb +26 -0
  62. data/lib/chem/db/vector.rb +128 -0
  63. data/lib/chem/db/xyz.rb +39 -0
  64. data/lib/chem/model.rb +119 -0
  65. data/lib/chem/model/skeleton.rb +37 -0
  66. data/lib/chem/utils.rb +11 -0
  67. data/lib/chem/utils/geometry.rb +27 -0
  68. data/lib/chem/utils/graph_db.rb +146 -0
  69. data/lib/chem/utils/math.rb +17 -0
  70. data/lib/chem/utils/prop.rb +123 -0
  71. data/lib/chem/utils/sssr.rb +101 -0
  72. data/lib/chem/utils/sub.rb +78 -0
  73. data/lib/chem/utils/transform.rb +110 -0
  74. data/lib/chem/utils/traverse.rb +37 -0
  75. data/lib/chem/utils/ullmann.rb +134 -0
  76. data/lib/graph.rb +41 -0
  77. data/lib/graph/cluster.rb +20 -0
  78. data/lib/graph/morgan.rb +38 -0
  79. data/sample/frequent_subgraph.rb +46 -0
  80. data/sample/images/ex1.rb +11 -0
  81. data/sample/images/ex2.rb +4 -0
  82. data/sample/images/ex3.rb +5 -0
  83. data/sample/images/ex4.rb +17 -0
  84. data/sample/images/ex5.rb +10 -0
  85. data/sample/images/mol/adenine.mol +26 -0
  86. data/sample/images/mol/atp.mol +69 -0
  87. data/sample/images/temp/ex5.mol +344 -0
  88. data/sample/kegg_db.rb +116 -0
  89. data/setup.rb +1551 -0
  90. data/test/all.rb +6 -0
  91. data/test/coord_test.rb +17 -0
  92. data/test/ctab_test.rb +31 -0
  93. data/test/data/A_21.tar.gz +0 -0
  94. data/test/data/A_21/aceanthrylene.cdx +0 -0
  95. data/test/data/A_21/aceanthrylene.mol +40 -0
  96. data/test/data/A_21/acenaphthylene.cdx +0 -0
  97. data/test/data/A_21/acenaphthylene.mol +31 -0
  98. data/test/data/A_21/acephenanthrylene.cdx +0 -0
  99. data/test/data/A_21/acephenanthrylene.mol +40 -0
  100. data/test/data/A_21/anthracene.cdx +0 -0
  101. data/test/data/A_21/anthracene.mol +35 -0
  102. data/test/data/A_21/as-indacene.cdx +0 -0
  103. data/test/data/A_21/as-indacene.mol +31 -0
  104. data/test/data/A_21/azulene.cdx +0 -0
  105. data/test/data/A_21/azulene.mol +26 -0
  106. data/test/data/A_21/biphenylene.cdx +0 -0
  107. data/test/data/A_21/biphenylene.mol +31 -0
  108. data/test/data/A_21/chrysene.cdx +0 -0
  109. data/test/data/A_21/chrysene.mol +44 -0
  110. data/test/data/A_21/coronen.cdx +0 -0
  111. data/test/data/A_21/coronen.mol +59 -0
  112. data/test/data/A_21/fluoranthene.cdx +0 -0
  113. data/test/data/A_21/fluoranthene.mol +40 -0
  114. data/test/data/A_21/fluorene.cdx +0 -0
  115. data/test/data/A_21/fluorene.mol +33 -0
  116. data/test/data/A_21/heptacene.cdx +0 -0
  117. data/test/data/A_21/heptacene.mol +71 -0
  118. data/test/data/A_21/heptalene.cdx +0 -0
  119. data/test/data/A_21/heptalene.mol +30 -0
  120. data/test/data/A_21/heptaphene.cdx +0 -0
  121. data/test/data/A_21/heptaphene.mol +71 -0
  122. data/test/data/A_21/hexacene.cdx +0 -0
  123. data/test/data/A_21/hexacene.mol +62 -0
  124. data/test/data/A_21/hexaphene.cdx +0 -0
  125. data/test/data/A_21/hexaphene.mol +62 -0
  126. data/test/data/A_21/indene.cdx +0 -0
  127. data/test/data/A_21/indene.mol +24 -0
  128. data/test/data/A_21/iupac.txt +41 -0
  129. data/test/data/A_21/naphthacene.cdx +0 -0
  130. data/test/data/A_21/naphthacene.mol +44 -0
  131. data/test/data/A_21/naphthalene.cdx +0 -0
  132. data/test/data/A_21/naphthalene.mol +26 -0
  133. data/test/data/A_21/ovalene.cdx +0 -0
  134. data/test/data/A_21/ovalene.mol +78 -0
  135. data/test/data/A_21/pentacene.cdx +0 -0
  136. data/test/data/A_21/pentacene.mol +53 -0
  137. data/test/data/A_21/pentalene.cdx +0 -0
  138. data/test/data/A_21/pentalene.mol +22 -0
  139. data/test/data/A_21/pentaphene.cdx +0 -0
  140. data/test/data/A_21/pentaphene.mol +53 -0
  141. data/test/data/A_21/perylene.cdx +0 -0
  142. data/test/data/A_21/perylene.mol +49 -0
  143. data/test/data/A_21/phenalene.cdx +0 -0
  144. data/test/data/A_21/phenalene.mol +33 -0
  145. data/test/data/A_21/phenanthrene.cdx +0 -0
  146. data/test/data/A_21/phenanthrene.mol +35 -0
  147. data/test/data/A_21/picene.cdx +0 -0
  148. data/test/data/A_21/picene.mol +53 -0
  149. data/test/data/A_21/pleiadene.cdx +0 -0
  150. data/test/data/A_21/pleiadene.mol +44 -0
  151. data/test/data/A_21/pyranthrene.cdx +0 -0
  152. data/test/data/A_21/pyranthrene.mol +72 -0
  153. data/test/data/A_21/pyrene.cdx +0 -0
  154. data/test/data/A_21/pyrene.mol +40 -0
  155. data/test/data/A_21/rubicene.cdx +0 -0
  156. data/test/data/A_21/rubicene.mol +63 -0
  157. data/test/data/A_21/s-indacene.cdx +0 -0
  158. data/test/data/A_21/s-indacene.mol +31 -0
  159. data/test/data/A_21/tetraphenylene.cdx +0 -0
  160. data/test/data/A_21/tetraphenylene.mol +57 -0
  161. data/test/data/A_21/trinaphthylene.cdx +0 -0
  162. data/test/data/A_21/trinaphthylene.mol +71 -0
  163. data/test/data/A_21/triphenylene.cdx +0 -0
  164. data/test/data/A_21/triphenylene.mol +44 -0
  165. data/test/data/C00147.kcf +25 -0
  166. data/test/data/G00147.kcf +13 -0
  167. data/test/data/atp.mol +69 -0
  168. data/test/data/cyclohexane.mol +17 -0
  169. data/test/data/cyclohexane.ps +485 -0
  170. data/test/data/fullerene.mol +155 -0
  171. data/test/data/glycan +33 -0
  172. data/test/data/hypericin.cdx +0 -0
  173. data/test/data/hypericin.cdxml +596 -0
  174. data/test/data/hypericin.chm +0 -0
  175. data/test/data/hypericin.ct +85 -0
  176. data/test/data/hypericin.f1d +0 -0
  177. data/test/data/hypericin.f1q +0 -0
  178. data/test/data/hypericin.gif +0 -0
  179. data/test/data/hypericin.mol +88 -0
  180. data/test/data/hypericin.mol2 +159 -0
  181. data/test/data/hypericin.msm +123 -0
  182. data/test/data/hypericin.pdf +359 -0
  183. data/test/data/hypericin.png +0 -0
  184. data/test/data/hypericin.ps +0 -0
  185. data/test/data/hypericin.skc +0 -0
  186. data/test/data/hypericin2.gif +0 -0
  187. data/test/data/hypericin2.ps +0 -0
  188. data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
  189. data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
  190. data/test/data/kegg/ligand/mol/C00147.mol +26 -0
  191. data/test/data/kegg/ligand/reaction +14 -0
  192. data/test/data/kegg/ligand/reaction.lst +1 -0
  193. data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
  194. data/test/data/reaction +14 -0
  195. data/test/data/reaction.lst +1 -0
  196. data/test/data/reaction_mapformula.lst +3 -0
  197. data/test/data/rxn/C00001.mol +6 -0
  198. data/test/data/rxn/C00011.mol +10 -0
  199. data/test/data/rxn/C00014.mol +6 -0
  200. data/test/data/rxn/C01010.mol +18 -0
  201. data/test/data/rxn/sample.rxn +50 -0
  202. data/test/data/rxn/substitution.rxn +45 -0
  203. data/test/data/test.eps +0 -0
  204. data/test/data/test.mol +28 -0
  205. data/test/data/test.sdf +143 -0
  206. data/test/data/test.skc +0 -0
  207. data/test/data/test.xyz +4 -0
  208. data/test/data/test_lf.sdf +143 -0
  209. data/test/heavy_test_pubchem.rb +16 -0
  210. data/test/multiple_test.rb +22 -0
  211. data/test/test_adj.rb +54 -0
  212. data/test/test_canonical_smiles.rb +46 -0
  213. data/test/test_cdx.rb +32 -0
  214. data/test/test_chem.rb +18 -0
  215. data/test/test_cluster.rb +19 -0
  216. data/test/test_db.rb +11 -0
  217. data/test/test_eps.rb +24 -0
  218. data/test/test_geometry.rb +11 -0
  219. data/test/test_gspan.rb +28 -0
  220. data/test/test_iupac.rb +36 -0
  221. data/test/test_kcf.rb +24 -0
  222. data/test/test_kcf_glycan.rb +10 -0
  223. data/test/test_kegg.rb +118 -0
  224. data/test/test_linucs.rb +21 -0
  225. data/test/test_mdl.rb +45 -0
  226. data/test/test_mol2.rb +62 -0
  227. data/test/test_morgan.rb +21 -0
  228. data/test/test_pdf.rb +12 -0
  229. data/test/test_prop.rb +86 -0
  230. data/test/test_rmagick.rb +15 -0
  231. data/test/test_sbdb.rb +23 -0
  232. data/test/test_sdf.rb +30 -0
  233. data/test/test_smiles.rb +84 -0
  234. data/test/test_sssr.rb +18 -0
  235. data/test/test_sub.rb +47 -0
  236. data/test/test_subcomp.rb +37 -0
  237. data/test/test_traverse.rb +29 -0
  238. data/test/test_writer.rb +13 -0
  239. data/test/test_xyz.rb +15 -0
  240. data/test/type_test.rb +25 -0
  241. metadata +290 -0
data/README ADDED
@@ -0,0 +1,120 @@
1
+ = ChemRuby - Cheminformatics Ruby
2
+
3
+ ChemRuby is a framework for developing cheminformatics applications in Ruby.
4
+ It will let you retrieve chemical information from a variety of data sources
5
+ in various formats (such as MDL mol, SMILES etc.), fast substructure search
6
+ based on graph theory, draw a chemical structure in various graphics formats
7
+ (such as PDF, PNG etc.), and calculate a number of chemical properties.
8
+
9
+ == FOR MORE INFORMATION
10
+
11
+ ChemRuby's official website is at ((<URL:http://www.chemruby.org/>)).
12
+ You will find links to the related resources including downloads,
13
+ Wiki documentation etc. on the top page.
14
+
15
+ * ((<URL:http://www.chemruby.org/>))
16
+
17
+
18
+ == WHERE TO OBTAIN
19
+
20
+ --- WWW
21
+
22
+ The releases can be obtained at ChemRuby website.
23
+
24
+ * ((<URL:http://www.chemruby.org/>))
25
+
26
+ --- RubyGems
27
+
28
+ ((<RubyGems|URL:http://rubyforge.org/projects/rubygems/>)) version of
29
+ the ChemRuby package is also available for easy installation.
30
+
31
+ * ((<URL:http://rubyforge.org/projects/chemruby/>))
32
+
33
+
34
+ == REQUIREMENTS
35
+
36
+ * Ruby 1.8.2 or later -- ((<URL:http://www.ruby-lang.org/>))
37
+
38
+
39
+ == OPTIONAL REQUIREMENTS
40
+
41
+ Some optional libraries can be utilized to extend ChemRuby's functionality.
42
+ If your needs meet the following conditions, install them from the "Ruby
43
+ Application Archive" at ((<URL:http://raa.ruby-lang.org/>)).
44
+
45
+ For outputting png and jpeg images:
46
+
47
+ * [RAA:rmagick]
48
+
49
+ For testing and developing ChemRuby:
50
+
51
+ * [RAA:rake]
52
+
53
+ == INSTALL
54
+
55
+ In the chemruby source directory (such as chemruby-x.x.x/), run setup.rb
56
+ as follows:
57
+
58
+ % ruby setup.rb config
59
+ % ruby setup.rb setup
60
+ % su
61
+ # ruby setup.rb install
62
+
63
+ If your operating system supports 'sudo' command (such as Mac OS X),
64
+ try the following procedure instead of the above.
65
+
66
+ % ruby setup.rb config
67
+ % ruby setup.rb setup
68
+ % sudo ruby setup.rb install
69
+
70
+ You can run tests by
71
+
72
+ % rake test
73
+
74
+ and run
75
+
76
+ % rake
77
+
78
+ for more details.
79
+
80
+ == USAGE
81
+
82
+ You can load all ChemRuby classes just by requiring 'chem.rb'. All the
83
+ ChemRuby classes and modules are located under the module name 'Chem' to
84
+ separate the name space.
85
+
86
+ #!/usr/bin/env ruby
87
+ require 'chem'
88
+
89
+ --- RubyGems
90
+
91
+ In RubyGems, you need to load 'rubygems' library before using 'chem'.
92
+
93
+ #!/usr/bin/env ruby
94
+ require 'rubygems'
95
+ require_gem 'chem'
96
+
97
+ == Credits
98
+
99
+ * GOTO Naohisa, KATAYAMA Toshiaki and NAKAO Mitsuteru (alphabetical order)
100
+ who are developers of BioRuby, led the design of ChemRuby.
101
+
102
+ * gSpan parser and PubChem search from KADOWAKI masashi.
103
+
104
+ == LICENSE
105
+
106
+ ChemRuby can be freely distributed under the Ruby's license.
107
+ Note that, setup.rb included in the ChemRuby package comes from
108
+ <RAA:setup> developed by Minero Aoki.
109
+
110
+ This README file can also be distributed under Ruby's license.
111
+
112
+ Copyright (C) 2006 TANAKA Nobuya <tanaka@chemruby.org>
113
+ KATAYAMA Toshiaki <k@bioruby.org>
114
+
115
+ == CONTACT
116
+
117
+ Current staff of the ChemRuby project can be reached by sending e-mail
118
+ to <staff@chemruby.org>.
119
+
120
+
@@ -0,0 +1,195 @@
#
# Rakefile
#
# Build, test, install and packaging tasks for ChemRuby.
# See http://docs.rubyrake.org/ to see how to use ``rake'' command.
#
# $Id: Rakefile 61 2005-10-12 09:17:39Z tanaka $
#

require 'rake/clean'
require 'rake/testtask'
require 'rake/gempackagetask'

task :default => [:help]

PKG_VERSION = "0.9.3"
PKG_BUILD = "RC1"

# Files that go into the gem and the tarball.
PKG_FILES = FileList[
  "Rakefile", "README",
  "setup.rb",
  "lib/**/*.rb",
  "lib/**/*.ry",
  "test/**/*",
  "sample/**/*.rb",
  "sample/**/*.mol",
  "ext/**/*.h",
  "ext/**/*.c",
  "ext/**/*.rb"
]

# Path of the compiled C extension; the suffix depends on the platform.
SUBCOMP_LIB = "ext/subcomp.#{Config::CONFIG["DLEXT"]}"

# Text printed by the default (:help) task.
HELP_TEXT = <<EOL

ChemRuby #{PKG_VERSION}

To install ChemRuby, you need at least

* ruby-1.8.2 (or later)
* Ruby header files (included in original Ruby)
* C language compilers (such as gcc)

If the following modules are installed, ChemRuby will use it.
You can install them later.

* RMagick ( You will find how to install them in http://www.chemruby.org)

== Compiling and Installing

% rake compile
% sudo rake install

or just

% sudo ruby setup.rb

== Compiling RDOC

% rake doc

== Test

% rake test

You will need RMagick and other libraries to pass all the tests.

EOL

task :help do |t|
  puts HELP_TEXT
end

task :doc do |t|
  system "rdoc --main README ./lib README"
end

# Defines a Rake::TestTask called +name+ that puts +lib_dirs+ on the load
# path and runs the files matching +pattern+.
define_tests = lambda do |name, lib_dirs, pattern|
  Rake::TestTask.new(name) do |t|
    lib_dirs.each { |dir| t.libs << dir }
    t.test_files = FileList[pattern]
  end
end

task :dev => [:test]
define_tests.call(:dev, %w[ext lib dev/lib dev/ext], 'dev/test/test*.rb')

task :test => [:compile]
define_tests.call(:test, %w[ext lib], 'test/test*.rb')

# Quick check that only exercises the C extension.
task :light => [:compile]
define_tests.call(:light, %w[ext lib], 'test/test_subcomp.rb')

# Removes a previous site_ruby installation (paths are hard-coded).
task :rm do
  system "rm -rf /usr/local/lib/site_ruby/1.8/chem"
  system "rm /usr/local/lib/site_ruby/1.8/chem.rb"
  system "rm -rf /usr/local/lib/site_ruby/1.8/i386-linux/chem"
end

desc "Prepares for installation"
task :prepare do
  ruby "setup.rb config"
  ruby "setup.rb setup"
end

desc "Installing library"
task :install => [:compile, :prepare] do
  ruby "setup.rb install"
end

task :heavy => [:test]
define_tests.call(:heavy, %w[ext lib], 'test/heavy_test*.rb')

# Deletes object files and bundles left in ext/ by a previous build.
task :clean do
  cd "ext/" do
    ["*.o", "*.bundle"].each do |pattern|
      Dir.glob(pattern).each { |file| rm file }
    end
  end
end

# Parsers generated by racc: [directory, grammar file, generated parser].
# BUG!? Need code for testing if racc exist !?
[
  ['lib/chem/db/smiles/', 'smiles.ry',   'smiparser.rb'],
  ['lib/chem/db/iupac/',  'iuparser.ry', 'iuparser.rb'],
  ['lib/chem/db/linucs/', 'linucs.ry',   'linparser.rb']
].each do |dir, grammar, parser|
  file "#{dir}#{parser}" => ["#{dir}#{grammar}"] do
    cd dir do
      sh "racc #{grammar} -o #{parser}"
    end
  end
end

file 'ext/Makefile' => ['ext/extconf.rb', 'ext/subcomp.c'] do
  cd('ext/') { ruby %{extconf.rb} }
end

file SUBCOMP_LIB => ['ext/subcomp.c', 'ext/Makefile'] do
  cd('ext/') { sh "make" }
end


desc "Compiling library"
task :compile => [
  'lib/chem/db/smiles/smiparser.rb',
  'lib/chem/db/iupac/iuparser.rb',
  'lib/chem/db/linucs/linparser.rb',
  SUBCOMP_LIB
]

spec = Gem::Specification.new do |gem|
  gem.name = 'chemruby'
  gem.version = PKG_VERSION
  gem.require_path = 'lib'
  gem.autorequire = 'chem'
  gem.files = PKG_FILES
  gem.extensions << 'ext/extconf.rb'
  gem.summary = "A framework program for cheminformatics"
end

Rake::GemPackageTask.new(spec) do |package|
  package.need_tar = true
  package.package_files += PKG_FILES
end
@@ -0,0 +1,4 @@
# Generates the Makefile used to build the "subcomp" C extension.
require 'mkmf'

extension_name = "subcomp"
create_makefile(extension_name)
@@ -0,0 +1,416 @@
1
+ /**********************************************************************
2
+
3
+ subcomp.c -
4
+
5
+ $Author: nobyt $
6
+
7
+ Copyright (C) 2004-2006 Nobuya Tanaka
8
+
9
+ **********************************************************************/
10
+
/* Word patterns with every bit set / no bit set. */
#define FULL 0xffffffff
#define ZERO 0x0

/*
 * Result codes used by the matcher.  The values carry no trailing
 * semicolon, so the macros are safe inside expressions and in unbraced
 * if/else statements.
 */
#define FAIL 0
#define SUCCESS 1
16
+
17
+ #include <ruby.h>
18
+
/*
 * Prints a bit matrix to stdout, one text row per matrix row.
 *
 * m  : matrix data; each row occupies n_words consecutive words and each
 *      word carries 32 column bits (bit 0 is the lowest column).
 * pa : number of rows.
 * pb : number of columns.
 *
 * A set bit is printed as '@' and a clear bit as '.'.  The header line and
 * the row labels show the index modulo 10.
 */
static void
show(long *m, int pa, int pb)
{
  int i, j, k;
  int n_words;

  n_words = (pb - 1) / (sizeof(int) * 8) + 1;

  printf("\n ");
  for(i = 0 ; i < pb ; i++){
    printf("%d", i % 10);
  }
  printf("\n");
  for(i = 0 ; i < pa * n_words ; i += n_words){
    printf("%d ", (i / n_words) % 10);
    for(k = 0 ; k < n_words ; k++){
      for(j = k * 32 ; j < ((k + 1) * 32 < pb ? (k + 1) * 32 : pb) ; j++){
        /* Unsigned one: shifting a signed int by 31 is undefined and would
           sign-extend into the upper half of a 64-bit long. */
        if(m[i + k] & (1UL << (j - k * 32)))
          printf("@");
        else
          printf(".");
      }
    }
    printf("\n");
  }
  printf("\n");
}
50
+
51
+ /*
52
+ * call-seq:
53
+ * SubGraphDB.show -> print out adjacency matrix
54
+ *
55
+ * This function is mainly for debug.
56
+ */
57
+
58
+ static VALUE
59
+ subcomp_show(VALUE self, VALUE str, VALUE pa, VALUE pb)
60
+ {
61
+ printf("subcomp_show called %3d %3d\n", FIX2INT(pa), FIX2INT(pb));
62
+ show((long * )RSTRING(str)->ptr, FIX2INT(pa), FIX2INT(pb));
63
+ return Qnil;
64
+ }
65
+
66
+
/*
 * Returns the index of the lowest set bit in one row of a bit matrix
 * (the number of trailing zero bits of the row).
 *
 * y  : first word of the row; each word carries 32 column bits.
 * pb : number of columns (bits) in the row.
 *
 * Only the words that belong to the row are examined.  When none of them
 * has a bit set, the number of bits scanned (32 per word) is returned.
 */
static int ntz_m(long *y, int pb){
  int n_words = (pb > 0) ? (pb - 1) / 32 + 1 : 1;
  int i = 0;
  int n = 1;
  long x;

  /* Skip whole words that are zero, 32 bits at a time. */
  while(i < n_words && y[i] == 0){
    n += 32;
    i++;
  }
  if(i == n_words){
    return n - 1;
  }

  x = y[i];

  /* Binary search for the lowest set bit inside the word. */
  if((x & 0x0000FFFF) == 0) {n = n + 16 ; x = x >> 16;}
  if((x & 0x000000FF) == 0) {n = n + 8 ; x = x >> 8;}
  if((x & 0x0000000F) == 0) {n = n + 4 ; x = x >> 4;}
  if((x & 0x00000003) == 0) {n = n + 2 ; x = x >> 2;}
  return n - (x & 1);
}
90
+
/*
 * Number of trailing zero bits of a 32-bit word held in x.
 * Returns 32 when x is 0.
 */
static int ntz(long x){
  int n;

  if (x == 0) return (32);
  n = 1;
  /* Binary search: halve the candidate range at every step. */
  if((x & 0x0000FFFF) == 0) {n = n + 16 ; x = x >> 16;}
  if((x & 0x000000FF) == 0) {n = n + 8 ; x = x >> 8;}
  if((x & 0x0000000F) == 0) {n = n + 4 ; x = x >> 4;}
  if((x & 0x00000003) == 0) {n = n + 2 ; x = x >> 2;}
  return n - (x & 1);
}

/*
 * Index of the lowest set bit in a run of n_words words, 32 bits per word.
 *
 * The bounds test comes before the element test, so no word outside
 * x[0 .. n_words-1] is ever read.  When every word is zero the result is
 * 32 * n_words.
 */
static int ntz_n_words(long * x, int n_words){
  int i;
  int words = 0;

  for(i = 0 ; i < n_words && x[i] == 0 ; i++){
    words += 32;
  }
  if(i >= n_words){
    return words;
  }
  return ntz(x[i]) + words;
}
111
+
/* bit_mask[n] is the word in which only bit n is set (n = 0 .. 31). */
long bit_mask[32] = {
  1U <<  0, 1U <<  1, 1U <<  2, 1U <<  3,
  1U <<  4, 1U <<  5, 1U <<  6, 1U <<  7,
  1U <<  8, 1U <<  9, 1U << 10, 1U << 11,
  1U << 12, 1U << 13, 1U << 14, 1U << 15,
  1U << 16, 1U << 17, 1U << 18, 1U << 19,
  1U << 20, 1U << 21, 1U << 22, 1U << 23,
  1U << 24, 1U << 25, 1U << 26, 1U << 27,
  1U << 28, 1U << 29, 1U << 30, 1U << 31,
};
122
+
/*
 * reverse_bit[n] is the 32-bit word with every bit set except bit n;
 * AND-ing with it clears bit n.
 */
long reverse_bit[32] = {
  ~(1U <<  0), ~(1U <<  1), ~(1U <<  2), ~(1U <<  3),
  ~(1U <<  4), ~(1U <<  5), ~(1U <<  6), ~(1U <<  7),
  ~(1U <<  8), ~(1U <<  9), ~(1U << 10), ~(1U << 11),
  ~(1U << 12), ~(1U << 13), ~(1U << 14), ~(1U << 15),
  ~(1U << 16), ~(1U << 17), ~(1U << 18), ~(1U << 19),
  ~(1U << 20), ~(1U << 21), ~(1U << 22), ~(1U << 23),
  ~(1U << 24), ~(1U << 25), ~(1U << 26), ~(1U << 27),
  ~(1U << 28), ~(1U << 29), ~(1U << 30), ~(1U << 31),
};
157
+
158
+ //int matchN(ADJACENCY *adj_ptr, long *b, long *m, int pa, int pb)
159
+ static int matchN(const int * num_adj, long ** point, long *b, long *m, int pa, int pb)
160
+ {
161
+ long * mm;// current matrix
162
+ long f[1000];//which columns has been used at an intermediate state of computing
163
+ long h[100];// pb < 100 * 32
164
+
165
+ int d;// depth for matrix
166
+ int k;// width for matrix
167
+ int dd;// depth of matrix in refinement step
168
+ int kk;// width of matrix in refinement step
169
+
170
+ int i, j;//temp
171
+ long l;// temp
172
+
173
+ short vflag;//valid check flag
174
+ int n_words;// number of words needed for storing 'pb' bits.
175
+ long refine_mm;// pointer for mm(match matrix) used in refinment step.
176
+
177
+ d = k = 0;
178
+ // start back track
179
+ for(i = 0 ; i < (pb / 32 + 1) ; i++)
180
+ h[i] = 0;
181
+ for(i = 0 ; i < 10 ; i++)
182
+ f[i] = 0;
183
+
184
+ n_words = (pb - 1) / (sizeof(int) * 8) + 1;
185
+
186
+ /* show(b, pb, pb); */
187
+ /* show(m, pa, pb); */
188
+
189
+ if( d == 0 && k == 0){
190
+ k = ntz_n_words(m, n_words);
191
+ h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
192
+ }
193
+ while(k <= pb && d <= pa){
194
+ /* printf("d : %3d k : %3d n_words : %3d\n", d, k, n_words); */
195
+ if(d < 0){
196
+ printf("d < 0 return \n");
197
+ return FAIL;
198
+ }
199
+
200
+ // Idea for optimization :
201
+ // instead of using following equation, just (mm = mm + len) and (mm = mm - len).
202
+ mm = m + pa * (d + 1) * n_words;
203
+ /* printf("pa : %d d : %d k : %d n_words : %d hint : %d\n", pa, d, k, n_words, pa * (d + 1) * n_words); */
204
+
205
+ //printf("ntz : %d\n", ntz(mm));
206
+ //k = ntz(mm + d);
207
+ // set (k, d) bit '1', clear k-column and d-row '0'
208
+ /* printf("k : %d d: %d\n", k, d); */
209
+ for(j = 0 ; j < n_words ; j++){
210
+ if(j == (k / 32)){
211
+ for(i = 0 ; i < pa ; i++){
212
+ mm[i * n_words + j] = mm[(i - pa) * n_words + j] & reverse_bit[k - (k / 32) * 32];
213
+ }
214
+ mm[d * n_words + j] = bit_mask[k - (k / 32) * 32];
215
+ }else{
216
+ for(i = 0 ; i < pa ; i++){
217
+ mm[i * n_words + j] = mm[(i - pa) * n_words + j];
218
+ }
219
+ mm[d * n_words + j] = ZERO;
220
+ }
221
+ }
222
+ // BEGIN
223
+ /* show(mm, pa, pb); */
224
+ // END
225
+
226
+ // Refinement step
227
+ // Hot Spot!!
228
+ dd = kk = 0;
229
+ /* printf("before refinement step \n"); */
230
+ /* show(mm, pa, pb); */
231
+
232
+ while(dd != pa){
233
+ while(kk != pb){
234
+ //Idea for optimization :
235
+ //refine_mm should not updated 1 / 32 times.mm[dd + ((kk - 1) / 32)]
236
+
237
+ //Idea for optimization :
238
+ // when mm is sparse there may be better algorithm
239
+ // for searching '1' bit.
240
+ if(mm[dd * n_words + ((kk - 1) / 32)] & bit_mask[kk - ((kk - 1) / 32) * 32]){
241
+ // Following loop can be flattened
242
+ for(i = 0 ; i < num_adj[dd] ; i++){
243
+ l = 0;
244
+ for(j = 0 ; j < n_words ; j++){
245
+ l |= (b[kk * n_words + j] & mm[point[dd][i] * n_words + j]);
246
+ }
247
+ if(l == 0){
248
+ mm[dd * n_words + (kk / 32)] &= reverse_bit[kk - (kk / 32) * 32];//remove bit
249
+ /* break;//quit for loop */
250
+ }
251
+ }
252
+
253
+ }
254
+ kk++;
255
+ }
256
+ // Idea for optimization
257
+ // every 32 bit is tested here.
258
+ kk = 0;
259
+ dd++;
260
+ }
261
+ /* show(mm, pa, pb); */
262
+
263
+ //Checking whether match matrices are valid.
264
+ // Subgraph isomorphism can be checked here before reaching d == pa.
265
+ vflag = SUCCESS;
266
+ for(i = 0 ; i < pa ; i++){
267
+ l = 0;
268
+ for(j = 0 ; j < n_words ; j++){
269
+ l |= mm[i * n_words + j];
270
+ }
271
+ if(l == 0){
272
+ vflag = FAIL;
273
+ break;
274
+ }
275
+ }
276
+
277
+ if(vflag){// Success
278
+ f[d] = k;
279
+ k = 0;
280
+ while(h[k / 32] & bit_mask[k - (k / 32) * 32])
281
+ k++;
282
+ d++;
283
+ if(d == pa){
284
+ /* show(mm, pa, pb); */
285
+ //printf("FOUND! d : %d\n", d);
286
+ return SUCCESS;
287
+ }
288
+ else{
289
+ h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
290
+ }
291
+ }else{//Failed
292
+ h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
293
+ k++;
294
+ //printf("d : %d k : %d\n", d, k);
295
+ while((h[k / 32] & bit_mask[k - (k / 32) * 32] ||
296
+ (m[d * n_words + (k / 32)] & bit_mask[k - (k / 32) * 32] ) == 0) &&
297
+ k < pb)
298
+ k++;
299
+ /* printf("d : %d k : %d\n", d, k); */
300
+ while(k > pb){
301
+ if(d == 0){
302
+ return FAIL;
303
+ }
304
+ d--;
305
+ k = f[d];
306
+ h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
307
+ k++;
308
+ while(h[k / 32] & bit_mask[k - (k / 32) * 32])
309
+ k++;
310
+ }
311
+ h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
312
+ }
313
+ }
314
+ //printf("d : %d k : %d FAIL!\n", d, k);
315
+ return FAIL;
316
+ }
317
+
318
+ static void set_adjacency(int * num_adj, long ** point, long * adj, VALUE ret){
319
+ int i, j, n_words;
320
+ int off_set = 0;
321
+
322
+ n_words = (RARRAY(ret)->len - 1) / (sizeof(int) * 8) + 1;
323
+
324
+ for(i = 0 ; i < RARRAY(ret)->len ; i++){
325
+ num_adj[i] = FIX2INT(rb_funcall(RARRAY(ret)->ptr[i], rb_intern("length"), 0));
326
+ point[i] = adj + off_set;
327
+ for(j = 0 ; j < RARRAY(RARRAY(ret)->ptr[i])->len ; j++){
328
+ adj[off_set++] = FIX2INT(RARRAY(RARRAY(ret)->ptr[i])->ptr[j]);
329
+ //printf(" %d ", FIX2INT(RARRAY(RARRAY(ret)->ptr[i])->ptr[j]));
330
+ }
331
+ //printf("\n");
332
+ }
333
+ }
334
+
335
+ static VALUE subcomp_match_by_ullmann(VALUE self, VALUE a_matrix, VALUE pa, VALUE other_adj, VALUE pb, VALUE match){
336
+ // variables for adjacency list of graph A
337
+ int num_adj[1000];
338
+ long * point[1000];
339
+ long adj[3000];//adjacency list
340
+
341
+ // match matrix; = pa * (n_words * pa)
342
+ long * mm;//[800000];
343
+ long * m;
344
+
345
+ //temporary variables
346
+ int i;
347
+ int result;
348
+ VALUE mapping;
349
+
350
+ int n_pb, n_pa;
351
+ int n_words;
352
+ int sizeof_mm;
353
+
354
+ n_pb = NUM2INT(pb);
355
+ n_pa = NUM2INT(pa);
356
+
357
+ if(n_pb > n_pa){
358
+ return Qfalse;
359
+ }
360
+
361
+ sizeof_mm = n_pa * (n_pb + 1) * n_words;
362
+
363
+ n_words = (n_pa - 1) / (sizeof(int) * 8) + 1;
364
+
365
+ mm = (long * )malloc(sizeof(long) * 800000);
366
+ if(RSTRING(match)->len > 800000 * sizeof(long))
367
+ rb_raise(rb_eArgError, "Length of match matrix too short! %d", sizeof(mm));
368
+
369
+ memcpy(mm, (long *)RSTRING(match)->ptr, RSTRING(match)->len); // BUG!!
370
+
371
+ Check_Type(a_matrix, T_STRING);
372
+
373
+ set_adjacency(num_adj, point, adj, other_adj);
374
+
375
+ //show(mm, n_pa, n_pb);
376
+ //show((long *)RSTRING(a_matrix)->ptr, n_pa, n_pa);
377
+
378
+ result = matchN(num_adj, point, (long *)RSTRING(a_matrix)->ptr, mm, n_pb, n_pa);
379
+
380
+ if(result == 1){//?
381
+ mapping = rb_ary_new();
382
+ //printf("n_words : %d n_pa : %d n_pb : %d n_words * n_pa * n_pa : %d", n_words, n_pa, n_pb, n_words * n_pa * n_pa);
383
+ //show(mm + n_words * n_pb * n_pb, n_pb, n_pa);
384
+
385
+ for(i = 0 ; i < n_pb ; i++){
386
+ rb_ary_push(mapping, INT2FIX(ntz_m(mm + n_words * n_pb * n_pb + i * n_words, n_pa)));
387
+ }
388
+ return mapping;
389
+ }
390
+ return Qfalse;
391
+ }
392
+
393
+ // DataBase for substructure search
394
+
395
+ struct dbmdata {
396
+ int di_size;
397
+ };
398
+
399
+ static VALUE sdb_s_search(VALUE dbname){
400
+ rb_p(dbname);
401
+ }
402
+
403
+ Init_subcomp(){
404
+ VALUE subcomp_cGraph;
405
+ VALUE subcomp_cSubGraphDB;
406
+
407
+ subcomp_cGraph = rb_define_module("Graph");
408
+ rb_define_method(subcomp_cGraph, "subcomp_match_by_ullmann", subcomp_match_by_ullmann, 5);
409
+
410
+ subcomp_cSubGraphDB = rb_define_class_under(subcomp_cGraph, "SubGraphDB", rb_cObject);
411
+
412
+ rb_define_method(subcomp_cSubGraphDB, "open_for_search", sdb_s_search, 0);
413
+
414
+ rb_define_singleton_method(subcomp_cSubGraphDB, "show", subcomp_show, 3);
415
+ rb_define_singleton_method(subcomp_cSubGraphDB, "match", subcomp_match_by_ullmann, 5);
416
+ }