chemruby 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. data/README +120 -0
  2. data/Rakefile +195 -0
  3. data/ext/extconf.rb +4 -0
  4. data/ext/subcomp.c +416 -0
  5. data/lib/chem.rb +130 -0
  6. data/lib/chem/appl.rb +1 -0
  7. data/lib/chem/appl/chem3dole.rb +36 -0
  8. data/lib/chem/appl/tinker/nucleic.rb +40 -0
  9. data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
  10. data/lib/chem/data.rb +4 -0
  11. data/lib/chem/data/atomic_weight.rb +124 -0
  12. data/lib/chem/data/character.rb +2 -0
  13. data/lib/chem/data/electronegativity.rb +14 -0
  14. data/lib/chem/data/periodic_table.rb +6 -0
  15. data/lib/chem/data/prime_numbers.rb +1 -0
  16. data/lib/chem/data/vdw_radii.rb +1 -0
  17. data/lib/chem/db.rb +64 -0
  18. data/lib/chem/db/cansmi.rb +234 -0
  19. data/lib/chem/db/cdx.rb +1525 -0
  20. data/lib/chem/db/eps.rb +164 -0
  21. data/lib/chem/db/g98.rb +909 -0
  22. data/lib/chem/db/gspan.rb +130 -0
  23. data/lib/chem/db/iupac.rb +5 -0
  24. data/lib/chem/db/iupac/a_1.rb +46 -0
  25. data/lib/chem/db/iupac/iuparser.rb +226 -0
  26. data/lib/chem/db/iupac/iuparser.ry +97 -0
  27. data/lib/chem/db/iupac/postfix.rb +2 -0
  28. data/lib/chem/db/kcf.rb +390 -0
  29. data/lib/chem/db/kcf_glycan.rb +19 -0
  30. data/lib/chem/db/kegg.rb +516 -0
  31. data/lib/chem/db/linucs/linparser.rb +144 -0
  32. data/lib/chem/db/linucs/linucs.ry +53 -0
  33. data/lib/chem/db/mdl.rb +379 -0
  34. data/lib/chem/db/molconnz.rb +12 -0
  35. data/lib/chem/db/mopac.rb +88 -0
  36. data/lib/chem/db/msi.rb +107 -0
  37. data/lib/chem/db/pdb_dic.rb +115 -0
  38. data/lib/chem/db/pdf.rb +131 -0
  39. data/lib/chem/db/pubchem.rb +113 -0
  40. data/lib/chem/db/rmagick.rb +70 -0
  41. data/lib/chem/db/sdf.rb +37 -0
  42. data/lib/chem/db/smbl.rb +88 -0
  43. data/lib/chem/db/smiles.rb +2 -0
  44. data/lib/chem/db/smiles/smiles.ry +203 -0
  45. data/lib/chem/db/smiles/smiparser.rb +375 -0
  46. data/lib/chem/db/swf.rb +74 -0
  47. data/lib/chem/db/sybyl.rb +150 -0
  48. data/lib/chem/db/tinker.rb +77 -0
  49. data/lib/chem/db/types/type_cansmi.rb +9 -0
  50. data/lib/chem/db/types/type_cdx.rb +24 -0
  51. data/lib/chem/db/types/type_gspan.rb +31 -0
  52. data/lib/chem/db/types/type_kcf.rb +28 -0
  53. data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
  54. data/lib/chem/db/types/type_kegg.rb +92 -0
  55. data/lib/chem/db/types/type_mdl.rb +31 -0
  56. data/lib/chem/db/types/type_pdf.rb +33 -0
  57. data/lib/chem/db/types/type_png.rb +31 -0
  58. data/lib/chem/db/types/type_rxn.rb +25 -0
  59. data/lib/chem/db/types/type_sdf.rb +25 -0
  60. data/lib/chem/db/types/type_sybyl.rb +30 -0
  61. data/lib/chem/db/types/type_xyz.rb +26 -0
  62. data/lib/chem/db/vector.rb +128 -0
  63. data/lib/chem/db/xyz.rb +39 -0
  64. data/lib/chem/model.rb +119 -0
  65. data/lib/chem/model/skeleton.rb +37 -0
  66. data/lib/chem/utils.rb +11 -0
  67. data/lib/chem/utils/geometry.rb +27 -0
  68. data/lib/chem/utils/graph_db.rb +146 -0
  69. data/lib/chem/utils/math.rb +17 -0
  70. data/lib/chem/utils/prop.rb +123 -0
  71. data/lib/chem/utils/sssr.rb +101 -0
  72. data/lib/chem/utils/sub.rb +78 -0
  73. data/lib/chem/utils/transform.rb +110 -0
  74. data/lib/chem/utils/traverse.rb +37 -0
  75. data/lib/chem/utils/ullmann.rb +134 -0
  76. data/lib/graph.rb +41 -0
  77. data/lib/graph/cluster.rb +20 -0
  78. data/lib/graph/morgan.rb +38 -0
  79. data/sample/frequent_subgraph.rb +46 -0
  80. data/sample/images/ex1.rb +11 -0
  81. data/sample/images/ex2.rb +4 -0
  82. data/sample/images/ex3.rb +5 -0
  83. data/sample/images/ex4.rb +17 -0
  84. data/sample/images/ex5.rb +10 -0
  85. data/sample/images/mol/adenine.mol +26 -0
  86. data/sample/images/mol/atp.mol +69 -0
  87. data/sample/images/temp/ex5.mol +344 -0
  88. data/sample/kegg_db.rb +116 -0
  89. data/setup.rb +1551 -0
  90. data/test/all.rb +6 -0
  91. data/test/coord_test.rb +17 -0
  92. data/test/ctab_test.rb +31 -0
  93. data/test/data/A_21.tar.gz +0 -0
  94. data/test/data/A_21/aceanthrylene.cdx +0 -0
  95. data/test/data/A_21/aceanthrylene.mol +40 -0
  96. data/test/data/A_21/acenaphthylene.cdx +0 -0
  97. data/test/data/A_21/acenaphthylene.mol +31 -0
  98. data/test/data/A_21/acephenanthrylene.cdx +0 -0
  99. data/test/data/A_21/acephenanthrylene.mol +40 -0
  100. data/test/data/A_21/anthracene.cdx +0 -0
  101. data/test/data/A_21/anthracene.mol +35 -0
  102. data/test/data/A_21/as-indacene.cdx +0 -0
  103. data/test/data/A_21/as-indacene.mol +31 -0
  104. data/test/data/A_21/azulene.cdx +0 -0
  105. data/test/data/A_21/azulene.mol +26 -0
  106. data/test/data/A_21/biphenylene.cdx +0 -0
  107. data/test/data/A_21/biphenylene.mol +31 -0
  108. data/test/data/A_21/chrysene.cdx +0 -0
  109. data/test/data/A_21/chrysene.mol +44 -0
  110. data/test/data/A_21/coronen.cdx +0 -0
  111. data/test/data/A_21/coronen.mol +59 -0
  112. data/test/data/A_21/fluoranthene.cdx +0 -0
  113. data/test/data/A_21/fluoranthene.mol +40 -0
  114. data/test/data/A_21/fluorene.cdx +0 -0
  115. data/test/data/A_21/fluorene.mol +33 -0
  116. data/test/data/A_21/heptacene.cdx +0 -0
  117. data/test/data/A_21/heptacene.mol +71 -0
  118. data/test/data/A_21/heptalene.cdx +0 -0
  119. data/test/data/A_21/heptalene.mol +30 -0
  120. data/test/data/A_21/heptaphene.cdx +0 -0
  121. data/test/data/A_21/heptaphene.mol +71 -0
  122. data/test/data/A_21/hexacene.cdx +0 -0
  123. data/test/data/A_21/hexacene.mol +62 -0
  124. data/test/data/A_21/hexaphene.cdx +0 -0
  125. data/test/data/A_21/hexaphene.mol +62 -0
  126. data/test/data/A_21/indene.cdx +0 -0
  127. data/test/data/A_21/indene.mol +24 -0
  128. data/test/data/A_21/iupac.txt +41 -0
  129. data/test/data/A_21/naphthacene.cdx +0 -0
  130. data/test/data/A_21/naphthacene.mol +44 -0
  131. data/test/data/A_21/naphthalene.cdx +0 -0
  132. data/test/data/A_21/naphthalene.mol +26 -0
  133. data/test/data/A_21/ovalene.cdx +0 -0
  134. data/test/data/A_21/ovalene.mol +78 -0
  135. data/test/data/A_21/pentacene.cdx +0 -0
  136. data/test/data/A_21/pentacene.mol +53 -0
  137. data/test/data/A_21/pentalene.cdx +0 -0
  138. data/test/data/A_21/pentalene.mol +22 -0
  139. data/test/data/A_21/pentaphene.cdx +0 -0
  140. data/test/data/A_21/pentaphene.mol +53 -0
  141. data/test/data/A_21/perylene.cdx +0 -0
  142. data/test/data/A_21/perylene.mol +49 -0
  143. data/test/data/A_21/phenalene.cdx +0 -0
  144. data/test/data/A_21/phenalene.mol +33 -0
  145. data/test/data/A_21/phenanthrene.cdx +0 -0
  146. data/test/data/A_21/phenanthrene.mol +35 -0
  147. data/test/data/A_21/picene.cdx +0 -0
  148. data/test/data/A_21/picene.mol +53 -0
  149. data/test/data/A_21/pleiadene.cdx +0 -0
  150. data/test/data/A_21/pleiadene.mol +44 -0
  151. data/test/data/A_21/pyranthrene.cdx +0 -0
  152. data/test/data/A_21/pyranthrene.mol +72 -0
  153. data/test/data/A_21/pyrene.cdx +0 -0
  154. data/test/data/A_21/pyrene.mol +40 -0
  155. data/test/data/A_21/rubicene.cdx +0 -0
  156. data/test/data/A_21/rubicene.mol +63 -0
  157. data/test/data/A_21/s-indacene.cdx +0 -0
  158. data/test/data/A_21/s-indacene.mol +31 -0
  159. data/test/data/A_21/tetraphenylene.cdx +0 -0
  160. data/test/data/A_21/tetraphenylene.mol +57 -0
  161. data/test/data/A_21/trinaphthylene.cdx +0 -0
  162. data/test/data/A_21/trinaphthylene.mol +71 -0
  163. data/test/data/A_21/triphenylene.cdx +0 -0
  164. data/test/data/A_21/triphenylene.mol +44 -0
  165. data/test/data/C00147.kcf +25 -0
  166. data/test/data/G00147.kcf +13 -0
  167. data/test/data/atp.mol +69 -0
  168. data/test/data/cyclohexane.mol +17 -0
  169. data/test/data/cyclohexane.ps +485 -0
  170. data/test/data/fullerene.mol +155 -0
  171. data/test/data/glycan +33 -0
  172. data/test/data/hypericin.cdx +0 -0
  173. data/test/data/hypericin.cdxml +596 -0
  174. data/test/data/hypericin.chm +0 -0
  175. data/test/data/hypericin.ct +85 -0
  176. data/test/data/hypericin.f1d +0 -0
  177. data/test/data/hypericin.f1q +0 -0
  178. data/test/data/hypericin.gif +0 -0
  179. data/test/data/hypericin.mol +88 -0
  180. data/test/data/hypericin.mol2 +159 -0
  181. data/test/data/hypericin.msm +123 -0
  182. data/test/data/hypericin.pdf +359 -0
  183. data/test/data/hypericin.png +0 -0
  184. data/test/data/hypericin.ps +0 -0
  185. data/test/data/hypericin.skc +0 -0
  186. data/test/data/hypericin2.gif +0 -0
  187. data/test/data/hypericin2.ps +0 -0
  188. data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
  189. data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
  190. data/test/data/kegg/ligand/mol/C00147.mol +26 -0
  191. data/test/data/kegg/ligand/reaction +14 -0
  192. data/test/data/kegg/ligand/reaction.lst +1 -0
  193. data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
  194. data/test/data/reaction +14 -0
  195. data/test/data/reaction.lst +1 -0
  196. data/test/data/reaction_mapformula.lst +3 -0
  197. data/test/data/rxn/C00001.mol +6 -0
  198. data/test/data/rxn/C00011.mol +10 -0
  199. data/test/data/rxn/C00014.mol +6 -0
  200. data/test/data/rxn/C01010.mol +18 -0
  201. data/test/data/rxn/sample.rxn +50 -0
  202. data/test/data/rxn/substitution.rxn +45 -0
  203. data/test/data/test.eps +0 -0
  204. data/test/data/test.mol +28 -0
  205. data/test/data/test.sdf +143 -0
  206. data/test/data/test.skc +0 -0
  207. data/test/data/test.xyz +4 -0
  208. data/test/data/test_lf.sdf +143 -0
  209. data/test/heavy_test_pubchem.rb +16 -0
  210. data/test/multiple_test.rb +22 -0
  211. data/test/test_adj.rb +54 -0
  212. data/test/test_canonical_smiles.rb +46 -0
  213. data/test/test_cdx.rb +32 -0
  214. data/test/test_chem.rb +18 -0
  215. data/test/test_cluster.rb +19 -0
  216. data/test/test_db.rb +11 -0
  217. data/test/test_eps.rb +24 -0
  218. data/test/test_geometry.rb +11 -0
  219. data/test/test_gspan.rb +28 -0
  220. data/test/test_iupac.rb +36 -0
  221. data/test/test_kcf.rb +24 -0
  222. data/test/test_kcf_glycan.rb +10 -0
  223. data/test/test_kegg.rb +118 -0
  224. data/test/test_linucs.rb +21 -0
  225. data/test/test_mdl.rb +45 -0
  226. data/test/test_mol2.rb +62 -0
  227. data/test/test_morgan.rb +21 -0
  228. data/test/test_pdf.rb +12 -0
  229. data/test/test_prop.rb +86 -0
  230. data/test/test_rmagick.rb +15 -0
  231. data/test/test_sbdb.rb +23 -0
  232. data/test/test_sdf.rb +30 -0
  233. data/test/test_smiles.rb +84 -0
  234. data/test/test_sssr.rb +18 -0
  235. data/test/test_sub.rb +47 -0
  236. data/test/test_subcomp.rb +37 -0
  237. data/test/test_traverse.rb +29 -0
  238. data/test/test_writer.rb +13 -0
  239. data/test/test_xyz.rb +15 -0
  240. data/test/type_test.rb +25 -0
  241. metadata +290 -0
data/README ADDED
@@ -0,0 +1,120 @@
1
+ = ChemRuby - Cheminformatics Ruby
2
+
3
+ ChemRuby is a framework for developing cheminformatics applications in Ruby.
4
+ It will let you retrieve chemical information from a variety of data sources
5
+ in various formats (such as MDL mol, SMILES etc.), perform fast substructure search
6
+ based on graph theory, draw a chemical structure in various graphics formats
7
+ (such as PDF, PNG etc.), and calculate a number of chemical properties.
8
+
9
+ == FOR MORE INFORMATION
10
+
11
+ ChemRuby's official website is at ((<URL:http://www.chemruby.org/>)).
12
+ You will find links to the related resources including downloads,
13
+ Wiki documentation etc. on the top page.
14
+
15
+ * ((<URL:http://www.chemruby.org/>))
16
+
17
+
18
+ == WHERE TO OBTAIN
19
+
20
+ --- WWW
21
+
22
+ The releases can be obtained at ChemRuby website.
23
+
24
+ * ((<URL:http://www.chemruby.org/>))
25
+
26
+ --- RubyGems
27
+
28
+ ((<RubyGems|URL:http://rubyforge.org/projects/rubygems/>)) version of
29
+ the ChemRuby package is also available for easy installation.
30
+
31
+ * ((<URL:http://rubyforge.org/projects/chemruby/>))
32
+
33
+
34
+ == REQUIREMENTS
35
+
36
+ * Ruby 1.8.2 or later -- ((<URL:http://www.ruby-lang.org/>))
37
+
38
+
39
+ == OPTIONAL REQUIREMENTS
40
+
41
+ Some optional libraries can be utilized to extend ChemRuby's functionality.
42
+ If your needs meet the following conditions, install them from the "Ruby
43
+ Application Archive" at ((<URL:http://raa.ruby-lang.org/>)).
44
+
45
+ For outputting png and jpeg images:
46
+
47
+ * [RAA:rmagick]
48
+
49
+ For testing and developing ChemRuby:
50
+
51
+ * [RAA:rake]
52
+
53
+ == INSTALL
54
+
55
+ In the chemruby source directory (such as chemruby-x.x.x/), run setup.rb
56
+ as follows:
57
+
58
+ % ruby setup.rb config
59
+ % ruby setup.rb setup
60
+ % su
61
+ # ruby setup.rb install
62
+
63
+ If your operating system supports the 'sudo' command (such as Mac OS X),
64
+ try the following procedure instead of the above.
65
+
66
+ % ruby setup.rb config
67
+ % ruby setup.rb setup
68
+ % sudo ruby setup.rb install
69
+
70
+ You can run tests by
71
+
72
+ % rake test
73
+
74
+ and run
75
+
76
+ % rake
77
+
78
+ for more details.
79
+
80
+ == USAGE
81
+
82
+ You can load all ChemRuby classes just by requiring 'chem.rb'. All the
83
+ ChemRuby classes and modules are located under the module name 'Chem' to
84
+ separate the name space.
85
+
86
+ #!/usr/bin/env ruby
87
+ require 'chem'
88
+
89
+ --- RubyGems
90
+
91
+ In RubyGems, you need to load 'rubygems' library before using 'chem'.
92
+
93
+ #!/usr/bin/env ruby
94
+ require 'rubygems'
95
+ require_gem 'chem'
96
+
97
+ == Credits
98
+
99
+ * GOTO Naohisa, KATAYAMA Toshiaki and NAKAO Mitsuteru (alphabetical order)
100
+ who are developers of BioRuby, led the design of ChemRuby.
101
+
102
+ * gSpan parser and PubChem search from KADOWAKI masashi.
103
+
104
+ == LICENSE
105
+
106
+ ChemRuby can be freely distributed under the Ruby's license.
107
+ Note that, setup.rb included in the ChemRuby package comes from
108
+ <RAA:setup> developed by Minero Aoki.
109
+
110
+ This README file can also be distributed under the Ruby's license.
111
+
112
+ Copyright (C) 2006 TANAKA Nobuya <tanaka@chemruby.org>
113
+ KATAYAMA Toshiaki <k@bioruby.org>
114
+
115
+ == CONTACT
116
+
117
+ Current staff of the ChemRuby project can be reached by sending e-mail
118
+ to <staff@chemruby.org>.
119
+
120
+
@@ -0,0 +1,195 @@
1
+ #
2
+ # Rakefile
3
+ #
4
+ # See http://docs.rubyrake.org/ to see how to use ``rake'' command.
5
+ #
6
+ # $Id: Rakefile 61 2005-10-12 09:17:39Z tanaka $
7
+ #
8
+
9
+ require 'rake/clean'
10
+ require 'rake/testtask'
11
+ require 'rake/gempackagetask'
12
+
13
+ task :default => [:help]
14
+
15
+ PKG_VERSION = "0.9.3"
16
+ PKG_BUILD = "RC1"
17
+
18
+ PKG_FILES = FileList[
19
+ "Rakefile", "README", #"ChangeLog", "Releases", "TODO",
20
+ "setup.rb",
21
+ # "post-install.rb",
22
+ # "bin/*",
23
+ # "doc/*.css", "doc/*.rb",
24
+ # "examples/**/*",
25
+ # "gemspecs/**/*",
26
+ "lib/**/*.rb",
27
+ "lib/**/*.ry",
28
+ "test/**/*",
29
+ "sample/**/*.rb",
30
+ "sample/**/*.mol",
31
+ "ext/**/*.h",
32
+ "ext/**/*.c",
33
+ "ext/**/*.rb",
34
+ # "pkgs/**/*",
35
+ # "redist/*.gem",
36
+ # "scripts/*.rb",
37
+ # "test/**/*"
38
+ ]
39
+
40
+ task :help do |t|
41
+ puts <<EOL
42
+
43
+ ChemRuby #{PKG_VERSION}
44
+
45
+ To install ChemRuby, you need at least
46
+
47
+ * ruby-1.8.2 (or later)
48
+ * Ruby header files (included in original Ruby)
49
+ * C language compilers (such as gcc)
50
+
51
+ If the following modules are installed, ChemRuby will use it.
52
+ You can install them later.
53
+
54
+ * RMagick ( You will find how to install them in http://www.chemruby.org)
55
+
56
+ == Compiling and Installing
57
+
58
+ % rake compile
59
+ % sudo rake install
60
+
61
+ or just
62
+
63
+ % sudo ruby setup.rb
64
+
65
+ == Compiling RDOC
66
+
67
+ % rake doc
68
+
69
+ == Test
70
+
71
+ % rake test
72
+
73
+ You will need RMagick and other libraries to pass all the tests.
74
+
75
+ EOL
76
+
77
+ end
78
+
79
+ task :doc do |t|
80
+ system "rdoc --main README ./lib README"
81
+ end
82
+
83
+
84
+ task :dev => [:test]
85
+ Rake::TestTask.new(:dev) do |t|
86
+ t.libs << File.join('ext')
87
+ t.libs << File.join('lib')
88
+ t.libs << File.join('dev/lib')
89
+ t.libs << File.join('dev/ext')
90
+ # cd 'dev/ext/chem/db/inchi/' do
91
+ # ruby %{extconf.rb}
92
+ # sh "make"
93
+ # end
94
+ t.test_files = FileList['dev/test/test*.rb']
95
+ end
96
+
97
+ task :test => [:compile]
98
+ Rake::TestTask.new(:test) do |t|
99
+ t.libs << File.join('ext')
100
+ t.libs << File.join('lib')
101
+ t.test_files = FileList['test/test*.rb']
102
+ end
103
+
104
+ task :light => [:compile]
105
+ Rake::TestTask.new(:light) do |t|
106
+ t.libs << File.join('ext')
107
+ t.libs << File.join('lib')
108
+ t.test_files = FileList['test/test_subcomp.rb']
109
+ #'test/test_kegg.rb'# 'test/test_kcf_glycan.rb' #FileList['test/test_canonical_smiles.rb']
110
+ end
111
+
112
+ task :rm do
113
+ system "rm -rf /usr/local/lib/site_ruby/1.8/chem"
114
+ system "rm /usr/local/lib/site_ruby/1.8/chem.rb"
115
+ system "rm -rf /usr/local/lib/site_ruby/1.8/i386-linux/chem"
116
+ end
117
+
118
+ desc "Prepares for installation"
119
+ task :prepare do
120
+ ruby "setup.rb config"
121
+ ruby "setup.rb setup"
122
+ end
123
+
124
+ desc "Installing library"
125
+ task :install => [:compile, :prepare] do
126
+ ruby "setup.rb install"
127
+ end
128
+
129
+ task :heavy => [:test]
130
+ Rake::TestTask.new(:heavy) do |t|
131
+ t.libs << File.join('ext')
132
+ t.libs << File.join('lib')
133
+ t.test_files = FileList['test/heavy_test*.rb']
134
+ end
135
+
136
+ task :clean do
137
+ cd "ext/" do
138
+ Dir.glob("*.o").each do |file|
139
+ rm file
140
+ end
141
+ Dir.glob("*.bundle").each do |file|
142
+ rm file
143
+ end
144
+ end
145
+ end
146
+
147
+ # BUG!? Need code for testing if racc exist !?
148
+ file 'lib/chem/db/smiles/smiparser.rb' => ['lib/chem/db/smiles/smiles.ry'] do
149
+ cd 'lib/chem/db/smiles/' do
150
+ sh "racc smiles.ry -o smiparser.rb"
151
+ end
152
+ end
153
+
154
+ file 'lib/chem/db/iupac/iuparser.rb' => ['lib/chem/db/iupac/iuparser.ry'] do
155
+ cd 'lib/chem/db/iupac/' do
156
+ sh "racc iuparser.ry -o iuparser.rb"
157
+ end
158
+ end
159
+
160
+ file 'lib/chem/db/linucs/linparser.rb' => ['lib/chem/db/linucs/linucs.ry'] do
161
+ cd 'lib/chem/db/linucs/' do
162
+ sh "racc linucs.ry -o linparser.rb"
163
+ end
164
+ end
165
+
166
+ file 'ext/Makefile' => ['ext/extconf.rb', 'ext/subcomp.c'] do
167
+ cd 'ext/' do
168
+ ruby %{extconf.rb}
169
+ end
170
+ end
171
+
172
+ file "ext/subcomp.#{Config::CONFIG["DLEXT"]}" => ['ext/subcomp.c', 'ext/Makefile'] do
173
+ cd 'ext/' do
174
+ sh "make"
175
+ end
176
+ end
177
+
178
+
179
+ desc "Compiling library"
180
+ task :compile => ['lib/chem/db/smiles/smiparser.rb', 'lib/chem/db/iupac/iuparser.rb', 'lib/chem/db/linucs/linparser.rb', "ext/subcomp.#{Config::CONFIG["DLEXT"]}"]
181
+
182
+ spec = Gem::Specification.new do |s|
183
+ s.name = 'chemruby'
184
+ s.version = PKG_VERSION
185
+ s.require_path = 'lib'
186
+ s.autorequire = 'chem'
187
+ s.files = PKG_FILES
188
+ s.extensions << 'ext/extconf.rb'
189
+ s.summary = "A framework program for cheminformatics"
190
+ end
191
+
192
+ Rake::GemPackageTask.new(spec) do |pkg|
193
+ pkg.need_tar = true
194
+ pkg.package_files += PKG_FILES
195
+ end
@@ -0,0 +1,4 @@
1
+
2
+ require 'mkmf'
3
+
4
+ create_makefile("subcomp")
@@ -0,0 +1,416 @@
1
+ /**********************************************************************
2
+
3
+ subcomp.c -
4
+
5
+ $Author: nobyt $
6
+
7
+ Copyright (C) 2004-2006 Nobuya Tanaka
8
+
9
+ **********************************************************************/
10
+
11
/* Word constants: all 32 low bits set / no bit set. */
#define FULL 0xffffffff
#define ZERO 0x0

/*
 * Result codes used by the matcher.  They must not carry a trailing
 * semicolon: with one, `FAIL` and `SUCCESS` only work as the last token of
 * a statement and break inside any expression (e.g. `x == FAIL`).
 */
#define FAIL 0
#define SUCCESS 1
16
+
17
+ #include <ruby.h>
18
+
19
/*
 * Debug helper: print a bit matrix of `pa` rows by `pb` columns to stdout.
 *
 * m  - row-major matrix; every row occupies n_words consecutive elements
 *      and 32 column bits are taken from each element.
 * pa - number of rows to print.
 * pb - number of columns (bits) per row.
 *
 * A header with the column index modulo 10 is printed first.  Each row is
 * then printed as its index modulo 10 followed by '@' for a set bit and
 * '.' for a clear bit.
 */
static void
show(long *m, int pa, int pb)
{
  int row, word, col;
  int n_words = (pb - 1) / 32 + 1;

  printf("\n ");
  for(col = 0 ; col < pb ; col++){
    printf("%d", col % 10);
  }
  printf("\n");

  for(row = 0 ; row < pa ; row++){
    printf("%d ", row % 10);
    for(word = 0 ; word < n_words ; word++){
      int last = (word + 1) * 32 < pb ? (word + 1) * 32 : pb;

      for(col = word * 32 ; col < last ; col++){
        /* Unsigned mask: `1 << 31` on a signed int is undefined behaviour
           and, once widened to long, would also match the upper bits. */
        if((unsigned long)m[row * n_words + word] & (1UL << (col - word * 32)))
          printf("@");
        else
          printf(".");
      }
    }
    printf("\n");
  }
  printf("\n");
}
50
+
51
+ /*
52
+ * call-seq:
53
+ * SubGraphDB.show -> print out adjacency matrix
54
+ *
55
+ * This function is mainly for debug.
56
+ */
57
+
58
+ static VALUE
59
+ subcomp_show(VALUE self, VALUE str, VALUE pa, VALUE pb)
60
+ {
61
+ printf("subcomp_show called %3d %3d\n", FIX2INT(pa), FIX2INT(pb));
62
+ show((long * )RSTRING(str)->ptr, FIX2INT(pa), FIX2INT(pb));
63
+ return Qnil;
64
+ }
65
+
66
+
67
/*
 * Returns the number of trailing zero bits of a multi-word bit row.
 *
 * y  - row data; 32 bits are taken from each element.
 * pb - number of bits (columns) in the row; the row therefore spans
 *      (pb - 1) / 32 + 1 elements.
 *
 * Returns the index of the lowest set bit.  When no bit is set in the row
 * the result is 32 * (number of words); when pb <= 0 the result is 0.
 *
 * The scan is bounded by the word count.  Bounding it by `pb` (a bit count)
 * lets the loop run past the end of the row, and an all-zero row would be
 * read out of bounds.
 */
static int ntz_m(long *y, int pb){
  int n_words;
  int i = 0;
  int n = 1;
  long x;

  if(pb <= 0)
    return 0;
  n_words = (pb - 1) / 32 + 1;

  while(i < n_words && y[i] == 0){
    n += 32;
    i++;
  }
  if(i == n_words)
    return 32 * n_words;

  x = y[i];

  /* Binary search for the lowest set bit inside the first non-zero word. */
  if((x & 0x0000FFFF) == 0) {n = n + 16 ; x = x >> 16;}
  if((x & 0x000000FF) == 0) {n = n + 8 ; x = x >> 8;}
  if((x & 0x0000000F) == 0) {n = n + 4 ; x = x >> 4;}
  if((x & 0x00000003) == 0) {n = n + 2 ; x = x >> 2;}
  return n - (int)(x & 1);
}
90
+
91
/*
 * Number of trailing zero bits in the low 32 bits of x.
 * Returns 32 when x is zero.
 */
static int ntz(long x){
  static const int step[4] = {16, 8, 4, 2};
  int count = 1;
  int s;

  if (x == 0) {
    return 32;
  }

  /* Halve the search window each round: if the low `step` bits are all
     clear, account for them and shift them out. */
  for (s = 0; s < 4; s++) {
    long low_bits = (1L << step[s]) - 1;

    if ((x & low_bits) == 0) {
      count += step[s];
      x >>= step[s];
    }
  }
  return count - (int)(x & 1);
}
102
+
103
/*
 * Number of trailing zero bits across the first n_words elements of x,
 * counting 32 bits per element.
 *
 * Returns 32 * n_words when every word is zero (0 when n_words <= 0).
 *
 * The bounds test is evaluated before x[i] is read, and an all-zero input
 * no longer reads x[n_words].
 */
static int ntz_n_words(long * x, int n_words){
  int i = 0;

  while(i < n_words && x[i] == 0){
    i++;
  }
  if(i >= n_words){
    return 32 * (n_words > 0 ? n_words : 0);
  }
  return ntz(x[i]) + 32 * i;
}
111
+
112
/*
 * bit_mask[n] has only bit n set (n = 0..31).
 * File-local and read-only: the table is never written, and keeping it
 * `static` avoids exporting a generic symbol name from the extension.
 */
static const long bit_mask[32] = {
  0x00000001, 0x00000002, 0x00000004, 0x00000008,
  0x00000010, 0x00000020, 0x00000040, 0x00000080,
  0x00000100, 0x00000200, 0x00000400, 0x00000800,
  0x00001000, 0x00002000, 0x00004000, 0x00008000,
  0x00010000, 0x00020000, 0x00040000, 0x00080000,
  0x00100000, 0x00200000, 0x00400000, 0x00800000,
  0x01000000, 0x02000000, 0x04000000, 0x08000000,
  0x10000000, 0x20000000, 0x40000000, 0x80000000,
};
122
+
123
/*
 * reverse_bit[n] has every one of the low 32 bits set except bit n
 * (n = 0..31); AND-ing with it clears column n of a word.
 * File-local and read-only: the table is never written, and keeping it
 * `static` avoids exporting a generic symbol name from the extension.
 */
static const long reverse_bit[32] = {
  0xfffffffe, 0xfffffffd, 0xfffffffb, 0xfffffff7,
  0xffffffef, 0xffffffdf, 0xffffffbf, 0xffffff7f,
  0xfffffeff, 0xfffffdff, 0xfffffbff, 0xfffff7ff,
  0xffffefff, 0xffffdfff, 0xffffbfff, 0xffff7fff,
  0xfffeffff, 0xfffdffff, 0xfffbffff, 0xfff7ffff,
  0xffefffff, 0xffdfffff, 0xffbfffff, 0xff7fffff,
  0xfeffffff, 0xfdffffff, 0xfbffffff, 0xf7ffffff,
  0xefffffff, 0xdfffffff, 0xbfffffff, 0x7fffffff,
};
157
+
158
+ // matchN: backtracking search over a stack of pa-row by pb-column bit matrices stored in m. num_adj[r] / point[r] give the neighbour rows of row r, b is an adjacency bit matrix indexed by column. Returns SUCCESS (1) once a column has been fixed for each of the pa rows, FAIL (0) otherwise. An older signature took (ADJACENCY *adj_ptr, long *b, long *m, int pa, int pb).
159
+ static int matchN(const int * num_adj, long ** point, long *b, long *m, int pa, int pb)
160
+ {
161
+ long * mm;// current matrix
162
+ long f[1000];//which columns has been used at an intermediate state of computing
163
+ long h[100];// pb < 100 * 32
164
+ // f[d] records the column chosen at depth d; h is a bitset of the columns currently marked as used.
165
+ int d;// depth for matrix
166
+ int k;// width for matrix
167
+ int dd;// depth of matrix in refinement step
168
+ int kk;// width of matrix in refinement step
169
+
170
+ int i, j;//temp
171
+ long l;// temp
172
+
173
+ short vflag;//valid check flag
174
+ int n_words;// number of words needed for storing 'pb' bits.
175
+ long refine_mm;// pointer for mm(match matrix) used in refinement step.
176
+ // NOTE(review): refine_mm is declared but never used in this function.
177
+ d = k = 0;
178
+ // start back track
179
+ for(i = 0 ; i < (pb / 32 + 1) ; i++)
180
+ h[i] = 0;
181
+ for(i = 0 ; i < 10 ; i++)
182
+ f[i] = 0;
183
+ // NOTE(review): only f[0..9] are cleared above although f has 1000 entries; f[d] is assigned on success before it is read back on backtrack.
184
+ n_words = (pb - 1) / (sizeof(int) * 8) + 1;
185
+ // Each matrix row occupies n_words elements; the bit arithmetic below assumes 32 column bits per element.
186
+ /* show(b, pb, pb); */
187
+ /* show(m, pa, pb); */
188
+ // Seed the search: take the lowest set column found in the first n_words elements of m and mark it in h.
189
+ if( d == 0 && k == 0){
190
+ k = ntz_n_words(m, n_words);
191
+ h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
192
+ }
193
+ while(k <= pb && d <= pa){
194
+ /* printf("d : %3d k : %3d n_words : %3d\n", d, k, n_words); */
195
+ if(d < 0){
196
+ printf("d < 0 return \n");
197
+ return FAIL;
198
+ }
199
+ // mm addresses matrix level d + 1; the level it is derived from sits pa * n_words elements before it.
200
+ // Idea for optimization :
201
+ // instead of using following equation, just (mm = mm + len) and (mm = mm - len).
202
+ mm = m + pa * (d + 1) * n_words;
203
+ /* printf("pa : %d d : %d k : %d n_words : %d hint : %d\n", pa, d, k, n_words, pa * (d + 1) * n_words); */
204
+
205
+ //printf("ntz : %d\n", ntz(mm));
206
+ //k = ntz(mm + d);
207
+ // set (k, d) bit '1', clear k-column and d-row '0'
208
+ /* printf("k : %d d: %d\n", k, d); */
209
+ for(j = 0 ; j < n_words ; j++){
210
+ if(j == (k / 32)){
211
+ for(i = 0 ; i < pa ; i++){
212
+ mm[i * n_words + j] = mm[(i - pa) * n_words + j] & reverse_bit[k - (k / 32) * 32];
213
+ }
214
+ mm[d * n_words + j] = bit_mask[k - (k / 32) * 32];
215
+ }else{
216
+ for(i = 0 ; i < pa ; i++){
217
+ mm[i * n_words + j] = mm[(i - pa) * n_words + j];
218
+ }
219
+ mm[d * n_words + j] = ZERO;
220
+ }
221
+ }
222
+ // BEGIN
223
+ /* show(mm, pa, pb); */
224
+ // END
225
+ // Refinement: candidate bit (dd, kk) is cleared when, for some neighbour row point[dd][i], row kk of b shares no set bit with that neighbour's row of mm.
226
+ // Refinement step
227
+ // Hot Spot!!
228
+ dd = kk = 0;
229
+ /* printf("before refinement step \n"); */
230
+ /* show(mm, pa, pb); */
231
+
232
+ while(dd != pa){
233
+ while(kk != pb){
234
+ //Idea for optimization :
235
+ //refine_mm should not updated 1 / 32 times.mm[dd + ((kk - 1) / 32)]
236
+ // NOTE(review): the test below indexes with (kk - 1) / 32 while the clearing statement uses kk / 32; for kk = 32, 64, ... the bit_mask index evaluates to 32, one past the 32-entry table - confirm intent.
237
+ //Idea for optimization :
238
+ // when mm is sparse there may be better algorithm
239
+ // for searching '1' bit.
240
+ if(mm[dd * n_words + ((kk - 1) / 32)] & bit_mask[kk - ((kk - 1) / 32) * 32]){
241
+ // Following loop can be flattened
242
+ for(i = 0 ; i < num_adj[dd] ; i++){
243
+ l = 0;
244
+ for(j = 0 ; j < n_words ; j++){
245
+ l |= (b[kk * n_words + j] & mm[point[dd][i] * n_words + j]);
246
+ }
247
+ if(l == 0){
248
+ mm[dd * n_words + (kk / 32)] &= reverse_bit[kk - (kk / 32) * 32];//remove bit
249
+ /* break;//quit for loop */
250
+ }
251
+ }
252
+
253
+ }
254
+ kk++;
255
+ }
256
+ // Idea for optimization
257
+ // every 32 bit is tested here.
258
+ kk = 0;
259
+ dd++;
260
+ }
261
+ /* show(mm, pa, pb); */
262
+
263
+ //Checking whether match matrices are valid.
264
+ // Subgraph isomorphism can be checked here before reaching d == pa.
265
+ vflag = SUCCESS;
266
+ for(i = 0 ; i < pa ; i++){
267
+ l = 0;
268
+ for(j = 0 ; j < n_words ; j++){
269
+ l |= mm[i * n_words + j];
270
+ }
271
+ if(l == 0){
272
+ vflag = FAIL;
273
+ break;
274
+ }
275
+ }
276
+ // vflag set: record column k for row d and descend one level; otherwise release k and advance to another column, backtracking through f[] when the columns run out.
277
+ if(vflag){// Success
278
+ f[d] = k;
279
+ k = 0;
280
+ while(h[k / 32] & bit_mask[k - (k / 32) * 32])
281
+ k++;
282
+ d++;
283
+ if(d == pa){
284
+ /* show(mm, pa, pb); */
285
+ //printf("FOUND! d : %d\n", d);
286
+ return SUCCESS;
287
+ }
288
+ else{
289
+ h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
290
+ }
291
+ }else{//Failed
292
+ h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
293
+ k++;
294
+ //printf("d : %d k : %d\n", d, k);
295
+ while((h[k / 32] & bit_mask[k - (k / 32) * 32] ||
296
+ (m[d * n_words + (k / 32)] & bit_mask[k - (k / 32) * 32] ) == 0) &&
297
+ k < pb)
298
+ k++;
299
+ /* printf("d : %d k : %d\n", d, k); */
300
+ while(k > pb){
301
+ if(d == 0){
302
+ return FAIL;
303
+ }
304
+ d--;
305
+ k = f[d];
306
+ h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
307
+ k++;
308
+ while(h[k / 32] & bit_mask[k - (k / 32) * 32])
309
+ k++;
310
+ }
311
+ h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
312
+ }
313
+ }
314
+ //printf("d : %d k : %d FAIL!\n", d, k);
315
+ return FAIL;
316
+ }
317
+
318
+ static void set_adjacency(int * num_adj, long ** point, long * adj, VALUE ret){
319
+ int i, j, n_words;
320
+ int off_set = 0;
321
+
322
+ n_words = (RARRAY(ret)->len - 1) / (sizeof(int) * 8) + 1;
323
+
324
+ for(i = 0 ; i < RARRAY(ret)->len ; i++){
325
+ num_adj[i] = FIX2INT(rb_funcall(RARRAY(ret)->ptr[i], rb_intern("length"), 0));
326
+ point[i] = adj + off_set;
327
+ for(j = 0 ; j < RARRAY(RARRAY(ret)->ptr[i])->len ; j++){
328
+ adj[off_set++] = FIX2INT(RARRAY(RARRAY(ret)->ptr[i])->ptr[j]);
329
+ //printf(" %d ", FIX2INT(RARRAY(RARRAY(ret)->ptr[i])->ptr[j]));
330
+ }
331
+ //printf("\n");
332
+ }
333
+ }
334
+
335
+ static VALUE subcomp_match_by_ullmann(VALUE self, VALUE a_matrix, VALUE pa, VALUE other_adj, VALUE pb, VALUE match){
336
+ // variables for adjacency list of graph A
337
+ int num_adj[1000];
338
+ long * point[1000];
339
+ long adj[3000];//adjacency list
340
+
341
+ // match matrix; = pa * (n_words * pa)
342
+ long * mm;//[800000];
343
+ long * m;
344
+
345
+ //temporary variables
346
+ int i;
347
+ int result;
348
+ VALUE mapping;
349
+
350
+ int n_pb, n_pa;
351
+ int n_words;
352
+ int sizeof_mm;
353
+
354
+ n_pb = NUM2INT(pb);
355
+ n_pa = NUM2INT(pa);
356
+
357
+ if(n_pb > n_pa){
358
+ return Qfalse;
359
+ }
360
+
361
+ sizeof_mm = n_pa * (n_pb + 1) * n_words;
362
+
363
+ n_words = (n_pa - 1) / (sizeof(int) * 8) + 1;
364
+
365
+ mm = (long * )malloc(sizeof(long) * 800000);
366
+ if(RSTRING(match)->len > 800000 * sizeof(long))
367
+ rb_raise(rb_eArgError, "Length of match matrix too short! %d", sizeof(mm));
368
+
369
+ memcpy(mm, (long *)RSTRING(match)->ptr, RSTRING(match)->len); // BUG!!
370
+
371
+ Check_Type(a_matrix, T_STRING);
372
+
373
+ set_adjacency(num_adj, point, adj, other_adj);
374
+
375
+ //show(mm, n_pa, n_pb);
376
+ //show((long *)RSTRING(a_matrix)->ptr, n_pa, n_pa);
377
+
378
+ result = matchN(num_adj, point, (long *)RSTRING(a_matrix)->ptr, mm, n_pb, n_pa);
379
+
380
+ if(result == 1){//?
381
+ mapping = rb_ary_new();
382
+ //printf("n_words : %d n_pa : %d n_pb : %d n_words * n_pa * n_pa : %d", n_words, n_pa, n_pb, n_words * n_pa * n_pa);
383
+ //show(mm + n_words * n_pb * n_pb, n_pb, n_pa);
384
+
385
+ for(i = 0 ; i < n_pb ; i++){
386
+ rb_ary_push(mapping, INT2FIX(ntz_m(mm + n_words * n_pb * n_pb + i * n_words, n_pa)));
387
+ }
388
+ return mapping;
389
+ }
390
+ return Qfalse;
391
+ }
392
+
393
+ // DataBase for substructure search
394
+
395
+ struct dbmdata { /* record type for the substructure database; not referenced by any code visible in this file */
396
+ int di_size; /* NOTE(review): meaning not established by the visible code - confirm before use */
397
+ };
398
+
399
+ static VALUE sdb_s_search(VALUE dbname){
400
+ rb_p(dbname);
401
+ }
402
+
403
+ Init_subcomp(){
404
+ VALUE subcomp_cGraph;
405
+ VALUE subcomp_cSubGraphDB;
406
+
407
+ subcomp_cGraph = rb_define_module("Graph");
408
+ rb_define_method(subcomp_cGraph, "subcomp_match_by_ullmann", subcomp_match_by_ullmann, 5);
409
+
410
+ subcomp_cSubGraphDB = rb_define_class_under(subcomp_cGraph, "SubGraphDB", rb_cObject);
411
+
412
+ rb_define_method(subcomp_cSubGraphDB, "open_for_search", sdb_s_search, 0);
413
+
414
+ rb_define_singleton_method(subcomp_cSubGraphDB, "show", subcomp_show, 3);
415
+ rb_define_singleton_method(subcomp_cSubGraphDB, "match", subcomp_match_by_ullmann, 5);
416
+ }