chemruby 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +120 -0
- data/Rakefile +195 -0
- data/ext/extconf.rb +4 -0
- data/ext/subcomp.c +416 -0
- data/lib/chem.rb +130 -0
- data/lib/chem/appl.rb +1 -0
- data/lib/chem/appl/chem3dole.rb +36 -0
- data/lib/chem/appl/tinker/nucleic.rb +40 -0
- data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
- data/lib/chem/data.rb +4 -0
- data/lib/chem/data/atomic_weight.rb +124 -0
- data/lib/chem/data/character.rb +2 -0
- data/lib/chem/data/electronegativity.rb +14 -0
- data/lib/chem/data/periodic_table.rb +6 -0
- data/lib/chem/data/prime_numbers.rb +1 -0
- data/lib/chem/data/vdw_radii.rb +1 -0
- data/lib/chem/db.rb +64 -0
- data/lib/chem/db/cansmi.rb +234 -0
- data/lib/chem/db/cdx.rb +1525 -0
- data/lib/chem/db/eps.rb +164 -0
- data/lib/chem/db/g98.rb +909 -0
- data/lib/chem/db/gspan.rb +130 -0
- data/lib/chem/db/iupac.rb +5 -0
- data/lib/chem/db/iupac/a_1.rb +46 -0
- data/lib/chem/db/iupac/iuparser.rb +226 -0
- data/lib/chem/db/iupac/iuparser.ry +97 -0
- data/lib/chem/db/iupac/postfix.rb +2 -0
- data/lib/chem/db/kcf.rb +390 -0
- data/lib/chem/db/kcf_glycan.rb +19 -0
- data/lib/chem/db/kegg.rb +516 -0
- data/lib/chem/db/linucs/linparser.rb +144 -0
- data/lib/chem/db/linucs/linucs.ry +53 -0
- data/lib/chem/db/mdl.rb +379 -0
- data/lib/chem/db/molconnz.rb +12 -0
- data/lib/chem/db/mopac.rb +88 -0
- data/lib/chem/db/msi.rb +107 -0
- data/lib/chem/db/pdb_dic.rb +115 -0
- data/lib/chem/db/pdf.rb +131 -0
- data/lib/chem/db/pubchem.rb +113 -0
- data/lib/chem/db/rmagick.rb +70 -0
- data/lib/chem/db/sdf.rb +37 -0
- data/lib/chem/db/smbl.rb +88 -0
- data/lib/chem/db/smiles.rb +2 -0
- data/lib/chem/db/smiles/smiles.ry +203 -0
- data/lib/chem/db/smiles/smiparser.rb +375 -0
- data/lib/chem/db/swf.rb +74 -0
- data/lib/chem/db/sybyl.rb +150 -0
- data/lib/chem/db/tinker.rb +77 -0
- data/lib/chem/db/types/type_cansmi.rb +9 -0
- data/lib/chem/db/types/type_cdx.rb +24 -0
- data/lib/chem/db/types/type_gspan.rb +31 -0
- data/lib/chem/db/types/type_kcf.rb +28 -0
- data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
- data/lib/chem/db/types/type_kegg.rb +92 -0
- data/lib/chem/db/types/type_mdl.rb +31 -0
- data/lib/chem/db/types/type_pdf.rb +33 -0
- data/lib/chem/db/types/type_png.rb +31 -0
- data/lib/chem/db/types/type_rxn.rb +25 -0
- data/lib/chem/db/types/type_sdf.rb +25 -0
- data/lib/chem/db/types/type_sybyl.rb +30 -0
- data/lib/chem/db/types/type_xyz.rb +26 -0
- data/lib/chem/db/vector.rb +128 -0
- data/lib/chem/db/xyz.rb +39 -0
- data/lib/chem/model.rb +119 -0
- data/lib/chem/model/skeleton.rb +37 -0
- data/lib/chem/utils.rb +11 -0
- data/lib/chem/utils/geometry.rb +27 -0
- data/lib/chem/utils/graph_db.rb +146 -0
- data/lib/chem/utils/math.rb +17 -0
- data/lib/chem/utils/prop.rb +123 -0
- data/lib/chem/utils/sssr.rb +101 -0
- data/lib/chem/utils/sub.rb +78 -0
- data/lib/chem/utils/transform.rb +110 -0
- data/lib/chem/utils/traverse.rb +37 -0
- data/lib/chem/utils/ullmann.rb +134 -0
- data/lib/graph.rb +41 -0
- data/lib/graph/cluster.rb +20 -0
- data/lib/graph/morgan.rb +38 -0
- data/sample/frequent_subgraph.rb +46 -0
- data/sample/images/ex1.rb +11 -0
- data/sample/images/ex2.rb +4 -0
- data/sample/images/ex3.rb +5 -0
- data/sample/images/ex4.rb +17 -0
- data/sample/images/ex5.rb +10 -0
- data/sample/images/mol/adenine.mol +26 -0
- data/sample/images/mol/atp.mol +69 -0
- data/sample/images/temp/ex5.mol +344 -0
- data/sample/kegg_db.rb +116 -0
- data/setup.rb +1551 -0
- data/test/all.rb +6 -0
- data/test/coord_test.rb +17 -0
- data/test/ctab_test.rb +31 -0
- data/test/data/A_21.tar.gz +0 -0
- data/test/data/A_21/aceanthrylene.cdx +0 -0
- data/test/data/A_21/aceanthrylene.mol +40 -0
- data/test/data/A_21/acenaphthylene.cdx +0 -0
- data/test/data/A_21/acenaphthylene.mol +31 -0
- data/test/data/A_21/acephenanthrylene.cdx +0 -0
- data/test/data/A_21/acephenanthrylene.mol +40 -0
- data/test/data/A_21/anthracene.cdx +0 -0
- data/test/data/A_21/anthracene.mol +35 -0
- data/test/data/A_21/as-indacene.cdx +0 -0
- data/test/data/A_21/as-indacene.mol +31 -0
- data/test/data/A_21/azulene.cdx +0 -0
- data/test/data/A_21/azulene.mol +26 -0
- data/test/data/A_21/biphenylene.cdx +0 -0
- data/test/data/A_21/biphenylene.mol +31 -0
- data/test/data/A_21/chrysene.cdx +0 -0
- data/test/data/A_21/chrysene.mol +44 -0
- data/test/data/A_21/coronen.cdx +0 -0
- data/test/data/A_21/coronen.mol +59 -0
- data/test/data/A_21/fluoranthene.cdx +0 -0
- data/test/data/A_21/fluoranthene.mol +40 -0
- data/test/data/A_21/fluorene.cdx +0 -0
- data/test/data/A_21/fluorene.mol +33 -0
- data/test/data/A_21/heptacene.cdx +0 -0
- data/test/data/A_21/heptacene.mol +71 -0
- data/test/data/A_21/heptalene.cdx +0 -0
- data/test/data/A_21/heptalene.mol +30 -0
- data/test/data/A_21/heptaphene.cdx +0 -0
- data/test/data/A_21/heptaphene.mol +71 -0
- data/test/data/A_21/hexacene.cdx +0 -0
- data/test/data/A_21/hexacene.mol +62 -0
- data/test/data/A_21/hexaphene.cdx +0 -0
- data/test/data/A_21/hexaphene.mol +62 -0
- data/test/data/A_21/indene.cdx +0 -0
- data/test/data/A_21/indene.mol +24 -0
- data/test/data/A_21/iupac.txt +41 -0
- data/test/data/A_21/naphthacene.cdx +0 -0
- data/test/data/A_21/naphthacene.mol +44 -0
- data/test/data/A_21/naphthalene.cdx +0 -0
- data/test/data/A_21/naphthalene.mol +26 -0
- data/test/data/A_21/ovalene.cdx +0 -0
- data/test/data/A_21/ovalene.mol +78 -0
- data/test/data/A_21/pentacene.cdx +0 -0
- data/test/data/A_21/pentacene.mol +53 -0
- data/test/data/A_21/pentalene.cdx +0 -0
- data/test/data/A_21/pentalene.mol +22 -0
- data/test/data/A_21/pentaphene.cdx +0 -0
- data/test/data/A_21/pentaphene.mol +53 -0
- data/test/data/A_21/perylene.cdx +0 -0
- data/test/data/A_21/perylene.mol +49 -0
- data/test/data/A_21/phenalene.cdx +0 -0
- data/test/data/A_21/phenalene.mol +33 -0
- data/test/data/A_21/phenanthrene.cdx +0 -0
- data/test/data/A_21/phenanthrene.mol +35 -0
- data/test/data/A_21/picene.cdx +0 -0
- data/test/data/A_21/picene.mol +53 -0
- data/test/data/A_21/pleiadene.cdx +0 -0
- data/test/data/A_21/pleiadene.mol +44 -0
- data/test/data/A_21/pyranthrene.cdx +0 -0
- data/test/data/A_21/pyranthrene.mol +72 -0
- data/test/data/A_21/pyrene.cdx +0 -0
- data/test/data/A_21/pyrene.mol +40 -0
- data/test/data/A_21/rubicene.cdx +0 -0
- data/test/data/A_21/rubicene.mol +63 -0
- data/test/data/A_21/s-indacene.cdx +0 -0
- data/test/data/A_21/s-indacene.mol +31 -0
- data/test/data/A_21/tetraphenylene.cdx +0 -0
- data/test/data/A_21/tetraphenylene.mol +57 -0
- data/test/data/A_21/trinaphthylene.cdx +0 -0
- data/test/data/A_21/trinaphthylene.mol +71 -0
- data/test/data/A_21/triphenylene.cdx +0 -0
- data/test/data/A_21/triphenylene.mol +44 -0
- data/test/data/C00147.kcf +25 -0
- data/test/data/G00147.kcf +13 -0
- data/test/data/atp.mol +69 -0
- data/test/data/cyclohexane.mol +17 -0
- data/test/data/cyclohexane.ps +485 -0
- data/test/data/fullerene.mol +155 -0
- data/test/data/glycan +33 -0
- data/test/data/hypericin.cdx +0 -0
- data/test/data/hypericin.cdxml +596 -0
- data/test/data/hypericin.chm +0 -0
- data/test/data/hypericin.ct +85 -0
- data/test/data/hypericin.f1d +0 -0
- data/test/data/hypericin.f1q +0 -0
- data/test/data/hypericin.gif +0 -0
- data/test/data/hypericin.mol +88 -0
- data/test/data/hypericin.mol2 +159 -0
- data/test/data/hypericin.msm +123 -0
- data/test/data/hypericin.pdf +359 -0
- data/test/data/hypericin.png +0 -0
- data/test/data/hypericin.ps +0 -0
- data/test/data/hypericin.skc +0 -0
- data/test/data/hypericin2.gif +0 -0
- data/test/data/hypericin2.ps +0 -0
- data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
- data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
- data/test/data/kegg/ligand/mol/C00147.mol +26 -0
- data/test/data/kegg/ligand/reaction +14 -0
- data/test/data/kegg/ligand/reaction.lst +1 -0
- data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
- data/test/data/reaction +14 -0
- data/test/data/reaction.lst +1 -0
- data/test/data/reaction_mapformula.lst +3 -0
- data/test/data/rxn/C00001.mol +6 -0
- data/test/data/rxn/C00011.mol +10 -0
- data/test/data/rxn/C00014.mol +6 -0
- data/test/data/rxn/C01010.mol +18 -0
- data/test/data/rxn/sample.rxn +50 -0
- data/test/data/rxn/substitution.rxn +45 -0
- data/test/data/test.eps +0 -0
- data/test/data/test.mol +28 -0
- data/test/data/test.sdf +143 -0
- data/test/data/test.skc +0 -0
- data/test/data/test.xyz +4 -0
- data/test/data/test_lf.sdf +143 -0
- data/test/heavy_test_pubchem.rb +16 -0
- data/test/multiple_test.rb +22 -0
- data/test/test_adj.rb +54 -0
- data/test/test_canonical_smiles.rb +46 -0
- data/test/test_cdx.rb +32 -0
- data/test/test_chem.rb +18 -0
- data/test/test_cluster.rb +19 -0
- data/test/test_db.rb +11 -0
- data/test/test_eps.rb +24 -0
- data/test/test_geometry.rb +11 -0
- data/test/test_gspan.rb +28 -0
- data/test/test_iupac.rb +36 -0
- data/test/test_kcf.rb +24 -0
- data/test/test_kcf_glycan.rb +10 -0
- data/test/test_kegg.rb +118 -0
- data/test/test_linucs.rb +21 -0
- data/test/test_mdl.rb +45 -0
- data/test/test_mol2.rb +62 -0
- data/test/test_morgan.rb +21 -0
- data/test/test_pdf.rb +12 -0
- data/test/test_prop.rb +86 -0
- data/test/test_rmagick.rb +15 -0
- data/test/test_sbdb.rb +23 -0
- data/test/test_sdf.rb +30 -0
- data/test/test_smiles.rb +84 -0
- data/test/test_sssr.rb +18 -0
- data/test/test_sub.rb +47 -0
- data/test/test_subcomp.rb +37 -0
- data/test/test_traverse.rb +29 -0
- data/test/test_writer.rb +13 -0
- data/test/test_xyz.rb +15 -0
- data/test/type_test.rb +25 -0
- metadata +290 -0
data/README
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
= ChemRuby - Cheminformatics Ruby
|
2
|
+
|
3
|
+
ChemRuby is a framework for developing cheminformatics applications in Ruby.
|
4
|
+
It will let you retrieve chemical information from a variety of data sources
|
5
|
+
in various formats (such as MDL mol, SMILES etc.), fast substructure search
|
6
|
+
based on graph theory, draw a chemical structure in various graphics formats
|
7
|
+
(such as PDF, PNG etc.), and calculate a number of chemical properties.
|
8
|
+
|
9
|
+
== FOR MORE INFORMATION
|
10
|
+
|
11
|
+
ChemRuby's official website is at ((<URL:http://www.chemruby.org/>)).
|
12
|
+
You will find links to the related resources including downloads,
|
13
|
+
Wiki documentation etc. on the top page.
|
14
|
+
|
15
|
+
* ((<URL:http://www.chemruby.org/>))
|
16
|
+
|
17
|
+
|
18
|
+
== WHERE TO OBTAIN
|
19
|
+
|
20
|
+
--- WWW
|
21
|
+
|
22
|
+
The releases can be obtained at ChemRuby website.
|
23
|
+
|
24
|
+
* ((<URL:http://www.chemruby.org/>))
|
25
|
+
|
26
|
+
--- RubyGems
|
27
|
+
|
28
|
+
((<RubyGems|URL:http://rubyforge.org/projects/rubygems/>)) version of
|
29
|
+
the ChemRuby package is also available for easy installation.
|
30
|
+
|
31
|
+
* ((<URL:http://rubyforge.org/projects/chemruby/>))
|
32
|
+
|
33
|
+
|
34
|
+
== REQUIREMENTS
|
35
|
+
|
36
|
+
* Ruby 1.8.2 or later -- ((<URL:http://www.ruby-lang.org/>))
|
37
|
+
|
38
|
+
|
39
|
+
== OPTIONAL REQUIREMENTS
|
40
|
+
|
41
|
+
Some optional libraries can be utilized to extend ChemRuby's functionality.
|
42
|
+
If your needs meet the following conditions, install them from the "Ruby
|
43
|
+
Application Archive" at ((<URL:http://raa.ruby-lang.org/>)).
|
44
|
+
|
45
|
+
For outputting png and jpeg images:
|
46
|
+
|
47
|
+
* [RAA:rmagick]
|
48
|
+
|
49
|
+
For testing and developing ChemRuby:
|
50
|
+
|
51
|
+
* [RAA:rake]
|
52
|
+
|
53
|
+
== INSTALL
|
54
|
+
|
55
|
+
In the chemruby source directory (such as chemruby-x.x.x/), run setup.rb
|
56
|
+
as follows:
|
57
|
+
|
58
|
+
% ruby setup.rb config
|
59
|
+
% ruby setup.rb setup
|
60
|
+
% su
|
61
|
+
# ruby setup.rb install
|
62
|
+
|
63
|
+
If your operating system supports the 'sudo' command (such as Mac OS X),
|
64
|
+
try the following procedure instead of the above.
|
65
|
+
|
66
|
+
% ruby setup.rb config
|
67
|
+
% ruby setup.rb setup
|
68
|
+
% sudo ruby setup.rb install
|
69
|
+
|
70
|
+
You can run tests by
|
71
|
+
|
72
|
+
% rake test
|
73
|
+
|
74
|
+
and run
|
75
|
+
|
76
|
+
% rake
|
77
|
+
|
78
|
+
for more details.
|
79
|
+
|
80
|
+
== USAGE
|
81
|
+
|
82
|
+
You can load all ChemRuby classes just by requiring 'chem.rb'. All the
|
83
|
+
ChemRuby classes and modules are located under the module name 'Chem' to
|
84
|
+
separate the name space.
|
85
|
+
|
86
|
+
#!/usr/bin/env ruby
|
87
|
+
require 'chem'
|
88
|
+
|
89
|
+
--- RubyGems
|
90
|
+
|
91
|
+
In RubyGems, you need to load 'rubygems' library before using 'chem'.
|
92
|
+
|
93
|
+
#!/usr/bin/env ruby
|
94
|
+
require 'rubygems'
|
95
|
+
require_gem 'chem'
|
96
|
+
|
97
|
+
== Credits
|
98
|
+
|
99
|
+
* GOTO Naohisa, KATAYAMA Toshiaki and NAKAO Mitsuteru (alphabetical order)
|
100
|
+
who are developers of BioRuby, led the design of ChemRuby.
|
101
|
+
|
102
|
+
* gSpan parser and PubChem search from KADOWAKI masashi.
|
103
|
+
|
104
|
+
== LICENSE
|
105
|
+
|
106
|
+
ChemRuby can be freely distributed under Ruby's license.
|
107
|
+
Note that setup.rb included in the ChemRuby package comes from
|
108
|
+
<RAA:setup> developed by Minero Aoki.
|
109
|
+
|
110
|
+
This README file can also be distributed under Ruby's license.
|
111
|
+
|
112
|
+
Copyright (C) 2006 TANAKA Nobuya <tanaka@chemruby.org>
|
113
|
+
KATAYAMA Toshiaki <k@bioruby.org>
|
114
|
+
|
115
|
+
== CONTACT
|
116
|
+
|
117
|
+
Current staff of the ChemRuby project can be reached by sending e-mail
|
118
|
+
to <staff@chemruby.org>.
|
119
|
+
|
120
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,195 @@
|
|
1
|
+
#
|
2
|
+
# Rakefile
|
3
|
+
#
|
4
|
+
# See http://docs.rubyrake.org/ to see how to use ``rake'' command.
|
5
|
+
#
|
6
|
+
# $Id: Rakefile 61 2005-10-12 09:17:39Z tanaka $
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'rake/clean'
|
10
|
+
require 'rake/testtask'
|
11
|
+
require 'rake/gempackagetask'
|
12
|
+
|
13
|
+
task :default => [:help]
|
14
|
+
|
15
|
+
PKG_VERSION = "0.9.3"
|
16
|
+
PKG_BUILD = "RC1"
|
17
|
+
|
18
|
+
# Files that go into the distributed package.
PKG_FILES = FileList[
  "Rakefile", "README", "setup.rb",
  "lib/**/*.rb", "lib/**/*.ry",
  "test/**/*",
  "sample/**/*.rb", "sample/**/*.mol",
  "ext/**/*.h", "ext/**/*.c", "ext/**/*.rb"
]
|
39
|
+
|
40
|
+
task :help do |t|
|
41
|
+
puts <<EOL
|
42
|
+
|
43
|
+
ChemRuby #{PKG_VERSION}
|
44
|
+
|
45
|
+
To install ChemRuby, you need at least
|
46
|
+
|
47
|
+
* ruby-1.8.2 (or later)
|
48
|
+
* Ruby header files (included in original Ruby)
|
49
|
+
* C language compilers (such as gcc)
|
50
|
+
|
51
|
+
If the following modules are installed, ChemRuby will use it.
|
52
|
+
You can install them later.
|
53
|
+
|
54
|
+
* RMagick ( You will find how to install them in http://www.chemruby.org)
|
55
|
+
|
56
|
+
== Compiling and Installing
|
57
|
+
|
58
|
+
% rake compile
|
59
|
+
% sudo rake install
|
60
|
+
|
61
|
+
or just
|
62
|
+
|
63
|
+
% sudo ruby setup.rb
|
64
|
+
|
65
|
+
== Compiling RDOC
|
66
|
+
|
67
|
+
% rake doc
|
68
|
+
|
69
|
+
== Test
|
70
|
+
|
71
|
+
% rake test
|
72
|
+
|
73
|
+
You will need RMagick and other libraries to pass all the tests.
|
74
|
+
|
75
|
+
EOL
|
76
|
+
|
77
|
+
end
|
78
|
+
|
79
|
+
# Generates the RDoc documentation for ./lib with README as the main page.
task :doc do
  system("rdoc --main README ./lib README")
end
|
82
|
+
|
83
|
+
|
84
|
+
# Development test suite: requires :test, then runs dev/test/test*.rb with
# the ext, lib, dev/lib and dev/ext directories on the load path.
task :dev => [:test]
Rake::TestTask.new(:dev) do |suite|
  %w[ext lib dev/lib dev/ext].each { |dir| suite.libs << dir }
  suite.test_files = FileList['dev/test/test*.rb']
end
|
96
|
+
|
97
|
+
# Regular test suite: requires :compile, then runs test/test*.rb.
task :test => [:compile]
Rake::TestTask.new(:test) do |suite|
  %w[ext lib].each { |dir| suite.libs << dir }
  suite.test_files = FileList['test/test*.rb']
end
|
103
|
+
|
104
|
+
# Quick check: requires :compile, then runs only test/test_subcomp.rb.
task :light => [:compile]
Rake::TestTask.new(:light) do |suite|
  %w[ext lib].each { |dir| suite.libs << dir }
  suite.test_files = FileList['test/test_subcomp.rb']
end
|
111
|
+
|
112
|
+
# Removes a previous installation from the hard-coded Ruby 1.8 site_ruby tree.
task :rm do
  site_ruby = "/usr/local/lib/site_ruby/1.8"
  system "rm -rf #{site_ruby}/chem"
  system "rm #{site_ruby}/chem.rb"
  system "rm -rf #{site_ruby}/i386-linux/chem"
end
|
117
|
+
|
118
|
+
desc "Prepares for installation"
task :prepare do
  # Runs the "config" and "setup" phases of setup.rb, in that order.
  %w[config setup].each do |phase|
    ruby "setup.rb #{phase}"
  end
end
|
123
|
+
|
124
|
+
desc "Installing library"
# Requires :compile and :prepare, then runs the install phase of setup.rb.
task :install => [:compile, :prepare] do
  ruby("setup.rb install")
end
|
128
|
+
|
129
|
+
# Long-running tests: requires :test, then runs test/heavy_test*.rb.
task :heavy => [:test]
Rake::TestTask.new(:heavy) do |suite|
  %w[ext lib].each { |dir| suite.libs << dir }
  suite.test_files = FileList['test/heavy_test*.rb']
end
|
135
|
+
|
136
|
+
# Removes build products from ext/: object files and the compiled extension.
# The extension suffix is platform dependent (DLEXT), so it is removed in
# addition to the "*.bundle" files; otherwise the built library would
# survive a clean on platforms whose DLEXT is not "bundle".
task :clean do
  cd "ext/" do
    patterns = ["*.o", "*.bundle", "*.#{Config::CONFIG['DLEXT']}"]
    patterns.map { |pattern| Dir.glob(pattern) }.flatten.uniq.each do |file|
      rm file
    end
  end
end
|
146
|
+
|
147
|
+
# BUG!? Need code for testing if racc exist !?
|
148
|
+
# Regenerates the SMILES parser from its racc grammar.
file 'lib/chem/db/smiles/smiparser.rb' => ['lib/chem/db/smiles/smiles.ry'] do
  cd('lib/chem/db/smiles/') { sh "racc smiles.ry -o smiparser.rb" }
end
|
153
|
+
|
154
|
+
# Regenerates the IUPAC name parser from its racc grammar.
file 'lib/chem/db/iupac/iuparser.rb' => ['lib/chem/db/iupac/iuparser.ry'] do
  cd('lib/chem/db/iupac/') { sh "racc iuparser.ry -o iuparser.rb" }
end
|
159
|
+
|
160
|
+
# Regenerates the LINUCS parser from its racc grammar.
file 'lib/chem/db/linucs/linparser.rb' => ['lib/chem/db/linucs/linucs.ry'] do
  cd('lib/chem/db/linucs/') { sh "racc linucs.ry -o linparser.rb" }
end
|
165
|
+
|
166
|
+
# Creates ext/Makefile by running extconf.rb inside ext/.
file 'ext/Makefile' => ['ext/extconf.rb', 'ext/subcomp.c'] do
  cd('ext/') { ruby "extconf.rb" }
end
|
171
|
+
|
172
|
+
# Builds the subcomp extension with make; the file suffix comes from DLEXT.
file "ext/subcomp.#{Config::CONFIG["DLEXT"]}" => ['ext/subcomp.c', 'ext/Makefile'] do
  cd('ext/') { sh "make" }
end
|
177
|
+
|
178
|
+
|
179
|
+
desc "Compiling library"
# Requires the three generated parsers and the compiled subcomp extension.
task :compile => [
  'lib/chem/db/smiles/smiparser.rb',
  'lib/chem/db/iupac/iuparser.rb',
  'lib/chem/db/linucs/linparser.rb',
  "ext/subcomp.#{Config::CONFIG["DLEXT"]}"
]
|
181
|
+
|
182
|
+
# Gem specification for the chemruby package.
spec = Gem::Specification.new do |gem|
  gem.name         = 'chemruby'
  gem.version      = PKG_VERSION
  gem.summary      = "A framework program for cheminformatics"
  gem.files        = PKG_FILES
  gem.require_path = 'lib'
  gem.autorequire  = 'chem'
  gem.extensions << 'ext/extconf.rb'
end
|
191
|
+
|
192
|
+
# Packaging tasks for the gem specification above; a tar archive is requested too.
Rake::GemPackageTask.new(spec) do |package|
  package.package_files += PKG_FILES
  package.need_tar = true
end
|
data/ext/extconf.rb
ADDED
data/ext/subcomp.c
ADDED
@@ -0,0 +1,416 @@
|
|
1
|
+
/**********************************************************************
|
2
|
+
|
3
|
+
subcomp.c -
|
4
|
+
|
5
|
+
$Author: nobyt $
|
6
|
+
|
7
|
+
Copyright (C) 2004-2006 Nobuya Tanaka
|
8
|
+
|
9
|
+
**********************************************************************/
|
10
|
+
|
11
|
+
/* 32-bit all-ones and all-zero words. */
#define FULL 0xffffffff
#define ZERO 0x0

/*
 * Result codes of the matcher.  They are plain expressions without a
 * trailing semicolon, so they work in comparisons and conditions as well
 * as in "return FAIL;" style statements.
 */
#define FAIL 0
#define SUCCESS 1
|
16
|
+
|
17
|
+
#include <ruby.h>
|
18
|
+
|
19
|
+
/*
 * Debug helper: prints a bit matrix to stdout.
 *
 * m  - matrix data, row after row; every row occupies n_words elements,
 *      n_words being the number of 32-bit words needed for pb bits
 * pa - number of rows
 * pb - number of columns (bits per row)
 *
 * A set bit is printed as '@', a clear bit as '.'.  The header line and the
 * first column show the column / row index modulo 10.
 */
static void
show(long *m, int pa, int pb)
{
  int row, word, bit, last;
  int n_words;

  n_words = (pb - 1) / (sizeof(int) * 8) + 1;

  printf("\n ");
  for(bit = 0 ; bit < pb ; bit++){
    printf("%d", bit % 10);
  }
  printf("\n");

  for(row = 0 ; row < pa ; row++){
    const long *line = m + row * n_words;

    printf("%d ", row % 10);
    for(word = 0 ; word < n_words ; word++){
      /* the last word of a row may be only partly used */
      last = (word + 1) * 32 < pb ? (word + 1) * 32 : pb;
      for(bit = word * 32 ; bit < last ; bit++){
        /* unsigned mask: 1 << 31 on a signed int is undefined and would
           sign-extend when combined with a wider long */
        if((unsigned long)line[word] & (1UL << (bit - word * 32)))
          printf("@");
        else
          printf(".");
      }
    }
    printf("\n");
  }
  printf("\n");
}
|
50
|
+
|
51
|
+
/*
|
52
|
+
* call-seq:
|
53
|
+
* SubGraphDB.show -> print out adjacency matrix
|
54
|
+
*
|
55
|
+
* This function is mainly for debug.
|
56
|
+
*/
|
57
|
+
|
58
|
+
static VALUE
|
59
|
+
subcomp_show(VALUE self, VALUE str, VALUE pa, VALUE pb)
|
60
|
+
{
|
61
|
+
printf("subcomp_show called %3d %3d\n", FIX2INT(pa), FIX2INT(pb));
|
62
|
+
show((long * )RSTRING(str)->ptr, FIX2INT(pa), FIX2INT(pb));
|
63
|
+
return Qnil;
|
64
|
+
}
|
65
|
+
|
66
|
+
|
67
|
+
/*
 * Returns the number of trailing zero bits of a multi-word bit row.
 *
 * y  - row data, least significant word first; 32 bits are used per element
 * pb - number of bits in the row
 *
 * The result is the index of the lowest set bit.  When none of the
 * (pb - 1) / 32 + 1 words of the row has a bit set, 32 times that word
 * count is returned.  A row without bits (pb <= 0) yields 0.
 */
static int ntz_m(long *y, int pb){
  int i = 0;
  int n = 1;
  int n_words;
  long x;

  if(pb <= 0)
    return 0;
  n_words = (pb - 1) / 32 + 1;

  /* pb counts bits, so the scan has to stop after n_words words */
  while(i < n_words && y[i] == 0){
    n += 32;
    i++;
  }
  if(i == n_words)
    return 32 * n_words; /* empty row: never read past its end */

  x = y[i];

  /* binary search for the lowest set bit inside the word */
  if((x & 0x0000FFFF) == 0) {n = n + 16 ; x = x >> 16;}
  if((x & 0x000000FF) == 0) {n = n + 8 ; x = x >> 8;}
  if((x & 0x0000000F) == 0) {n = n + 4 ; x = x >> 4;}
  if((x & 0x00000003) == 0) {n = n + 2 ; x = x >> 2;}
  return n - (int)(x & 1);
}
|
90
|
+
|
91
|
+
/*
 * Number of trailing zero bits of a 32-bit word held in a long.
 * Returns 32 for x == 0, otherwise the result of a binary search that
 * inspects the low 16, 8, 4 and 2 bits in turn.
 */
static int ntz(long x){
  static const long low_bits[4] = {0x0000FFFF, 0x000000FF, 0x0000000F, 0x00000003};
  static const int width[4] = {16, 8, 4, 2};
  int zeros = 1;
  int step;

  if (x == 0)
    return 32;

  for(step = 0 ; step < 4 ; step++){
    if((x & low_bits[step]) == 0){
      zeros += width[step];
      x >>= width[step];
    }
  }
  return zeros - (int)(x & 1);
}
|
102
|
+
|
103
|
+
/*
 * Number of trailing zero bits of a row made of n_words 32-bit words
 * (least significant word first).
 * Returns 32 * n_words when the whole row is zero, 0 when n_words <= 0.
 */
static int ntz_n_words(long * x, int n_words){
  int i;

  /* The bound is tested before x[i] is read, so x[n_words] is never touched. */
  for(i = 0 ; i < n_words ; i++){
    if(x[i] != 0)
      return ntz(x[i]) + 32 * i;
  }
  return n_words > 0 ? 32 * n_words : 0;
}
|
111
|
+
|
112
|
+
/* bit_mask[n] has exactly bit n set (bit 0 is the least significant), n = 0..31. */
long bit_mask[32] = {
  0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020, 0x00000040, 0x00000080,
  0x00000100, 0x00000200, 0x00000400, 0x00000800, 0x00001000, 0x00002000, 0x00004000, 0x00008000,
  0x00010000, 0x00020000, 0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000,
  0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000
};
|
122
|
+
|
123
|
+
/* reverse_bit[n] is the 32-bit complement of bit n: all of bits 0..31 set except bit n. */
long reverse_bit[32] = {
  0xfffffffe, 0xfffffffd, 0xfffffffb, 0xfffffff7, 0xffffffef, 0xffffffdf, 0xffffffbf, 0xffffff7f,
  0xfffffeff, 0xfffffdff, 0xfffffbff, 0xfffff7ff, 0xffffefff, 0xffffdfff, 0xffffbfff, 0xffff7fff,
  0xfffeffff, 0xfffdffff, 0xfffbffff, 0xfff7ffff, 0xffefffff, 0xffdfffff, 0xffbfffff, 0xff7fffff,
  0xfeffffff, 0xfdffffff, 0xfbffffff, 0xf7ffffff, 0xefffffff, 0xdfffffff, 0xbfffffff, 0x7fffffff
};
|
157
|
+
|
158
|
+
//int matchN(ADJACENCY *adj_ptr, long *b, long *m, int pa, int pb)
|
159
|
+
static int matchN(const int * num_adj, long ** point, long *b, long *m, int pa, int pb)
|
160
|
+
{
|
161
|
+
long * mm;// current matrix
|
162
|
+
long f[1000];//which columns has been used at an intermediate state of computing
|
163
|
+
long h[100];// pb < 100 * 32
|
164
|
+
|
165
|
+
int d;// depth for matrix
|
166
|
+
int k;// width for matrix
|
167
|
+
int dd;// depth of matrix in refinement step
|
168
|
+
int kk;// width of matrix in refinement step
|
169
|
+
|
170
|
+
int i, j;//temp
|
171
|
+
long l;// temp
|
172
|
+
|
173
|
+
short vflag;//valid check flag
|
174
|
+
int n_words;// number of words needed for storing 'pb' bits.
|
175
|
+
long refine_mm;// pointer for mm(match matrix) used in refinment step.
|
176
|
+
|
177
|
+
d = k = 0;
|
178
|
+
// start back track
|
179
|
+
for(i = 0 ; i < (pb / 32 + 1) ; i++)
|
180
|
+
h[i] = 0;
|
181
|
+
for(i = 0 ; i < 10 ; i++)
|
182
|
+
f[i] = 0;
|
183
|
+
|
184
|
+
n_words = (pb - 1) / (sizeof(int) * 8) + 1;
|
185
|
+
|
186
|
+
/* show(b, pb, pb); */
|
187
|
+
/* show(m, pa, pb); */
|
188
|
+
|
189
|
+
if( d == 0 && k == 0){
|
190
|
+
k = ntz_n_words(m, n_words);
|
191
|
+
h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
|
192
|
+
}
|
193
|
+
while(k <= pb && d <= pa){
|
194
|
+
/* printf("d : %3d k : %3d n_words : %3d\n", d, k, n_words); */
|
195
|
+
if(d < 0){
|
196
|
+
printf("d < 0 return \n");
|
197
|
+
return FAIL;
|
198
|
+
}
|
199
|
+
|
200
|
+
// Idea for optimization :
|
201
|
+
// instead of using following equation, just (mm = mm + len) and (mm = mm - len).
|
202
|
+
mm = m + pa * (d + 1) * n_words;
|
203
|
+
/* printf("pa : %d d : %d k : %d n_words : %d hint : %d\n", pa, d, k, n_words, pa * (d + 1) * n_words); */
|
204
|
+
|
205
|
+
//printf("ntz : %d\n", ntz(mm));
|
206
|
+
//k = ntz(mm + d);
|
207
|
+
// set (k, d) bit '1', clear k-column and d-row '0'
|
208
|
+
/* printf("k : %d d: %d\n", k, d); */
|
209
|
+
for(j = 0 ; j < n_words ; j++){
|
210
|
+
if(j == (k / 32)){
|
211
|
+
for(i = 0 ; i < pa ; i++){
|
212
|
+
mm[i * n_words + j] = mm[(i - pa) * n_words + j] & reverse_bit[k - (k / 32) * 32];
|
213
|
+
}
|
214
|
+
mm[d * n_words + j] = bit_mask[k - (k / 32) * 32];
|
215
|
+
}else{
|
216
|
+
for(i = 0 ; i < pa ; i++){
|
217
|
+
mm[i * n_words + j] = mm[(i - pa) * n_words + j];
|
218
|
+
}
|
219
|
+
mm[d * n_words + j] = ZERO;
|
220
|
+
}
|
221
|
+
}
|
222
|
+
// BEGIN
|
223
|
+
/* show(mm, pa, pb); */
|
224
|
+
// END
|
225
|
+
|
226
|
+
// Refinement step
|
227
|
+
// Hot Spot!!
|
228
|
+
dd = kk = 0;
|
229
|
+
/* printf("before refinement step \n"); */
|
230
|
+
/* show(mm, pa, pb); */
|
231
|
+
|
232
|
+
while(dd != pa){
|
233
|
+
while(kk != pb){
|
234
|
+
//Idea for optimization :
|
235
|
+
//refine_mm should not updated 1 / 32 times.mm[dd + ((kk - 1) / 32)]
|
236
|
+
|
237
|
+
//Idea for optimization :
|
238
|
+
// when mm is sparse there may be better algorithm
|
239
|
+
// for searching '1' bit.
|
240
|
+
if(mm[dd * n_words + ((kk - 1) / 32)] & bit_mask[kk - ((kk - 1) / 32) * 32]){
|
241
|
+
// Following loop can be flattened
|
242
|
+
for(i = 0 ; i < num_adj[dd] ; i++){
|
243
|
+
l = 0;
|
244
|
+
for(j = 0 ; j < n_words ; j++){
|
245
|
+
l |= (b[kk * n_words + j] & mm[point[dd][i] * n_words + j]);
|
246
|
+
}
|
247
|
+
if(l == 0){
|
248
|
+
mm[dd * n_words + (kk / 32)] &= reverse_bit[kk - (kk / 32) * 32];//remove bit
|
249
|
+
/* break;//quit for loop */
|
250
|
+
}
|
251
|
+
}
|
252
|
+
|
253
|
+
}
|
254
|
+
kk++;
|
255
|
+
}
|
256
|
+
// Idea for optimization
|
257
|
+
// every 32 bit is tested here.
|
258
|
+
kk = 0;
|
259
|
+
dd++;
|
260
|
+
}
|
261
|
+
/* show(mm, pa, pb); */
|
262
|
+
|
263
|
+
//Checking whether match matrices are valid.
|
264
|
+
// Subgraph isomorphism can be checked here before reaching d == pa.
|
265
|
+
vflag = SUCCESS;
|
266
|
+
for(i = 0 ; i < pa ; i++){
|
267
|
+
l = 0;
|
268
|
+
for(j = 0 ; j < n_words ; j++){
|
269
|
+
l |= mm[i * n_words + j];
|
270
|
+
}
|
271
|
+
if(l == 0){
|
272
|
+
vflag = FAIL;
|
273
|
+
break;
|
274
|
+
}
|
275
|
+
}
|
276
|
+
|
277
|
+
if(vflag){// Success
|
278
|
+
f[d] = k;
|
279
|
+
k = 0;
|
280
|
+
while(h[k / 32] & bit_mask[k - (k / 32) * 32])
|
281
|
+
k++;
|
282
|
+
d++;
|
283
|
+
if(d == pa){
|
284
|
+
/* show(mm, pa, pb); */
|
285
|
+
//printf("FOUND! d : %d\n", d);
|
286
|
+
return SUCCESS;
|
287
|
+
}
|
288
|
+
else{
|
289
|
+
h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
|
290
|
+
}
|
291
|
+
}else{//Failed
|
292
|
+
h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
|
293
|
+
k++;
|
294
|
+
//printf("d : %d k : %d\n", d, k);
|
295
|
+
while((h[k / 32] & bit_mask[k - (k / 32) * 32] ||
|
296
|
+
(m[d * n_words + (k / 32)] & bit_mask[k - (k / 32) * 32] ) == 0) &&
|
297
|
+
k < pb)
|
298
|
+
k++;
|
299
|
+
/* printf("d : %d k : %d\n", d, k); */
|
300
|
+
while(k > pb){
|
301
|
+
if(d == 0){
|
302
|
+
return FAIL;
|
303
|
+
}
|
304
|
+
d--;
|
305
|
+
k = f[d];
|
306
|
+
h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
|
307
|
+
k++;
|
308
|
+
while(h[k / 32] & bit_mask[k - (k / 32) * 32])
|
309
|
+
k++;
|
310
|
+
}
|
311
|
+
h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
|
312
|
+
}
|
313
|
+
}
|
314
|
+
//printf("d : %d k : %d FAIL!\n", d, k);
|
315
|
+
return FAIL;
|
316
|
+
}
|
317
|
+
|
318
|
+
/*
 * Flattens a Ruby adjacency list (an Array whose elements are Arrays of
 * integers) into C arrays.
 *
 * num_adj - num_adj[i] receives the number of entries of element i
 * point   - point[i] receives a pointer to the first entry of element i
 *           inside adj
 * adj     - receives all entries, element after element
 * ret     - the Ruby adjacency list
 *
 * The buffer sizes are not passed in, so the caller has to provide buffers
 * that are large enough.
 * Raises TypeError when ret or one of its elements is not an Array, or when
 * an entry is not an integer.
 */
static void set_adjacency(int * num_adj, long ** point, long * adj, VALUE ret){
  long node, nth;
  long filled = 0;

  Check_Type(ret, T_ARRAY);

  for(node = 0 ; node < RARRAY(ret)->len ; node++){
    VALUE neighbours = RARRAY(ret)->ptr[node];

    /* RARRAY() is only meaningful for real Arrays */
    Check_Type(neighbours, T_ARRAY);

    num_adj[node] = (int)RARRAY(neighbours)->len;
    point[node] = adj + filled;
    for(nth = 0 ; nth < RARRAY(neighbours)->len ; nth++){
      adj[filled++] = NUM2INT(RARRAY(neighbours)->ptr[nth]);
    }
  }
}
|
334
|
+
|
335
|
+
/*
 *  call-seq:
 *     match(a_matrix, pa, other_adj, pb, match)   -> Array or false
 *
 *  Runs matchN on the given data.
 *
 *  a_matrix  - String holding the bit matrix handed to matchN as +b+
 *              (+pa+ rows)
 *  pa        - number of columns of the match matrix
 *  other_adj - Array of Arrays of integers: adjacency list with at least
 *              +pb+ entries
 *  pb        - number of rows of the match matrix
 *  match     - String holding the initial match matrix; it is copied, the
 *              String itself is not modified
 *
 *  Returns false when +pb+ > +pa+, when +pb+ is not positive or when no
 *  match is found.  Otherwise returns an Array with +pb+ integers: for every
 *  row, the index of the lowest set bit in the final match matrix.
 *
 *  Raises TypeError for arguments of the wrong class and ArgumentError when
 *  the input does not fit into the fixed-size work areas.
 *
 *  NOTE(review): the matrices are read as arrays of long that carry 32 bits
 *  each -- confirm that the Ruby side packs them accordingly.
 */
static VALUE subcomp_match_by_ullmann(VALUE self, VALUE a_matrix, VALUE pa, VALUE other_adj, VALUE pb, VALUE match){
  /* MAX_COLUMNS: matchN keeps its used-column set in a 100-word array. */
  enum { MAX_ROWS = 1000, MAX_NEIGHBOURS = 3000, MAX_COLUMNS = 99 * 32 };

  /* adjacency list in C form */
  int num_adj[MAX_ROWS];
  long * point[MAX_ROWS];
  long adj[MAX_NEIGHBOURS];

  int found[MAX_ROWS];   /* result columns, collected before mm is freed */
  long * mm;             /* work buffer for the match matrices */
  long mm_words, match_words, total_adj;

  int i;
  int result;
  VALUE mapping;

  int n_pb, n_pa;
  int n_words;

  n_pb = NUM2INT(pb);
  n_pa = NUM2INT(pa);

  if(n_pb > n_pa || n_pb <= 0){
    return Qfalse;
  }

  /* Validate everything that may raise before memory is allocated. */
  Check_Type(a_matrix, T_STRING);
  Check_Type(match, T_STRING);
  Check_Type(other_adj, T_ARRAY);

  if(n_pb > MAX_ROWS || n_pa >= MAX_COLUMNS)
    rb_raise(rb_eArgError, "Graph too large! %d x %d", n_pb, n_pa);

  if(RARRAY(other_adj)->len < n_pb || RARRAY(other_adj)->len > MAX_ROWS)
    rb_raise(rb_eArgError, "Adjacency list has %ld entries, expected %d to %d",
             (long)RARRAY(other_adj)->len, n_pb, (int)MAX_ROWS);

  total_adj = 0;
  for(i = 0 ; i < RARRAY(other_adj)->len ; i++){
    Check_Type(RARRAY(other_adj)->ptr[i], T_ARRAY);
    total_adj += RARRAY(RARRAY(other_adj)->ptr[i])->len;
  }
  if(total_adj > MAX_NEIGHBOURS)
    rb_raise(rb_eArgError, "Adjacency list too long! %ld", total_adj);

  set_adjacency(num_adj, point, adj, other_adj);

  n_words = (n_pa - 1) / 32 + 1;

  /* matchN needs the initial matrix plus one matrix per depth:
     (n_pb + 1) matrices of n_pb * n_words words.  The buffer must also
     hold everything that is copied from the match String. */
  mm_words = (long)n_pb * (n_pb + 1) * n_words;
  match_words = (RSTRING(match)->len + (long)sizeof(long) - 1) / (long)sizeof(long);
  if(mm_words < match_words)
    mm_words = match_words;

  mm = ALLOC_N(long, mm_words);
  MEMZERO(mm, long, mm_words);
  memcpy(mm, RSTRING(match)->ptr, RSTRING(match)->len);

  result = matchN(num_adj, point, (long *)RSTRING(a_matrix)->ptr, mm, n_pb, n_pa);

  if(result == 1){
    /* the final matrix starts n_pb matrices after the initial one */
    for(i = 0 ; i < n_pb ; i++){
      found[i] = ntz_m(mm + n_words * n_pb * n_pb + i * n_words, n_pa);
    }
  }

  /* Release the buffer before any further Ruby call can raise. */
  xfree(mm);

  if(result != 1)
    return Qfalse;

  mapping = rb_ary_new();
  for(i = 0 ; i < n_pb ; i++){
    rb_ary_push(mapping, INT2FIX(found[i]));
  }
  return mapping;
}
|
392
|
+
|
393
|
+
// DataBase for substructure search
|
394
|
+
|
395
|
+
struct dbmdata {
|
396
|
+
int di_size;
|
397
|
+
};
|
398
|
+
|
399
|
+
static VALUE sdb_s_search(VALUE dbname){
|
400
|
+
rb_p(dbname);
|
401
|
+
}
|
402
|
+
|
403
|
+
Init_subcomp(){
|
404
|
+
VALUE subcomp_cGraph;
|
405
|
+
VALUE subcomp_cSubGraphDB;
|
406
|
+
|
407
|
+
subcomp_cGraph = rb_define_module("Graph");
|
408
|
+
rb_define_method(subcomp_cGraph, "subcomp_match_by_ullmann", subcomp_match_by_ullmann, 5);
|
409
|
+
|
410
|
+
subcomp_cSubGraphDB = rb_define_class_under(subcomp_cGraph, "SubGraphDB", rb_cObject);
|
411
|
+
|
412
|
+
rb_define_method(subcomp_cSubGraphDB, "open_for_search", sdb_s_search, 0);
|
413
|
+
|
414
|
+
rb_define_singleton_method(subcomp_cSubGraphDB, "show", subcomp_show, 3);
|
415
|
+
rb_define_singleton_method(subcomp_cSubGraphDB, "match", subcomp_match_by_ullmann, 5);
|
416
|
+
}
|