chemruby 0.9.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README +120 -0
- data/Rakefile +195 -0
- data/ext/extconf.rb +4 -0
- data/ext/subcomp.c +416 -0
- data/lib/chem.rb +130 -0
- data/lib/chem/appl.rb +1 -0
- data/lib/chem/appl/chem3dole.rb +36 -0
- data/lib/chem/appl/tinker/nucleic.rb +40 -0
- data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
- data/lib/chem/data.rb +4 -0
- data/lib/chem/data/atomic_weight.rb +124 -0
- data/lib/chem/data/character.rb +2 -0
- data/lib/chem/data/electronegativity.rb +14 -0
- data/lib/chem/data/periodic_table.rb +6 -0
- data/lib/chem/data/prime_numbers.rb +1 -0
- data/lib/chem/data/vdw_radii.rb +1 -0
- data/lib/chem/db.rb +64 -0
- data/lib/chem/db/cansmi.rb +234 -0
- data/lib/chem/db/cdx.rb +1525 -0
- data/lib/chem/db/eps.rb +164 -0
- data/lib/chem/db/g98.rb +909 -0
- data/lib/chem/db/gspan.rb +130 -0
- data/lib/chem/db/iupac.rb +5 -0
- data/lib/chem/db/iupac/a_1.rb +46 -0
- data/lib/chem/db/iupac/iuparser.rb +226 -0
- data/lib/chem/db/iupac/iuparser.ry +97 -0
- data/lib/chem/db/iupac/postfix.rb +2 -0
- data/lib/chem/db/kcf.rb +390 -0
- data/lib/chem/db/kcf_glycan.rb +19 -0
- data/lib/chem/db/kegg.rb +516 -0
- data/lib/chem/db/linucs/linparser.rb +144 -0
- data/lib/chem/db/linucs/linucs.ry +53 -0
- data/lib/chem/db/mdl.rb +379 -0
- data/lib/chem/db/molconnz.rb +12 -0
- data/lib/chem/db/mopac.rb +88 -0
- data/lib/chem/db/msi.rb +107 -0
- data/lib/chem/db/pdb_dic.rb +115 -0
- data/lib/chem/db/pdf.rb +131 -0
- data/lib/chem/db/pubchem.rb +113 -0
- data/lib/chem/db/rmagick.rb +70 -0
- data/lib/chem/db/sdf.rb +37 -0
- data/lib/chem/db/smbl.rb +88 -0
- data/lib/chem/db/smiles.rb +2 -0
- data/lib/chem/db/smiles/smiles.ry +203 -0
- data/lib/chem/db/smiles/smiparser.rb +375 -0
- data/lib/chem/db/swf.rb +74 -0
- data/lib/chem/db/sybyl.rb +150 -0
- data/lib/chem/db/tinker.rb +77 -0
- data/lib/chem/db/types/type_cansmi.rb +9 -0
- data/lib/chem/db/types/type_cdx.rb +24 -0
- data/lib/chem/db/types/type_gspan.rb +31 -0
- data/lib/chem/db/types/type_kcf.rb +28 -0
- data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
- data/lib/chem/db/types/type_kegg.rb +92 -0
- data/lib/chem/db/types/type_mdl.rb +31 -0
- data/lib/chem/db/types/type_pdf.rb +33 -0
- data/lib/chem/db/types/type_png.rb +31 -0
- data/lib/chem/db/types/type_rxn.rb +25 -0
- data/lib/chem/db/types/type_sdf.rb +25 -0
- data/lib/chem/db/types/type_sybyl.rb +30 -0
- data/lib/chem/db/types/type_xyz.rb +26 -0
- data/lib/chem/db/vector.rb +128 -0
- data/lib/chem/db/xyz.rb +39 -0
- data/lib/chem/model.rb +119 -0
- data/lib/chem/model/skeleton.rb +37 -0
- data/lib/chem/utils.rb +11 -0
- data/lib/chem/utils/geometry.rb +27 -0
- data/lib/chem/utils/graph_db.rb +146 -0
- data/lib/chem/utils/math.rb +17 -0
- data/lib/chem/utils/prop.rb +123 -0
- data/lib/chem/utils/sssr.rb +101 -0
- data/lib/chem/utils/sub.rb +78 -0
- data/lib/chem/utils/transform.rb +110 -0
- data/lib/chem/utils/traverse.rb +37 -0
- data/lib/chem/utils/ullmann.rb +134 -0
- data/lib/graph.rb +41 -0
- data/lib/graph/cluster.rb +20 -0
- data/lib/graph/morgan.rb +38 -0
- data/sample/frequent_subgraph.rb +46 -0
- data/sample/images/ex1.rb +11 -0
- data/sample/images/ex2.rb +4 -0
- data/sample/images/ex3.rb +5 -0
- data/sample/images/ex4.rb +17 -0
- data/sample/images/ex5.rb +10 -0
- data/sample/images/mol/adenine.mol +26 -0
- data/sample/images/mol/atp.mol +69 -0
- data/sample/images/temp/ex5.mol +344 -0
- data/sample/kegg_db.rb +116 -0
- data/setup.rb +1551 -0
- data/test/all.rb +6 -0
- data/test/coord_test.rb +17 -0
- data/test/ctab_test.rb +31 -0
- data/test/data/A_21.tar.gz +0 -0
- data/test/data/A_21/aceanthrylene.cdx +0 -0
- data/test/data/A_21/aceanthrylene.mol +40 -0
- data/test/data/A_21/acenaphthylene.cdx +0 -0
- data/test/data/A_21/acenaphthylene.mol +31 -0
- data/test/data/A_21/acephenanthrylene.cdx +0 -0
- data/test/data/A_21/acephenanthrylene.mol +40 -0
- data/test/data/A_21/anthracene.cdx +0 -0
- data/test/data/A_21/anthracene.mol +35 -0
- data/test/data/A_21/as-indacene.cdx +0 -0
- data/test/data/A_21/as-indacene.mol +31 -0
- data/test/data/A_21/azulene.cdx +0 -0
- data/test/data/A_21/azulene.mol +26 -0
- data/test/data/A_21/biphenylene.cdx +0 -0
- data/test/data/A_21/biphenylene.mol +31 -0
- data/test/data/A_21/chrysene.cdx +0 -0
- data/test/data/A_21/chrysene.mol +44 -0
- data/test/data/A_21/coronen.cdx +0 -0
- data/test/data/A_21/coronen.mol +59 -0
- data/test/data/A_21/fluoranthene.cdx +0 -0
- data/test/data/A_21/fluoranthene.mol +40 -0
- data/test/data/A_21/fluorene.cdx +0 -0
- data/test/data/A_21/fluorene.mol +33 -0
- data/test/data/A_21/heptacene.cdx +0 -0
- data/test/data/A_21/heptacene.mol +71 -0
- data/test/data/A_21/heptalene.cdx +0 -0
- data/test/data/A_21/heptalene.mol +30 -0
- data/test/data/A_21/heptaphene.cdx +0 -0
- data/test/data/A_21/heptaphene.mol +71 -0
- data/test/data/A_21/hexacene.cdx +0 -0
- data/test/data/A_21/hexacene.mol +62 -0
- data/test/data/A_21/hexaphene.cdx +0 -0
- data/test/data/A_21/hexaphene.mol +62 -0
- data/test/data/A_21/indene.cdx +0 -0
- data/test/data/A_21/indene.mol +24 -0
- data/test/data/A_21/iupac.txt +41 -0
- data/test/data/A_21/naphthacene.cdx +0 -0
- data/test/data/A_21/naphthacene.mol +44 -0
- data/test/data/A_21/naphthalene.cdx +0 -0
- data/test/data/A_21/naphthalene.mol +26 -0
- data/test/data/A_21/ovalene.cdx +0 -0
- data/test/data/A_21/ovalene.mol +78 -0
- data/test/data/A_21/pentacene.cdx +0 -0
- data/test/data/A_21/pentacene.mol +53 -0
- data/test/data/A_21/pentalene.cdx +0 -0
- data/test/data/A_21/pentalene.mol +22 -0
- data/test/data/A_21/pentaphene.cdx +0 -0
- data/test/data/A_21/pentaphene.mol +53 -0
- data/test/data/A_21/perylene.cdx +0 -0
- data/test/data/A_21/perylene.mol +49 -0
- data/test/data/A_21/phenalene.cdx +0 -0
- data/test/data/A_21/phenalene.mol +33 -0
- data/test/data/A_21/phenanthrene.cdx +0 -0
- data/test/data/A_21/phenanthrene.mol +35 -0
- data/test/data/A_21/picene.cdx +0 -0
- data/test/data/A_21/picene.mol +53 -0
- data/test/data/A_21/pleiadene.cdx +0 -0
- data/test/data/A_21/pleiadene.mol +44 -0
- data/test/data/A_21/pyranthrene.cdx +0 -0
- data/test/data/A_21/pyranthrene.mol +72 -0
- data/test/data/A_21/pyrene.cdx +0 -0
- data/test/data/A_21/pyrene.mol +40 -0
- data/test/data/A_21/rubicene.cdx +0 -0
- data/test/data/A_21/rubicene.mol +63 -0
- data/test/data/A_21/s-indacene.cdx +0 -0
- data/test/data/A_21/s-indacene.mol +31 -0
- data/test/data/A_21/tetraphenylene.cdx +0 -0
- data/test/data/A_21/tetraphenylene.mol +57 -0
- data/test/data/A_21/trinaphthylene.cdx +0 -0
- data/test/data/A_21/trinaphthylene.mol +71 -0
- data/test/data/A_21/triphenylene.cdx +0 -0
- data/test/data/A_21/triphenylene.mol +44 -0
- data/test/data/C00147.kcf +25 -0
- data/test/data/G00147.kcf +13 -0
- data/test/data/atp.mol +69 -0
- data/test/data/cyclohexane.mol +17 -0
- data/test/data/cyclohexane.ps +485 -0
- data/test/data/fullerene.mol +155 -0
- data/test/data/glycan +33 -0
- data/test/data/hypericin.cdx +0 -0
- data/test/data/hypericin.cdxml +596 -0
- data/test/data/hypericin.chm +0 -0
- data/test/data/hypericin.ct +85 -0
- data/test/data/hypericin.f1d +0 -0
- data/test/data/hypericin.f1q +0 -0
- data/test/data/hypericin.gif +0 -0
- data/test/data/hypericin.mol +88 -0
- data/test/data/hypericin.mol2 +159 -0
- data/test/data/hypericin.msm +123 -0
- data/test/data/hypericin.pdf +359 -0
- data/test/data/hypericin.png +0 -0
- data/test/data/hypericin.ps +0 -0
- data/test/data/hypericin.skc +0 -0
- data/test/data/hypericin2.gif +0 -0
- data/test/data/hypericin2.ps +0 -0
- data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
- data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
- data/test/data/kegg/ligand/mol/C00147.mol +26 -0
- data/test/data/kegg/ligand/reaction +14 -0
- data/test/data/kegg/ligand/reaction.lst +1 -0
- data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
- data/test/data/reaction +14 -0
- data/test/data/reaction.lst +1 -0
- data/test/data/reaction_mapformula.lst +3 -0
- data/test/data/rxn/C00001.mol +6 -0
- data/test/data/rxn/C00011.mol +10 -0
- data/test/data/rxn/C00014.mol +6 -0
- data/test/data/rxn/C01010.mol +18 -0
- data/test/data/rxn/sample.rxn +50 -0
- data/test/data/rxn/substitution.rxn +45 -0
- data/test/data/test.eps +0 -0
- data/test/data/test.mol +28 -0
- data/test/data/test.sdf +143 -0
- data/test/data/test.skc +0 -0
- data/test/data/test.xyz +4 -0
- data/test/data/test_lf.sdf +143 -0
- data/test/heavy_test_pubchem.rb +16 -0
- data/test/multiple_test.rb +22 -0
- data/test/test_adj.rb +54 -0
- data/test/test_canonical_smiles.rb +46 -0
- data/test/test_cdx.rb +32 -0
- data/test/test_chem.rb +18 -0
- data/test/test_cluster.rb +19 -0
- data/test/test_db.rb +11 -0
- data/test/test_eps.rb +24 -0
- data/test/test_geometry.rb +11 -0
- data/test/test_gspan.rb +28 -0
- data/test/test_iupac.rb +36 -0
- data/test/test_kcf.rb +24 -0
- data/test/test_kcf_glycan.rb +10 -0
- data/test/test_kegg.rb +118 -0
- data/test/test_linucs.rb +21 -0
- data/test/test_mdl.rb +45 -0
- data/test/test_mol2.rb +62 -0
- data/test/test_morgan.rb +21 -0
- data/test/test_pdf.rb +12 -0
- data/test/test_prop.rb +86 -0
- data/test/test_rmagick.rb +15 -0
- data/test/test_sbdb.rb +23 -0
- data/test/test_sdf.rb +30 -0
- data/test/test_smiles.rb +84 -0
- data/test/test_sssr.rb +18 -0
- data/test/test_sub.rb +47 -0
- data/test/test_subcomp.rb +37 -0
- data/test/test_traverse.rb +29 -0
- data/test/test_writer.rb +13 -0
- data/test/test_xyz.rb +15 -0
- data/test/type_test.rb +25 -0
- metadata +290 -0
data/README
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
= ChemRuby - Cheminformatics Ruby
|
2
|
+
|
3
|
+
ChemRuby is a framework for developing cheminformatics applications in Ruby.
|
4
|
+
It will let you retrieve chemical information from a variety of data sources
|
5
|
+
in various formats (such as MDL mol, SMILES etc.), perform fast substructure search
|
6
|
+
based on graph theory, draw a chemical structure in various graphics formats
|
7
|
+
(such as PDF, PNG etc.), and calculate a number of chemical properties.
|
8
|
+
|
9
|
+
== FOR MORE INFORMATION
|
10
|
+
|
11
|
+
ChemRuby's official website is at ((<URL:http://www.chemruby.org/>)).
|
12
|
+
You will find links to the related resources including downloads,
|
13
|
+
Wiki documentation etc. on the top page.
|
14
|
+
|
15
|
+
* ((<URL:http://www.chemruby.org/>))
|
16
|
+
|
17
|
+
|
18
|
+
== WHERE TO OBTAIN
|
19
|
+
|
20
|
+
--- WWW
|
21
|
+
|
22
|
+
The releases can be obtained at ChemRuby website.
|
23
|
+
|
24
|
+
* ((<URL:http://www.chemruby.org/>))
|
25
|
+
|
26
|
+
--- RubyGems
|
27
|
+
|
28
|
+
((<RubyGems|URL:http://rubyforge.org/projects/rubygems/>)) version of
|
29
|
+
the ChemRuby package is also available for easy installation.
|
30
|
+
|
31
|
+
* ((<URL:http://rubyforge.org/projects/chemruby/>))
|
32
|
+
|
33
|
+
|
34
|
+
== REQUIREMENTS
|
35
|
+
|
36
|
+
* Ruby 1.8.2 or later -- ((<URL:http://www.ruby-lang.org/>))
|
37
|
+
|
38
|
+
|
39
|
+
== OPTIONAL REQUIREMENTS
|
40
|
+
|
41
|
+
Some optional libraries can be utilized to extend ChemRuby's functionality.
|
42
|
+
If your needs meet the following conditions, install them from the "Ruby
|
43
|
+
Application Archive" at ((<URL:http://raa.ruby-lang.org/>)).
|
44
|
+
|
45
|
+
For outputting png and jpeg images:
|
46
|
+
|
47
|
+
* [RAA:rmagick]
|
48
|
+
|
49
|
+
For testing and developing ChemRuby:
|
50
|
+
|
51
|
+
* [RAA:rake]
|
52
|
+
|
53
|
+
== INSTALL
|
54
|
+
|
55
|
+
In the chemruby source directory (such as chemruby-x.x.x/), run setup.rb
|
56
|
+
as follows:
|
57
|
+
|
58
|
+
% ruby setup.rb config
|
59
|
+
% ruby setup.rb setup
|
60
|
+
% su
|
61
|
+
# ruby setup.rb install
|
62
|
+
|
63
|
+
If your operating system supports the 'sudo' command (such as Mac OS X),
|
64
|
+
try the following procedure instead of the above.
|
65
|
+
|
66
|
+
% ruby setup.rb config
|
67
|
+
% ruby setup.rb setup
|
68
|
+
% sudo ruby setup.rb install
|
69
|
+
|
70
|
+
You can run tests by
|
71
|
+
|
72
|
+
% rake test
|
73
|
+
|
74
|
+
and run
|
75
|
+
|
76
|
+
% rake
|
77
|
+
|
78
|
+
for more details.
|
79
|
+
|
80
|
+
== USAGE
|
81
|
+
|
82
|
+
You can load all ChemRuby classes just by requiring 'chem.rb'. All the
|
83
|
+
ChemRuby classes and modules are located under the module name 'Chem' to
|
84
|
+
separate the name space.
|
85
|
+
|
86
|
+
#!/usr/bin/env ruby
|
87
|
+
require 'chem'
|
88
|
+
|
89
|
+
--- RubyGems
|
90
|
+
|
91
|
+
With RubyGems, you need to load the 'rubygems' library before using 'chem'.
|
92
|
+
|
93
|
+
#!/usr/bin/env ruby
|
94
|
+
require 'rubygems'
|
95
|
+
require_gem 'chem'
|
96
|
+
|
97
|
+
== Credits
|
98
|
+
|
99
|
+
* GOTO Naohisa, KATAYAMA Toshiaki and NAKAO Mitsuteru (alphabetical order)
|
100
|
+
who are developers of BioRuby, led the design of ChemRuby.
|
101
|
+
|
102
|
+
* gSpan parser and PubChem search from KADOWAKI masashi.
|
103
|
+
|
104
|
+
== LICENSE
|
105
|
+
|
106
|
+
ChemRuby can be freely distributed under Ruby's license.
|
107
|
+
Note that setup.rb, included in the ChemRuby package, comes from
|
108
|
+
<RAA:setup> developed by Minero Aoki.
|
109
|
+
|
110
|
+
This README file can also be distributed under Ruby's license.
|
111
|
+
|
112
|
+
Copyright (C) 2006 TANAKA Nobuya <tanaka@chemruby.org>
|
113
|
+
KATAYAMA Toshiaki <k@bioruby.org>
|
114
|
+
|
115
|
+
== CONTACT
|
116
|
+
|
117
|
+
Current staff of the ChemRuby project can be reached by sending e-mail
|
118
|
+
to <staff@chemruby.org>.
|
119
|
+
|
120
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,195 @@
|
|
1
|
+
#
|
2
|
+
# Rakefile
|
3
|
+
#
|
4
|
+
# See http://docs.rubyrake.org/ to see how to use ``rake'' command.
|
5
|
+
#
|
6
|
+
# $Id: Rakefile 61 2005-10-12 09:17:39Z tanaka $
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'rake/clean'
|
10
|
+
require 'rake/testtask'
|
11
|
+
require 'rake/gempackagetask'
|
12
|
+
|
13
|
+
# Running plain `rake' prints the help text below.
task :default => [:help]

PKG_VERSION = "0.9.3"
PKG_BUILD = "RC1"

# Files that go into the gem and the tarball built by the package task.
PKG_FILES = FileList[
  "Rakefile", "README", #"ChangeLog", "Releases", "TODO",
  "setup.rb",
#  "post-install.rb",
#  "bin/*",
#  "doc/*.css", "doc/*.rb",
#  "examples/**/*",
#  "gemspecs/**/*",
  "lib/**/*.rb",
  "lib/**/*.ry",
  "test/**/*",
  "sample/**/*.rb",
  "sample/**/*.mol",
  "ext/**/*.h",
  "ext/**/*.c",
  "ext/**/*.rb",
#  "pkgs/**/*",
#  "redist/*.gem",
#  "scripts/*.rb",
#  "test/**/*"
]

# Prints installation and usage instructions.
task :help do |t|
  puts <<EOL

ChemRuby #{PKG_VERSION}

To install ChemRuby, you need at least

* ruby-1.8.2 (or later)
* Ruby header files (included in original Ruby)
* C language compilers (such as gcc)

If the following modules are installed, ChemRuby will use it.
You can install them later.

* RMagick ( You will find how to install them in http://www.chemruby.org)

== Compiling and Installing

% rake compile
% sudo rake install

or just

% sudo ruby setup.rb

== Compiling RDOC

% rake doc

== Test

% rake test

You will need RMagick and other libraries to pass all the tests.

EOL

end

# Generates the RDoc documentation with README as the main page.
task :doc do |t|
  system "rdoc --main README ./lib README"
end


# Tests for code under dev/; runs the regular test task first.
task :dev => [:test]
Rake::TestTask.new(:dev) do |t|
  t.libs << File.join('ext')
  t.libs << File.join('lib')
  t.libs << File.join('dev/lib')
  t.libs << File.join('dev/ext')
#   cd 'dev/ext/chem/db/inchi/' do
#     ruby %{extconf.rb}
#     sh "make"
#   end
  t.test_files = FileList['dev/test/test*.rb']
end

# Regular test suite; the parsers and the C extension are built first.
task :test => [:compile]
Rake::TestTask.new(:test) do |t|
  t.libs << File.join('ext')
  t.libs << File.join('lib')
  t.test_files = FileList['test/test*.rb']
end

# Quick run of the subcomp extension test only.
task :light => [:compile]
Rake::TestTask.new(:light) do |t|
  t.libs << File.join('ext')
  t.libs << File.join('lib')
  t.test_files = FileList['test/test_subcomp.rb']
  #'test/test_kegg.rb'# 'test/test_kcf_glycan.rb' #FileList['test/test_canonical_smiles.rb']
end

# Removes an installed copy from the hard-coded site_ruby 1.8 paths below.
task :rm do
  system "rm -rf /usr/local/lib/site_ruby/1.8/chem"
  system "rm /usr/local/lib/site_ruby/1.8/chem.rb"
  system "rm -rf /usr/local/lib/site_ruby/1.8/i386-linux/chem"
end

desc "Prepares for installation"
task :prepare do
  ruby "setup.rb config"
  ruby "setup.rb setup"
end

desc "Installing library"
task :install => [:compile, :prepare] do
  ruby "setup.rb install"
end

# Tests matching test/heavy_test*.rb; runs the regular test task first.
task :heavy => [:test]
Rake::TestTask.new(:heavy) do |t|
  t.libs << File.join('ext')
  t.libs << File.join('lib')
  t.test_files = FileList['test/heavy_test*.rb']
end

# Removes build products from ext/.  Besides object files and Mac OS X
# bundles this also removes the extension built by the compile task, whose
# suffix is Config::CONFIG["DLEXT"] (uniq avoids a double pass when that
# suffix is "bundle").
task :clean do
  cd "ext/" do
    ["*.o", "*.bundle", "*.#{Config::CONFIG["DLEXT"]}"].uniq.each do |pattern|
      Dir.glob(pattern).each do |file|
        rm file
      end
    end
  end
end

# BUG!? Need code for testing if racc exist !?
# The three parsers below are generated from their racc grammars.
file 'lib/chem/db/smiles/smiparser.rb' => ['lib/chem/db/smiles/smiles.ry'] do
  cd 'lib/chem/db/smiles/' do
    sh "racc smiles.ry -o smiparser.rb"
  end
end

file 'lib/chem/db/iupac/iuparser.rb' => ['lib/chem/db/iupac/iuparser.ry'] do
  cd 'lib/chem/db/iupac/' do
    sh "racc iuparser.ry -o iuparser.rb"
  end
end

file 'lib/chem/db/linucs/linparser.rb' => ['lib/chem/db/linucs/linucs.ry'] do
  cd 'lib/chem/db/linucs/' do
    sh "racc linucs.ry -o linparser.rb"
  end
end

# extconf.rb writes ext/Makefile; make then builds the extension itself.
file 'ext/Makefile' => ['ext/extconf.rb', 'ext/subcomp.c'] do
  cd 'ext/' do
    ruby %{extconf.rb}
  end
end

file "ext/subcomp.#{Config::CONFIG["DLEXT"]}" => ['ext/subcomp.c', 'ext/Makefile'] do
  cd 'ext/' do
    sh "make"
  end
end


desc "Compiling library"
task :compile => ['lib/chem/db/smiles/smiparser.rb', 'lib/chem/db/iupac/iuparser.rb', 'lib/chem/db/linucs/linparser.rb', "ext/subcomp.#{Config::CONFIG["DLEXT"]}"]

spec = Gem::Specification.new do |s|
  s.name = 'chemruby'
  s.version = PKG_VERSION
  s.require_path = 'lib'
  s.autorequire = 'chem'
  s.files = PKG_FILES
  s.extensions << 'ext/extconf.rb'
  s.summary = "A framework program for cheminformatics"
end

Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_tar = true
  pkg.package_files += PKG_FILES
end
|
data/ext/extconf.rb
ADDED
data/ext/subcomp.c
ADDED
@@ -0,0 +1,416 @@
|
|
1
|
+
/**********************************************************************
|
2
|
+
|
3
|
+
subcomp.c -
|
4
|
+
|
5
|
+
$Author: nobyt $
|
6
|
+
|
7
|
+
Copyright (C) 2004-2006 Nobuya Tanaka
|
8
|
+
|
9
|
+
**********************************************************************/
|
10
|
+
|
11
|
+
/* All-ones and all-zeros patterns for one 32-bit matrix word. */
#define FULL 0xffffffff
#define ZERO 0x0

/*
 * Result codes.  They are defined without a trailing semicolon so that they
 * can be used inside expressions (comparisons, conditional operators) as
 * well as in plain `return X;' / `flag = X;' statements.
 */
#define FAIL 0
#define SUCCESS 1
|
16
|
+
|
17
|
+
#include <ruby.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
18
|
+
|
19
|
+
/*
 * Prints a packed bit matrix to stdout (debugging aid).
 *
 * m  - pa rows, each made of n_words consecutive longs of which the low
 *      32 bits are used; n_words = (pb - 1) / 32 + 1
 * pa - number of rows
 * pb - number of columns (bits per row)
 *
 * A header line with the column index modulo 10 is printed first.  Every
 * row is then prefixed with its index modulo 10 and shows '@' for a set
 * bit and '.' for a clear bit.  Nothing but the header is printed when
 * pb <= 0.
 */
static void
show(long *m, int pa, int pb)
{
  int row, word, bit, last;
  /* Plain int arithmetic: mixing in sizeof() would turn a non-positive
     pb into a huge unsigned word count. */
  int n_words = pb > 0 ? (pb - 1) / 32 + 1 : 0;

  printf("\n ");
  for(bit = 0 ; bit < pb ; bit++){
    printf("%d", bit % 10);
  }
  printf("\n");
  for(row = 0 ; row < pa && n_words > 0 ; row++){
    printf("%d ", row % 10);
    for(word = 0 ; word < n_words ; word++){
      /* the last word of a row may be only partly used */
      last = (word + 1) * 32 < pb ? (word + 1) * 32 : pb;
      for(bit = word * 32 ; bit < last ; bit++){
        /* unsigned shift: 1 << 31 overflows a signed int */
        if((unsigned long)m[row * n_words + word] & (1UL << (bit - word * 32)))
          printf("@");
        else
          printf(".");
      }
    }
    printf("\n");
  }
  printf("\n");
}
|
50
|
+
|
51
|
+
/*
|
52
|
+
* call-seq:
|
53
|
+
* SubGraphDB.show -> print out adjacency matrix
|
54
|
+
*
|
55
|
+
* This function is mainly for debug.
|
56
|
+
*/
|
57
|
+
|
58
|
+
static VALUE
|
59
|
+
subcomp_show(VALUE self, VALUE str, VALUE pa, VALUE pb)
|
60
|
+
{
|
61
|
+
printf("subcomp_show called %3d %3d\n", FIX2INT(pa), FIX2INT(pb));
|
62
|
+
show((long * )RSTRING(str)->ptr, FIX2INT(pa), FIX2INT(pb));
|
63
|
+
return Qnil;
|
64
|
+
}
|
65
|
+
|
66
|
+
|
67
|
+
/*
 * Returns the number of trailing zero bits of a multi-word bit row, i.e.
 * the index of its lowest set bit.
 *
 * y  - the row; only the low 32 bits of each long are examined
 * pb - number of bits in the row; (pb - 1) / 32 + 1 words are scanned
 *      (one word when pb <= 0)
 *
 * Returns 32 * (number of words) when no word of the row is non-zero.
 * The scan is bounded by the word count, not by the bit count, so an empty
 * row never reads past its own words.
 */
static int ntz_m(long *y, int pb){
  int n_words = pb > 0 ? (pb - 1) / 32 + 1 : 1;
  int i = 0;
  int n = 1;
  unsigned long x;

  /* skip whole zero words */
  while(i < n_words && y[i] == 0){
    n += 32;
    i++;
  }
  if(i == n_words){
    return 32 * n_words;
  }

  /* binary search for the lowest set bit of the first non-zero word */
  x = (unsigned long)y[i];

  if((x & 0x0000FFFF) == 0) {n = n + 16 ; x = x >> 16;}
  if((x & 0x000000FF) == 0) {n = n + 8 ; x = x >> 8;}
  if((x & 0x0000000F) == 0) {n = n + 4 ; x = x >> 4;}
  if((x & 0x00000003) == 0) {n = n + 2 ; x = x >> 2;}
  return n - (int)(x & 1);
}
|
90
|
+
|
91
|
+
/*
 * Number of trailing zero bits in the low 32 bits of x.
 * Returns 32 when x is 0.
 */
static int ntz(long x){
  /* work on an unsigned copy so the right shifts are well defined even
     when bit 31 of a 32-bit long is set */
  unsigned long w = (unsigned long)x;
  int n;

  if (w == 0) return (32);
  n = 1;
  if((w & 0x0000FFFF) == 0) {n = n + 16 ; w = w >> 16;}
  if((w & 0x000000FF) == 0) {n = n + 8 ; w = w >> 8;}
  if((w & 0x0000000F) == 0) {n = n + 4 ; w = w >> 4;}
  if((w & 0x00000003) == 0) {n = n + 2 ; w = w >> 2;}
  return n - (int)(w & 1);
}

/*
 * Number of trailing zero bits of a bit row stored in n_words words of
 * 32 used bits each.
 *
 * Returns 32 * n_words when every word is zero (0 when n_words <= 0).
 * The bound is tested before x[i] is read, and an all-zero row no longer
 * reads the word that follows the row.
 */
static int ntz_n_words(long * x, int n_words){
  int i;
  int words = 0;

  for(i = 0 ; i < n_words && x[i] == 0 ; i++){
    words += 32;
  }
  if(i >= n_words){
    return words;
  }
  return ntz(x[i]) + words;
}
|
111
|
+
|
112
|
+
/* bit_mask[i] has exactly bit i set, for i = 0 .. 31. */
long bit_mask[32] = {
  /* bits  0 ..  7 */
  0x00000001, 0x00000002, 0x00000004, 0x00000008,
  0x00000010, 0x00000020, 0x00000040, 0x00000080,
  /* bits  8 .. 15 */
  0x00000100, 0x00000200, 0x00000400, 0x00000800,
  0x00001000, 0x00002000, 0x00004000, 0x00008000,
  /* bits 16 .. 23 */
  0x00010000, 0x00020000, 0x00040000, 0x00080000,
  0x00100000, 0x00200000, 0x00400000, 0x00800000,
  /* bits 24 .. 31 */
  0x01000000, 0x02000000, 0x04000000, 0x08000000,
  0x10000000, 0x20000000, 0x40000000, 0x80000000,
};
|
122
|
+
|
123
|
+
/*
 * reverse_bit[i] is the 32-bit pattern with every bit set except bit i;
 * AND-ing a word with it clears bit i.
 */
long reverse_bit[32] = {
  0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFB, 0xFFFFFFF7,
  0xFFFFFFEF, 0xFFFFFFDF, 0xFFFFFFBF, 0xFFFFFF7F,
  0xFFFFFEFF, 0xFFFFFDFF, 0xFFFFFBFF, 0xFFFFF7FF,
  0xFFFFEFFF, 0xFFFFDFFF, 0xFFFFBFFF, 0xFFFF7FFF,
  0xFFFEFFFF, 0xFFFDFFFF, 0xFFFBFFFF, 0xFFF7FFFF,
  0xFFEFFFFF, 0xFFDFFFFF, 0xFFBFFFFF, 0xFF7FFFFF,
  0xFEFFFFFF, 0xFDFFFFFF, 0xFBFFFFFF, 0xF7FFFFFF,
  0xEFFFFFFF, 0xDFFFFFFF, 0xBFFFFFFF, 0x7FFFFFFF,
};
|
157
|
+
|
158
|
+
//int matchN(ADJACENCY *adj_ptr, long *b, long *m, int pa, int pb)
|
159
|
+
static int matchN(const int * num_adj, long ** point, long *b, long *m, int pa, int pb)
|
160
|
+
{
|
161
|
+
long * mm;// current matrix
|
162
|
+
long f[1000];//which columns has been used at an intermediate state of computing
|
163
|
+
long h[100];// pb < 100 * 32
|
164
|
+
|
165
|
+
int d;// depth for matrix
|
166
|
+
int k;// width for matrix
|
167
|
+
int dd;// depth of matrix in refinement step
|
168
|
+
int kk;// width of matrix in refinement step
|
169
|
+
|
170
|
+
int i, j;//temp
|
171
|
+
long l;// temp
|
172
|
+
|
173
|
+
short vflag;//valid check flag
|
174
|
+
int n_words;// number of words needed for storing 'pb' bits.
|
175
|
+
long refine_mm;// pointer for mm(match matrix) used in refinment step.
|
176
|
+
|
177
|
+
d = k = 0;
|
178
|
+
// start back track
|
179
|
+
for(i = 0 ; i < (pb / 32 + 1) ; i++)
|
180
|
+
h[i] = 0;
|
181
|
+
for(i = 0 ; i < 10 ; i++)
|
182
|
+
f[i] = 0;
|
183
|
+
|
184
|
+
n_words = (pb - 1) / (sizeof(int) * 8) + 1;
|
185
|
+
|
186
|
+
/* show(b, pb, pb); */
|
187
|
+
/* show(m, pa, pb); */
|
188
|
+
|
189
|
+
if( d == 0 && k == 0){
|
190
|
+
k = ntz_n_words(m, n_words);
|
191
|
+
h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
|
192
|
+
}
|
193
|
+
while(k <= pb && d <= pa){
|
194
|
+
/* printf("d : %3d k : %3d n_words : %3d\n", d, k, n_words); */
|
195
|
+
if(d < 0){
|
196
|
+
printf("d < 0 return \n");
|
197
|
+
return FAIL;
|
198
|
+
}
|
199
|
+
|
200
|
+
// Idea for optimization :
|
201
|
+
// instead of using following equation, just (mm = mm + len) and (mm = mm - len).
|
202
|
+
mm = m + pa * (d + 1) * n_words;
|
203
|
+
/* printf("pa : %d d : %d k : %d n_words : %d hint : %d\n", pa, d, k, n_words, pa * (d + 1) * n_words); */
|
204
|
+
|
205
|
+
//printf("ntz : %d\n", ntz(mm));
|
206
|
+
//k = ntz(mm + d);
|
207
|
+
// set (k, d) bit '1', clear k-column and d-row '0'
|
208
|
+
/* printf("k : %d d: %d\n", k, d); */
|
209
|
+
for(j = 0 ; j < n_words ; j++){
|
210
|
+
if(j == (k / 32)){
|
211
|
+
for(i = 0 ; i < pa ; i++){
|
212
|
+
mm[i * n_words + j] = mm[(i - pa) * n_words + j] & reverse_bit[k - (k / 32) * 32];
|
213
|
+
}
|
214
|
+
mm[d * n_words + j] = bit_mask[k - (k / 32) * 32];
|
215
|
+
}else{
|
216
|
+
for(i = 0 ; i < pa ; i++){
|
217
|
+
mm[i * n_words + j] = mm[(i - pa) * n_words + j];
|
218
|
+
}
|
219
|
+
mm[d * n_words + j] = ZERO;
|
220
|
+
}
|
221
|
+
}
|
222
|
+
// BEGIN
|
223
|
+
/* show(mm, pa, pb); */
|
224
|
+
// END
|
225
|
+
|
226
|
+
// Refinement step
|
227
|
+
// Hot Spot!!
|
228
|
+
dd = kk = 0;
|
229
|
+
/* printf("before refinement step \n"); */
|
230
|
+
/* show(mm, pa, pb); */
|
231
|
+
|
232
|
+
while(dd != pa){
|
233
|
+
while(kk != pb){
|
234
|
+
//Idea for optimization :
|
235
|
+
//refine_mm should not updated 1 / 32 times.mm[dd + ((kk - 1) / 32)]
|
236
|
+
|
237
|
+
//Idea for optimization :
|
238
|
+
// when mm is sparse there may be better algorithm
|
239
|
+
// for searching '1' bit.
|
240
|
+
if(mm[dd * n_words + ((kk - 1) / 32)] & bit_mask[kk - ((kk - 1) / 32) * 32]){
|
241
|
+
// Following loop can be flattened
|
242
|
+
for(i = 0 ; i < num_adj[dd] ; i++){
|
243
|
+
l = 0;
|
244
|
+
for(j = 0 ; j < n_words ; j++){
|
245
|
+
l |= (b[kk * n_words + j] & mm[point[dd][i] * n_words + j]);
|
246
|
+
}
|
247
|
+
if(l == 0){
|
248
|
+
mm[dd * n_words + (kk / 32)] &= reverse_bit[kk - (kk / 32) * 32];//remove bit
|
249
|
+
/* break;//quit for loop */
|
250
|
+
}
|
251
|
+
}
|
252
|
+
|
253
|
+
}
|
254
|
+
kk++;
|
255
|
+
}
|
256
|
+
// Idea for optimization
|
257
|
+
// every 32 bit is tested here.
|
258
|
+
kk = 0;
|
259
|
+
dd++;
|
260
|
+
}
|
261
|
+
/* show(mm, pa, pb); */
|
262
|
+
|
263
|
+
//Checking whether match matrices are valid.
|
264
|
+
// Subgraph isomorphism can be checked here before reaching d == pa.
|
265
|
+
vflag = SUCCESS;
|
266
|
+
for(i = 0 ; i < pa ; i++){
|
267
|
+
l = 0;
|
268
|
+
for(j = 0 ; j < n_words ; j++){
|
269
|
+
l |= mm[i * n_words + j];
|
270
|
+
}
|
271
|
+
if(l == 0){
|
272
|
+
vflag = FAIL;
|
273
|
+
break;
|
274
|
+
}
|
275
|
+
}
|
276
|
+
|
277
|
+
if(vflag){// Success
|
278
|
+
f[d] = k;
|
279
|
+
k = 0;
|
280
|
+
while(h[k / 32] & bit_mask[k - (k / 32) * 32])
|
281
|
+
k++;
|
282
|
+
d++;
|
283
|
+
if(d == pa){
|
284
|
+
/* show(mm, pa, pb); */
|
285
|
+
//printf("FOUND! d : %d\n", d);
|
286
|
+
return SUCCESS;
|
287
|
+
}
|
288
|
+
else{
|
289
|
+
h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
|
290
|
+
}
|
291
|
+
}else{//Failed
|
292
|
+
h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
|
293
|
+
k++;
|
294
|
+
//printf("d : %d k : %d\n", d, k);
|
295
|
+
while((h[k / 32] & bit_mask[k - (k / 32) * 32] ||
|
296
|
+
(m[d * n_words + (k / 32)] & bit_mask[k - (k / 32) * 32] ) == 0) &&
|
297
|
+
k < pb)
|
298
|
+
k++;
|
299
|
+
/* printf("d : %d k : %d\n", d, k); */
|
300
|
+
while(k > pb){
|
301
|
+
if(d == 0){
|
302
|
+
return FAIL;
|
303
|
+
}
|
304
|
+
d--;
|
305
|
+
k = f[d];
|
306
|
+
h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
|
307
|
+
k++;
|
308
|
+
while(h[k / 32] & bit_mask[k - (k / 32) * 32])
|
309
|
+
k++;
|
310
|
+
}
|
311
|
+
h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
|
312
|
+
}
|
313
|
+
}
|
314
|
+
//printf("d : %d k : %d FAIL!\n", d, k);
|
315
|
+
return FAIL;
|
316
|
+
}
|
317
|
+
|
318
|
+
/*
 * Flattens a Ruby Array of Arrays of Fixnums into C adjacency tables.
 *
 * num_adj - out: num_adj[i] receives the length of row i
 * point   - out: point[i] receives the address inside adj where the
 *           entries of row i start
 * adj     - out: the entries of all rows, stored back to back
 * ret     - Ruby Array whose elements are Arrays of Fixnums
 *
 * Raises TypeError when ret or one of its rows is not an Array.
 * The output tables are not bounds-checked here: the caller has to provide
 * room for every row (num_adj, point) and every entry (adj).
 */
static void set_adjacency(int * num_adj, long ** point, long * adj, VALUE ret){
  long i, j;
  int off_set = 0;

  Check_Type(ret, T_ARRAY);

  for(i = 0 ; i < RARRAY(ret)->len ; i++){
    VALUE row = RARRAY(ret)->ptr[i];

    /* RARRAY() is a plain cast; reject anything that is not an Array. */
    Check_Type(row, T_ARRAY);

    num_adj[i] = (int)RARRAY(row)->len;
    point[i] = adj + off_set;
    for(j = 0 ; j < RARRAY(row)->len ; j++){
      adj[off_set++] = FIX2INT(RARRAY(row)->ptr[j]);
    }
  }
}
|
334
|
+
|
335
|
+
/*
 *  call-seq:
 *     match(a_matrix, pa, other_adj, pb, match) -> Array or false
 *
 *  Runs matchN() with pb rows and pa columns.
 *
 *  a_matrix  - String holding a packed bit matrix, handed to matchN() as
 *              its column matrix
 *  pa        - number of columns
 *  other_adj - Array of Arrays of Fixnums: adjacency list with at least
 *              pb rows
 *  pb        - number of rows
 *  match     - String holding the initial candidate matrix; it is copied,
 *              never modified
 *
 *  Returns false when pb > pa, when pb <= 0 or when no assignment is
 *  found.  Otherwise returns an Array of pb Integers, one per row, giving
 *  the index of the lowest set bit of that row in the final matrix.
 *
 *  Raises TypeError for arguments of the wrong class and ArgumentError
 *  when the graphs exceed the fixed table sizes used below and in
 *  matchN().
 *
 *  NOTE(review): the length of a_matrix is not validated against pa, and
 *  the neighbour indices in other_adj are not range-checked; both are
 *  assumed to be consistent with pa / pb -- confirm against the Ruby
 *  callers.
 */
static VALUE subcomp_match_by_ullmann(VALUE self, VALUE a_matrix, VALUE pa, VALUE other_adj, VALUE pb, VALUE match){
  enum {
    MAX_QUERY_NODES = 1000,       /* capacity of num_adj / point */
    MAX_QUERY_EDGES = 3000,       /* capacity of adj */
    MAX_TARGET_NODES = 100 * 32   /* matchN() tracks at most 100 column words */
  };

  // variables for adjacency list
  int num_adj[MAX_QUERY_NODES];
  long * point[MAX_QUERY_NODES];
  long adj[MAX_QUERY_EDGES];//adjacency list
  int cols[MAX_QUERY_NODES];//result, copied out before the buffer is freed

  long * mm;// all levels of the match matrix: n_pb * (n_pb + 1) * n_words words
  long * final_level;

  //temporary variables
  int i;
  int result;
  VALUE mapping;

  int n_pb, n_pa;
  int n_words;
  long n_rows, n_entries;
  size_t needed, buf_bytes;

  n_pb = NUM2INT(pb);
  n_pa = NUM2INT(pa);

  if(n_pb > n_pa){
    return Qfalse;
  }
  if(n_pb <= 0){
    return Qfalse;
  }

  Check_Type(a_matrix, T_STRING);
  Check_Type(match, T_STRING);
  Check_Type(other_adj, T_ARRAY);

  if(n_pa >= MAX_TARGET_NODES)
    rb_raise(rb_eArgError, "too many columns: %d (limit %d)", n_pa, MAX_TARGET_NODES - 1);

  // Make sure the adjacency list fits the stack tables before anything is
  // written into them.
  n_rows = RARRAY(other_adj)->len;
  if(n_rows > MAX_QUERY_NODES)
    rb_raise(rb_eArgError, "adjacency list has too many rows: %ld (limit %d)", n_rows, MAX_QUERY_NODES);
  if(n_pb > n_rows)
    rb_raise(rb_eArgError, "adjacency list has %ld rows but pb is %d", n_rows, n_pb);

  n_entries = 0;
  for(i = 0 ; i < n_rows ; i++){
    Check_Type(RARRAY(other_adj)->ptr[i], T_ARRAY);
    n_entries += RARRAY(RARRAY(other_adj)->ptr[i])->len;
  }
  if(n_entries > MAX_QUERY_EDGES)
    rb_raise(rb_eArgError, "adjacency list has too many entries: %ld (limit %d)", n_entries, MAX_QUERY_EDGES);

  // Everything that can raise is done before the buffer is allocated, so
  // an exception cannot leak it.
  set_adjacency(num_adj, point, adj, other_adj);

  n_words = (n_pa - 1) / 32 + 1;

  // matchN() writes one matrix per depth after the initial one.  The
  // buffer is zero-filled and at least as large as the incoming string.
  needed = (size_t)n_pb * (size_t)(n_pb + 1) * (size_t)n_words * sizeof(long);
  buf_bytes = (size_t)RSTRING(match)->len;
  if(buf_bytes < needed){
    buf_bytes = needed;
  }
  mm = (long *)calloc(1, buf_bytes);
  if(mm == NULL){
    rb_memerror();
  }
  memcpy(mm, RSTRING(match)->ptr, RSTRING(match)->len);

  result = matchN(num_adj, point, (long *)RSTRING(a_matrix)->ptr, mm, n_pb, n_pa);

  if(result == 1){
    // the matrix of the last level starts n_pb levels into the buffer
    final_level = mm + n_words * n_pb * n_pb;
    for(i = 0 ; i < n_pb ; i++){
      cols[i] = ntz_m(final_level + i * n_words, n_pa);
    }
  }
  free(mm);

  if(result != 1){
    return Qfalse;
  }

  mapping = rb_ary_new();
  for(i = 0 ; i < n_pb ; i++){
    rb_ary_push(mapping, INT2FIX(cols[i]));
  }
  return mapping;
}
|
392
|
+
|
393
|
+
// DataBase for substructure search
|
394
|
+
|
395
|
+
/*
 * Record type declared for the substructure-search database.
 * NOTE(review): nothing in the visible code uses it, and the meaning of
 * di_size cannot be told from here.
 */
struct dbmdata
{
  int di_size;
};
|
398
|
+
|
399
|
+
/*
 * Prints its argument with rb_p() and returns nil.
 *
 * The function is declared to return a VALUE, so it must return one;
 * falling off the end would hand an indeterminate value back to the
 * interpreter.
 */
static VALUE sdb_s_search(VALUE dbname){
  rb_p(dbname);
  return Qnil;
}
|
402
|
+
|
403
|
+
Init_subcomp(){
|
404
|
+
VALUE subcomp_cGraph;
|
405
|
+
VALUE subcomp_cSubGraphDB;
|
406
|
+
|
407
|
+
subcomp_cGraph = rb_define_module("Graph");
|
408
|
+
rb_define_method(subcomp_cGraph, "subcomp_match_by_ullmann", subcomp_match_by_ullmann, 5);
|
409
|
+
|
410
|
+
subcomp_cSubGraphDB = rb_define_class_under(subcomp_cGraph, "SubGraphDB", rb_cObject);
|
411
|
+
|
412
|
+
rb_define_method(subcomp_cSubGraphDB, "open_for_search", sdb_s_search, 0);
|
413
|
+
|
414
|
+
rb_define_singleton_method(subcomp_cSubGraphDB, "show", subcomp_show, 3);
|
415
|
+
rb_define_singleton_method(subcomp_cSubGraphDB, "match", subcomp_match_by_ullmann, 5);
|
416
|
+
}
|