chemruby 0.9.3 → 1.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/README +2 -2
- data/Rakefile +67 -63
- data/ext/extconf.rb +2 -0
- data/ext/subcomp.c +461 -320
- data/ext/utils.c +56 -0
- data/ext/utils.h +13 -0
- data/lib/chem.rb +34 -8
- data/lib/chem/db.rb +8 -0
- data/lib/chem/db/cansmi.rb +1 -1
- data/lib/chem/db/cdx.rb +1 -1
- data/lib/chem/db/cml.rb +52 -0
- data/lib/chem/db/gd.rb +64 -0
- data/lib/chem/db/gspan.rb +2 -2
- data/lib/chem/db/kcf_rpair.rb +34 -0
- data/lib/chem/db/kegg.rb +35 -1
- data/lib/chem/db/mdl.rb +75 -34
- data/lib/chem/db/opsin.rb +24 -0
- data/lib/chem/db/pdb.rb +105 -0
- data/lib/chem/db/pdf.rb +2 -0
- data/lib/chem/db/pubchem.rb +1071 -88
- data/lib/chem/db/rmagick.rb +5 -3
- data/lib/chem/db/sdf.rb +28 -2
- data/lib/chem/db/smiles/smiles.ry +27 -25
- data/lib/chem/db/smiles/smiparser.rb +29 -27
- data/lib/chem/db/types/type_gd.rb +35 -0
- data/lib/chem/db/types/type_gspan.rb +2 -2
- data/lib/chem/db/types/type_kcf.rb +19 -0
- data/lib/chem/db/types/type_kegg.rb +2 -0
- data/lib/chem/db/types/type_mdl.rb +1 -1
- data/lib/chem/db/types/type_png.rb +5 -1
- data/lib/chem/db/types/type_rdf.rb +22 -0
- data/lib/chem/db/types/type_xyz.rb +1 -1
- data/lib/chem/db/vector.rb +19 -3
- data/lib/chem/model.rb +5 -2
- data/lib/chem/utils.rb +17 -1
- data/lib/chem/utils/bitdb.rb +49 -0
- data/lib/chem/utils/cas.rb +28 -0
- data/lib/chem/utils/cdk.rb +403 -0
- data/lib/chem/utils/fingerprint.rb +98 -0
- data/lib/chem/utils/geometry.rb +8 -0
- data/lib/chem/utils/net.rb +303 -0
- data/lib/chem/utils/once.rb +28 -0
- data/lib/chem/utils/openbabel.rb +204 -0
- data/lib/chem/utils/sssr.rb +33 -25
- data/lib/chem/utils/sub.rb +6 -0
- data/lib/chem/utils/transform.rb +9 -8
- data/lib/chem/utils/ullmann.rb +138 -95
- data/lib/graph.rb +5 -6
- data/lib/graph/utils.rb +8 -0
- data/sample/calc_maximum_common_subgraph.rb +27 -0
- data/sample/calc_properties.rb +9 -0
- data/sample/data/atp.mol +69 -0
- data/sample/data/pioglitazone.mol +58 -0
- data/sample/data/rosiglitazone.mol +55 -0
- data/sample/data/troglitazone.mol +70 -0
- data/sample/find_compound_by_keggapi.rb +19 -0
- data/sample/generate_inchi.rb +7 -0
- data/sample/generate_substructurekey.rb +11 -0
- data/sample/images/ex6.rb +17 -0
- data/sample/images/ex7.rb +18 -0
- data/sample/iupac2mol.rb +8 -0
- data/sample/kekule.rb +13 -0
- data/sample/logp.rb +4 -0
- data/sample/mcs.rb +13 -0
- data/sample/mol2pdf.rb +8 -0
- data/sample/pubchem_fetch.rb +8 -0
- data/sample/pubchem_search.rb +12 -0
- data/sample/rosiglitazone.mol +57 -0
- data/sample/smarts.rb +10 -0
- data/sample/structure_match.rb +8 -0
- data/sample/structure_match_color.rb +22 -0
- data/sample/thiazolidinedione.mol +19 -0
- data/sample/troglitazone.mol +232 -0
- data/sample/vicinity.rb +8 -0
- data/test/data/CID_704.sdf +236 -0
- data/test/data/CID_994.sdf +146 -0
- data/test/data/db_EXPT03276.txt +321 -0
- data/test/data/pioglitazone.mol +58 -0
- data/test/data/rosiglitazone.mol +55 -0
- data/test/data/thiazolidinedione.mol +19 -0
- data/test/data/troglitazone.mol +70 -0
- data/test/{test_adj.rb → tc_adj.rb} +0 -0
- data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
- data/test/tc_casrn.rb +17 -0
- data/test/tc_cdk.rb +89 -0
- data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
- data/test/{test_chem.rb → tc_chem.rb} +0 -0
- data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
- data/test/{test_db.rb → tc_db.rb} +0 -0
- data/test/tc_develop.rb +38 -0
- data/test/tc_drugbank.rb +13 -0
- data/test/{test_eps.rb → tc_eps.rb} +0 -0
- data/test/tc_gd.rb +8 -0
- data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
- data/test/tc_graph.rb +15 -0
- data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
- data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
- data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
- data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
- data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
- data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
- data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
- data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
- data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
- data/test/tc_net.rb +5 -0
- data/test/tc_once.rb +29 -0
- data/test/tc_openbabel.rb +57 -0
- data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
- data/test/{test_prop.rb → tc_prop.rb} +1 -1
- data/test/tc_pubchem.rb +32 -0
- data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
- data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
- data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
- data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
- data/test/tc_sssr.rb +1 -0
- data/test/{test_sub.rb → tc_sub.rb} +0 -0
- data/test/tc_subcomp.rb +59 -0
- data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
- data/test/{test_writer.rb → tc_writer.rb} +0 -0
- data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
- data/test/ts_current.rb +11 -0
- data/test/ts_image.rb +6 -0
- data/test/ts_main.rb +12 -0
- metadata +259 -194
- data/lib/chem/utils/graph_db.rb +0 -146
- data/test/test_sssr.rb +0 -18
- data/test/test_subcomp.rb +0 -37
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
class Integer

  # Returns the zero-based positions of the bits that are set in this
  # integer, in ascending order (for example 5 => [0, 2]).
  #
  # Zero and negative integers yield an empty array.
  def to_bit_positions
    # A negative integer has no finite set of set bits; report none.
    return [] if self < 0

    (0...bit_length).select { |position| self[position] == 1 }
  end

end
|
20
|
+
|
21
|
+
|
22
|
+
module Chem
|
23
|
+
|
24
|
+
# Fingerprint-related extension of Chem::Atom.
module Atom
|
25
|
+
# Ring annotation filled in by Molecule#fingerprint: one entry (the ring
# length) is appended for every ring returned by find_sssr that contains
# this atom. Left nil for atoms that were never annotated.
attr_accessor :rings
|
26
|
+
end
|
27
|
+
|
28
|
+
module Molecule
|
29
|
+
|
30
|
+
# Depth-first enumeration of simple paths that start with +path+.
#
# node  :: the atom the walk currently stands on (the last entry of +path+)
# path  :: Array of atoms visited so far; it is extended and shrunk in
#          place, so a block that wants to keep a path must copy it
# max   :: paths holding more than +max+ atoms are neither yielded nor
#          extended
# block :: called with +path+ for every path found
#
# Hydrogen neighbours and atoms already on the path are skipped.
def f_dfs(node, path, max, &block)
  return if path.length > max

  yield path
  adjacent_to(node).each do |_bond, neighbor|
    next if neighbor.element == :H
    next if path.include?(neighbor)

    path.push(neighbor)
    f_dfs(neighbor, path, max, &block)
    path.pop
  end
end
|
43
|
+
|
44
|
+
# ELEMNUM = {
|
45
|
+
# :C => 0,
|
46
|
+
# :N => 1,
|
47
|
+
# :O => 2,
|
48
|
+
# :P => 4}
|
49
|
+
# ELEMNUM.default = 32
|
50
|
+
|
51
|
+
# Maps each element listed in Element2Number to the value 1 << number.
# Elements missing from the table fall back to 32.
ELEMNUM = Hash.new(32)
Element2Number.each do |elem, num|
  ELEMNUM[elem] = 1 << num
end
|
56
|
+
|
57
|
+
#
|
58
|
+
# Computes a hashed path fingerprint of the molecule.
#
# max    :: maximum number of atoms in an enumerated path (see f_dfs)
# n_bits :: number of bit positions available in the fingerprint
#
# Every distinct element sequence found by f_dfs (a sequence and its
# reverse count as one) is turned into a seed built from the position,
# the ELEMNUM value and the number of ring memberships of each atom.
# The seed selects one bit position below +n_bits+, which is set in the
# result.
#
# Side effect: Atom#rings is reset on every node and then refilled with
# the lengths of the rings (from find_sssr) that contain the atom.
#
# Returns the fingerprint as an Integer.
def fingerprint(max = 3, n_bits = 32)
  # Start from a clean slate so that calling this method twice does not
  # append the same ring sizes again and change the result.
  nodes.each { |atom| atom.rings = nil }
  find_sssr.each do |ring|
    size = ring.length
    ring.each { |atom| (atom.rings ||= []) << size }
  end

  fp = 0
  seen = Set.new

  nodes.each do |node|
    f_dfs(node, [node], max) do |path|
      elements = path.collect { |atom| atom.element.to_s }
      # Skip element sequences that were already hashed.
      next unless seen.add?(elements.join("."))

      seen.add(elements.reverse.join("."))

      # seed calculation
      seed = 0
      path.each_with_index do |atom, idx|
        ring_factor = atom.rings.nil? ? 1 : (1 << atom.rings.length)
        seed += (1 << (5 * idx)) * ELEMNUM[atom.element] * ring_factor
      end

      # A private generator yields the same number as srand(seed); rand(n_bits)
      # without reseeding the process-wide random number generator.
      fp |= 1 << Random.new(seed).rand(n_bits)
    end
  end
  fp
end
|
93
|
+
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
97
|
+
|
98
|
+
|
data/lib/chem/utils/geometry.rb
CHANGED
@@ -3,6 +3,14 @@ module Chem
|
|
3
3
|
|
4
4
|
module Molecule
|
5
5
|
|
6
|
+
# Returns the extent of the molecule along each axis as an Array
# [size_x, size_y, size_z], where each size is the difference between
# the largest and the smallest coordinate found among the nodes.
def box_size
  atoms = nodes # fetched once and reused for all three axes
  coordinates = [
    atoms.map { |atom| atom.x },
    atoms.map { |atom| atom.y },
    atoms.map { |atom| atom.z }
  ]
  coordinates.map do |values|
    low, high = values.minmax
    high - low
  end
end
|
13
|
+
|
6
14
|
# Automatically assigns 2-dimensional geometry
|
7
15
|
# This method may be implicitly called from ChemRuby
|
8
16
|
# if nil is assigned to Atom#x
|
@@ -0,0 +1,303 @@
|
|
1
|
+
# Copyright (C) 2005, 2006 KADOWAKI Tadashi <tadakado@gmail.com>
|
2
|
+
# TANAKA Nobuya <nobuya.tanaka@gmail.com>
|
3
|
+
# APODACA Richard <r_apodaca@users.sf.net>
|
4
|
+
|
5
|
+
require 'net/http'
|
6
|
+
require 'net/ftp'
|
7
|
+
require 'date'
|
8
|
+
require 'rexml/document'
|
9
|
+
require 'cgi'
|
10
|
+
|
11
|
+
module Chem
|
12
|
+
|
13
|
+
# Mixin with a minimal HTTP helper shared by the NCBI client modules.
module NetUtils

  # Performs an HTTP GET on the URL given as the string +str+ and returns
  # whatever Net::HTTP.get returns for it.
  def http_get(str)
    target = URI.parse(str)
    Net::HTTP.get(target)
  end

end
|
20
|
+
|
21
|
+
# Searches an NCBI Entrez database for +term+.
#
# term    :: the search term
# options :: Hash of ESearch parameters; options[:db] selects the target
#            (:pubmed or :pubchem). Remaining entries are passed on to
#            Chem::NCBI::ESearch.search unchanged. The hash itself is not
#            modified.
#
# Returns the Hash produced by Chem::NCBI::ESearch.search, or nil when
# options[:db] is neither :pubmed nor :pubchem.
def self.search_net(term, options)
  db = case options[:db]
       when :pubmed
         :pubmed
       when :pubchem
         # NOTE(review): the demo code in this file queries PubChem through
         # the Entrez databases "pccompound" and "pcsubstance"; the compound
         # database is assumed to be the intended default - confirm.
         :pccompound
       end
  return nil if db.nil?

  # ESearch only offers .search, which takes one parameter Hash that
  # carries the term.
  Chem::NCBI::ESearch.search(options.merge(:term => term, :db => db))
end
|
29
|
+
|
30
|
+
class NCBI
|
31
|
+
EUtilsURI = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
|
32
|
+
PubChemURI = 'http://pubchem.ncbi.nlm.nih.gov/'
|
33
|
+
|
34
|
+
# EInfo
|
35
|
+
module EInfo
|
36
|
+
extend Chem::NetUtils
|
37
|
+
|
38
|
+
EInfoURI = EUtilsURI + 'einfo.fcgi?'
|
39
|
+
|
40
|
+
# Queries the EInfo service.
#
# params :: with no parameters, the names of all databases listed in the
#           reply are returned as an Array of Strings; with :db given, a
#           DbInfo built from the reply for that database is returned.
#
# Returns nil when parameters are given but :db is not among them.
def self.query(params = {})
  if params.empty?
    xml = REXML::Document.new(http_get(EInfoURI))
    xml.elements.to_a("eInfoResult/DbList/DbName").map { |element| element.text }
  elsif params[:db]
    # The value is escaped like the query values in ESearch.search.
    uri = EInfoURI + "db=" + CGI.escape(params[:db].to_s)
    DbInfo.new(REXML::Document.new(http_get(uri)))
  end
end
|
52
|
+
|
53
|
+
# Description of one Entrez database, read from an EInfo reply.
#
# All scalar attributes hold the element text as a String. +fields+ is an
# Array with one Hash per Field element; its boolean entries are true
# when the reply contains "Y".
class DbInfo
  attr_reader :db_name, :menu_name, :description, :count, :last_update, :fields

  # xml :: REXML document holding an eInfoResult/DbInfo element
  def initialize(xml)
    info = "eInfoResult/DbInfo/"
    @db_name     = xml.elements[info + "DbName"].text
    @menu_name   = xml.elements[info + "MenuName"].text
    @description = xml.elements[info + "Description"].text
    @count       = xml.elements[info + "Count"].text
    @last_update = xml.elements[info + "LastUpdate"].text
    @fields = xml.elements.to_a(info + "FieldList/Field").map do |field|
      {
        :name         => field.elements["Name"].text,
        :full_name    => field.elements["FullName"].text,
        :description  => field.elements["Description"].text,
        :term_count   => field.elements["TermCount"].text,
        :is_date      => field.elements["IsDate"].text == "Y",
        :is_numerical => field.elements["IsNumerical"].text == "Y",
        :single_token => field.elements["SingleToken"].text == "Y",
        :hierarchy    => field.elements["Hierarchy"].text == "Y",
        :is_hidden    => field.elements["IsHidden"].text == "Y",
      }
    end
  end
end
|
77
|
+
|
78
|
+
end # EInfo module
|
79
|
+
|
80
|
+
module ESearch
|
81
|
+
extend Chem::NetUtils
|
82
|
+
|
83
|
+
ESearchURI = EUtilsURI + 'esearch.fcgi?'
|
84
|
+
|
85
|
+
# Runs an ESearch query.
#
# params :: Hash of query parameters (for example :db and :term); keys
#           and values are converted to strings and URL-escaped.
#
# Returns a Hash with :count, :retmax and :retstart (Integers) and
# :id_list (Array of Integer ids).
#
# Raises RuntimeError when the reply contains an ERROR element.
def self.search(params)
  query = params.collect { |key, value| CGI.escape(key.to_s) + "=" + CGI.escape(value.to_s) }.join("&")
  xml = REXML::Document.new(http_get(ESearchURI + query))
  raise "Error no result" unless xml.elements["eSearchResult/ERROR"].nil?

  {
    :count    => xml.elements["eSearchResult/Count"].text.to_i,
    :retmax   => xml.elements["eSearchResult/RetMax"].text.to_i,
    :retstart => xml.elements["eSearchResult/RetStart"].text.to_i,
    :id_list  => xml.elements.to_a("eSearchResult/IdList/Id").map { |element| element.text.to_i },
  }
end
|
102
|
+
|
103
|
+
end
|
104
|
+
|
105
|
+
module PCFetch
|
106
|
+
|
107
|
+
extend Chem::NetUtils
|
108
|
+
|
109
|
+
PCFetchURI = PubChemURI + 'pc_fetch/pc_fetch.cgi?'
|
110
|
+
|
111
|
+
# Requests a download from the PubChem pc_fetch service and retrieves the
# resulting SDF file from the NCBI FTP server.
#
# params :: Hash of pc_fetch parameters; :retmode is mandatory. An
#           optional :localfilename names the file to write; by default
#           the name is built from params[:db] and params[:id]. The hash
#           is not modified.
#
# Returns the name of the local file.
#
# Raises RuntimeError when :retmode is missing or when the reply holds
# no request number, and Net::FTPPermError when the file is still
# unavailable after the last attempt.
def self.fetch(params)
  raise "You need to specify :retmode" if params[:retmode].nil?

  # Values are escaped like the query values in ESearch.search.
  uri = PCFetchURI + params.collect { |key, value| key.to_s + "=" + CGI.escape(value.to_s) }.join("&")
  doc = http_get(uri)
  if m = /pubchem\/\.fetch\/(\d+)\.sdf/.match(doc)
    puts 'ftp'
  elsif m = /reqid=(\d+)/.match(doc)
    puts 'http'
  else
    raise "Cannot retrieve file"
  end
  num = m[1].to_i

  localfilename = params[:localfilename] || "%s%d.sdf" % [params[:db], params[:id]]

  # NOTE(review): a permission error is retried, presumably because the
  # file is not ready on the server yet - confirm. The retry is bounded
  # and paced so a request that never completes cannot loop forever.
  max_attempts = 10
  attempts = 0
  begin
    attempts += 1
    # The block form closes the connection, also when the transfer fails.
    Net::FTP.open("ftp.ncbi.nih.gov") do |ftp|
      ftp.login
      ftp.gettextfile("pubchem/.fetch/%d.sdf" % num, localfilename)
    end
  rescue Net::FTPPermError
    puts "error : #{num}"
    raise if attempts >= max_attempts
    sleep 1
    retry
  end

  localfilename
end
|
139
|
+
|
140
|
+
end
|
141
|
+
|
142
|
+
module ESummary
|
143
|
+
|
144
|
+
extend Chem::NetUtils
|
145
|
+
ESummaryURI = EUtilsURI + 'esummary.fcgi?'
|
146
|
+
|
147
|
+
# Fetches the raw ESummary reply for +params+.
#
# params :: Hash of query parameters (for example :db and :id); values
#           are converted to strings and URL-escaped, as in
#           ESearch.search.
#
# Returns whatever http_get returns for the request.
def self.get(params)
  query = params.collect { |key, value| key.to_s + "=" + CGI.escape(value.to_s) }.join("&")
  http_get(ESummaryURI + query)
end
|
151
|
+
|
152
|
+
# Fetches an ESummary reply and converts its top-level Item elements
# into a Hash keyed by the Item's Name attribute.
#
# Conversion by Type attribute:
# "String", "Date" :: the element text
# "Integer"        :: the element text converted with to_i
# "List"           :: Array of the child Items; "String" and "Integer"
#                     children are converted as above, every other child
#                     becomes ""
# anything else    :: nil
#
# params :: passed unchanged to get
def self.get_parsed(params)
  xml = REXML::Document.new(get(params))
  tree = {}
  xml.elements.each("eSummaryResult/DocSum/Item") do |item|
    tree[item.attributes["Name"]] =
      case item.attributes["Type"]
      when "String", "Date"
        item.text
      when "Integer"
        item.text.to_i
      when "List"
        item.elements.to_a("Item").map do |child|
          case child.attributes["Type"]
          when "String"
            child.text
          when "Integer"
            child.text.to_i
          else
            ""
          end
        end
      end
  end
  tree
end
|
181
|
+
end
|
182
|
+
|
183
|
+
# obsolete
#
# Downloads the SDF file for a stored Entrez search result through the
# PubChem pc_fetch service. All work happens in the constructor.
class EFetch

  include Chem::NetUtils

  EFetchURI = EUtilsURI + 'efetch.fcgi' + '?'

  # query_key, web_env :: Strings identifying a stored search; they are
  #                       sent as the query_key and WebEnv parameters.
  #
  # Nothing is downloaded when the first reply contains no bookmark link.
  # Raises RuntimeError when the bookmarked page holds no FTP location.
  def initialize(query_key, web_env)
    # PCFetchURI is defined in the sibling PCFetch module, so it has to be
    # qualified to be reachable from this class.
    uri = [
      PCFetch::PCFetchURI,
      'db=pccompound',
      '&WebEnv=' + web_env,
      '&query_key=' + query_key,
      '&retmode=sdf',
      '&compression=none'
    ].join
    p uri
    doc = http_get(uri)
    link = /bookmarking this page or by going to<\/p><p><a href=\"([^"]+)/.match(doc)
    return if link.nil?

    sleep 1
    p link[1]
    location = /"ftp:\/\/([^"]+)/.match(http_get(link[1]))
    raise "Cannot retrieve file" if location.nil?
    p location[1]

    # NOTE(review): the file advertised by the server is downloaded here
    # instead of one fixed request number; this assumes the link has the
    # form host/path - confirm against a live reply.
    host, remote_path = location[1].split("/", 2)
    raise "Cannot retrieve file" if remote_path.nil? || remote_path.empty?

    begin
      sleep 0.5
      # The block form closes the connection, also when the transfer fails.
      Net::FTP.open(host) do |ftp|
        ftp.login
        ftp.gettextfile(remote_path)
      end
    rescue Net::FTPPermError
      # Kept best-effort: a refused transfer is reported, not raised.
      sleep 1
      puts "error : #{remote_path}"
    end
  end

  # Convenience wrapper around new; returns the EFetch instance.
  def self.fetch_all(query_key, web_env)
    new(query_key, web_env)
  end

end
|
226
|
+
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
# Demo, run only when this file is executed directly. Every step talks to
# the NCBI servers.
if __FILE__ == $0
  require 'pp' # pp is used below

  # search PubChem compounds using InChI
  query = {
    :db => :pccompound,
    :term => '"InChI=1/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)/f/h11H"'
  }

  p Chem::NCBI::ESearch.search(query)

  # search PubChem substance with term

  query = {
    :db => "pcsubstance",
    :term => 'benzene'
  }

  p Chem::NCBI::ESearch.search(query)

  # search PubChem substance with complete synonyms

  query = {
    :db => "pcsubstance",
    :term => 'benzene',
    :field => 'CSYN',
  }

  p Chem::NCBI::ESearch.search(query)

  # search PubMed
  # ESearch.search URL-escapes every value, so the term is written with
  # plain spaces; a pre-encoded "+" would be escaped a second time.
  query = {
    :db => "pubmed",
    :term => "asthma[mh] OR hay fever[mh]",
  }

  p query
  p Chem::NCBI::ESearch.search(query)

  # Retrieving more entries

  query = {
    :db => "pubmed",
    :term => "cancer",
    :reldate => 60,
    :datetype => "edat",
    :retmax => 100,
    :retstart => 300,
  }

  p Chem::NCBI::ESearch.search(query)

  # Retrieving Eutils database information
  p Chem::NCBI::EInfo.query

  # Retrieving information about PubChem Compounds

  pp Chem::NCBI::EInfo.query(:db => :pccompound)

  # Retrieving pccompound using PC_Fetch
  # Not recommended
  # It seems that EFetch does not accept db=pccompound
  # PCFetch could be an alternative to EFetch.

  100.upto(110) do |n|
    puts n
    Chem::NCBI::PCFetch::fetch({:db => :pccompound, :id => n, :retmode => :sdf})
  end

  # Retrieving parsed summary for entries
  # CID:100
  p Chem::NCBI::ESummary::get_parsed({:db => :pccompound, :id => 100})

end
|
303
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Mixin that adds the class-level macro +once+, which memoizes the result
# of instance methods.
#
#   class Foo
#     include Once
#     def expensive; ...; end
#     once :expensive
#   end
module Once

  # Makes +once+ available on every class or module that includes Once.
  def self.included(base)
    base.extend(ClassMethods)
  end

  module ClassMethods
    # Wraps each named instance method so that its first result is stored
    # on the object and returned by all later calls, whatever arguments
    # they pass. A nil or false result is remembered as well.
    #
    # Frozen objects cannot store the result; for them the original
    # method is called every time.
    #
    # The original implementation stays reachable under a private alias.
    def once(*ids) # :nodoc:
      ids.each do |id|
        name = id.to_sym
        # Method names may contain characters such as "?" that are not
        # allowed in instance variable names, so both helper names are
        # derived from the hex encoding of the method name.
        tag = name.to_s.unpack('H*').first
        original = :"__once_#{tag}__"
        ivar = :"@__once_#{tag}__"

        alias_method original, name
        private original

        define_method(name) do |*args, &block|
          if instance_variable_defined?(ivar)
            instance_variable_get(ivar)
          elsif frozen?
            send(original, *args, &block)
          else
            instance_variable_set(ivar, send(original, *args, &block))
          end
        end
      end
    end
  end

end
|
@@ -0,0 +1,204 @@
|
|
1
|
+
# Copyright (C) 2006 Richard L. Apodaca
|
2
|
+
# Nobuya Tanaka
|
3
|
+
|
4
|
+
require 'chem'
|
5
|
+
|
6
|
+
module Chem
|
7
|
+
|
8
|
+
# OpenBabel-related extension of Chem::Atom.
module Atom
|
9
|
+
# The OpenBabel atom created for this node; assigned by
# Molecule#use_open_babel and read back there to look up bond end points.
attr_accessor :ob_atom
|
10
|
+
end
|
11
|
+
|
12
|
+
# OpenBabel bridge for Chem::Molecule: mirrors the molecule into an
# OpenBabel OBMol and uses OBConversion to export it.
module Molecule

  # The OBMol built by use_open_babel, or nil if it has not been built.
  attr_reader :ob_mol

  # Writes the molecule to the file +path+ in the OpenBabel output format
  # +filetype+. The OBMol is built first when it does not exist yet.
  def ob_save_as(path, filetype)
    use_open_babel if @ob_mol.nil?
    ob_converter_for(filetype).write_file(@ob_mol, path)
  end

  # Returns the molecule written in the OpenBabel output format
  # +filetype+, as produced by OBConversion#write_string. The OBMol is
  # built first when it does not exist yet.
  def ob_export_as(filetype)
    use_open_babel if @ob_mol.nil?
    ob_converter_for(filetype).write_string(@ob_mol)
  end

  # Returns the "inchi" export of the molecule without its last
  # character. The OBMol is always rebuilt first.
  def to_inchi
    use_open_babel
    ob_export_as("inchi").chop
  end

  # set OpenBabel OBMol object to instance variable @ob_mol
  #
  # Creates one OpenBabel atom per node (atomic number looked up in
  # Element2Number, coordinates converted with to_f) and remembers it in
  # the node's ob_atom; then adds one bond per edge, using the bond's v
  # converted with to_i as the bond order.
  def use_open_babel
    ob_require_binding
    @ob_mol = ::OpenBabel::OBMol.new
    nodes.each do |node|
      atom = @ob_mol.new_atom
      atom.set_atomic_num(Element2Number[node.element])
      atom.set_vector(node.x.to_f, node.y.to_f, node.z.to_f)
      node.ob_atom = atom
    end
    edges.each do |bond, atom1, atom2|
      @ob_mol.add_bond(
        atom1.ob_atom.get_idx,
        atom2.ob_atom.get_idx,
        bond.v.to_i
      )
    end
  end

  private

  # Loads the OpenBabel binding, trying both spellings of the library
  # name. Only a failed load triggers the fallback; other errors are no
  # longer swallowed.
  def ob_require_binding
    begin
      require 'openbabel'
    rescue LoadError
      require 'OpenBabel'
    end
  end

  # Returns an OBConversion configured for the output format +filetype+.
  def ob_converter_for(filetype)
    conv = ::OpenBabel::OBConversion.new
    conv.set_out_format(filetype.to_s)
    conv
  end

end
|
58
|
+
|
59
|
+
# Readers that use the OpenBabel binding and wrap the resulting OBMol
# objects in OBMolecule.
module OpenBabel

  # Parses the SMILES string +smiles+ and returns an OBMolecule.
  def self.parse_smiles(smiles)
    require 'openbabel'

    converter = ::OpenBabel::OBConversion.new
    converter.set_in_format("smi")
    mol = ::OpenBabel::OBMol.new
    converter.read_string(mol, smiles)
    OBMolecule.new(mol)
  end

  # Reads every molecule from the SDF file at +path+.
  #
  # Returns an Array of OBMolecule, one per successful read; it is empty
  # when the first read already fails.
  def self.load_sdf(path)
    require 'openbabel'

    conv = ::OpenBabel::OBConversion.new
    conv.set_in_format("sdf")
    mols = []
    mol = ::OpenBabel::OBMol.new
    more = conv.read_file(mol, path)
    while more
      # Only molecules that were actually read are kept.
      mols << OBMolecule.new(mol)
      mol = ::OpenBabel::OBMol.new
      more = conv.read(mol)
    end
    mols
  end

  # A compiled SMARTS pattern backed by OBSmartsPattern.
  class OBSmarts

    # smarts :: the SMARTS string to compile
    def initialize(smarts)
      require 'openbabel'
      @pat = ::OpenBabel::OBSmartsPattern.new
      @pat.init(smarts)
    end

    # Matches the pattern against +mol+ and returns the result of
    # OBSmartsPattern#match. The molecule's OBMol is built first when
    # mol.ob_mol is nil.
    def match(mol)
      mol.use_open_babel if mol.ob_mol.nil?
      @pat.match(mol.ob_mol)
    end

    # Returns the pattern's unique-match list as an Array of Arrays of
    # Integers.
    def get_umap_list
      @pat.get_umap_list.collect { |ary| ary.collect { |i| i.to_i } }
    end

  end

  # Returns an OBSmarts for the SMARTS string +smarts+.
  def self.parse_smarts(smarts)
    OBSmarts.new(smarts)
  end

  # load_as(path, filetype)
  # path : path to input file
  # filetype : "alc", "bgf"
  # see http://openbabel.sourceforge.net/wiki/Babel
  #
  # Returns an OBMolecule wrapping the molecule read from the file.
  def self.load_as(path, filetype)
    # Loaded here as in the other readers, so load_as also works when it
    # is the first entry point used.
    require 'openbabel'

    conv = ::OpenBabel::OBConversion.new
    conv.set_in_format(filetype.to_s)
    mol = ::OpenBabel::OBMol.new
    conv.read_file(mol, path)
    OBMolecule.new(mol)
  end

  # Atom behaviour mixed into the OpenBabel atoms held by OBMolecule.
  module OBAtom
    include Atom
  end

  # Molecule backed directly by an OpenBabel OBMol.
  class OBMolecule
    include Molecule

    attr_reader :ob_mol
    attr_reader :nodes

    # ob_mol :: the OBMol to wrap; its atoms (numbered from 1) are
    #           extended with OBAtom and collected in +nodes+.
    def initialize(ob_mol)
      @ob_mol = ob_mol
      @nodes = (1..@ob_mol.num_atoms).map do |n|
        @ob_mol.get_atom(n).extend(OBAtom)
      end
    end

  end
end # OpenBabel module
|
144
|
+
|
145
|
+
end
|
146
|
+
|
147
|
+
# Ad-hoc demo, run only when this file is executed directly: builds a
# molecule from the SMILES string "CCC", attaches an OpenBabel OBMol with
# use_open_babel and prints a few properties.
# NOTE(review): num_atoms, num_bonds, get_mol_wt and get_exact_mass are
# called on mol itself rather than on mol.ob_mol - confirm that the
# ChemRuby molecule responds to them.
if __FILE__ == $0
|
148
|
+
mol = SMILES("CCC")
|
149
|
+
mol.use_open_babel
|
150
|
+
p mol.num_atoms
|
151
|
+
p mol.num_bonds
|
152
|
+
p mol.get_mol_wt
|
153
|
+
p mol.get_exact_mass
|
154
|
+
# p mol.add_hydrogens # BUS Error !?
|
155
|
+
# The branch below can never run (its condition is the literal false).
# It appears to be kept as a scratchpad of OpenBabel binding calls -
# TODO confirm before relying on any of it.
elsif false
|
156
|
+
ob = Chem::OpenBabel.parse_smiles('CC(C)CCCC(C)C1CCC2C1(CCC3C2CC=C4C3(CCC(C4)O)C)C')
|
157
|
+
p ob.get_mol_wt
|
158
|
+
p ob.num_atoms
|
159
|
+
p ob.num_bonds
|
160
|
+
# read
|
161
|
+
# is_last
|
162
|
+
|
163
|
+
atom = ob.new_atom
|
164
|
+
|
165
|
+
# creating new molecule
|
166
|
+
|
167
|
+
mol = OBMol.new
|
168
|
+
atom1 = mol.add_atom
|
169
|
+
atom2 = mol.add_atom
|
170
|
+
|
171
|
+
# Atom
|
172
|
+
|
173
|
+
atom1 = mol.get_first_atom
|
174
|
+
atom1 = mol.get_atom(1)
|
175
|
+
|
176
|
+
# Atom setter and getter
|
177
|
+
|
178
|
+
atom1.set_atomic_num(6) # Carbon
|
179
|
+
|
180
|
+
atom1.get_atomic_mass # Carbon : 12.0107
|
181
|
+
|
182
|
+
atom1.set_aromatic # aromatic
|
183
|
+
atom1.unset_aromatic # not aromatic
|
184
|
+
atom1.is_aromatic # return true or false
|
185
|
+
|
186
|
+
atom1.is_amide_nitrogen # return true or false
|
187
|
+
|
188
|
+
|
189
|
+
# atom count starts from 1 (not 0)
|
190
|
+
# mol.add_bond(0, 1, 1) fails!
|
191
|
+
mol.add_bond(1, 2, 1)# from, to, bond_order
|
192
|
+
# bond count starts from 0 (not 1)
|
193
|
+
|
194
|
+
bond = mol.get_bond(0)
|
195
|
+
bond.is_double
|
196
|
+
bond.is_single
|
197
|
+
bond.is_amide
|
198
|
+
bond.get_bond_order # bond.get_bo
|
199
|
+
|
200
|
+
# bond length
|
201
|
+
|
202
|
+
bond.get_length
|
203
|
+
bond.set_length# arguments ?
|
204
|
+
end
|