chemruby 0.9.3 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -2
- data/Rakefile +67 -63
- data/ext/extconf.rb +2 -0
- data/ext/subcomp.c +461 -320
- data/ext/utils.c +56 -0
- data/ext/utils.h +13 -0
- data/lib/chem.rb +34 -8
- data/lib/chem/db.rb +8 -0
- data/lib/chem/db/cansmi.rb +1 -1
- data/lib/chem/db/cdx.rb +1 -1
- data/lib/chem/db/cml.rb +52 -0
- data/lib/chem/db/gd.rb +64 -0
- data/lib/chem/db/gspan.rb +2 -2
- data/lib/chem/db/kcf_rpair.rb +34 -0
- data/lib/chem/db/kegg.rb +35 -1
- data/lib/chem/db/mdl.rb +75 -34
- data/lib/chem/db/opsin.rb +24 -0
- data/lib/chem/db/pdb.rb +105 -0
- data/lib/chem/db/pdf.rb +2 -0
- data/lib/chem/db/pubchem.rb +1071 -88
- data/lib/chem/db/rmagick.rb +5 -3
- data/lib/chem/db/sdf.rb +28 -2
- data/lib/chem/db/smiles/smiles.ry +27 -25
- data/lib/chem/db/smiles/smiparser.rb +29 -27
- data/lib/chem/db/types/type_gd.rb +35 -0
- data/lib/chem/db/types/type_gspan.rb +2 -2
- data/lib/chem/db/types/type_kcf.rb +19 -0
- data/lib/chem/db/types/type_kegg.rb +2 -0
- data/lib/chem/db/types/type_mdl.rb +1 -1
- data/lib/chem/db/types/type_png.rb +5 -1
- data/lib/chem/db/types/type_rdf.rb +22 -0
- data/lib/chem/db/types/type_xyz.rb +1 -1
- data/lib/chem/db/vector.rb +19 -3
- data/lib/chem/model.rb +5 -2
- data/lib/chem/utils.rb +17 -1
- data/lib/chem/utils/bitdb.rb +49 -0
- data/lib/chem/utils/cas.rb +28 -0
- data/lib/chem/utils/cdk.rb +403 -0
- data/lib/chem/utils/fingerprint.rb +98 -0
- data/lib/chem/utils/geometry.rb +8 -0
- data/lib/chem/utils/net.rb +303 -0
- data/lib/chem/utils/once.rb +28 -0
- data/lib/chem/utils/openbabel.rb +204 -0
- data/lib/chem/utils/sssr.rb +33 -25
- data/lib/chem/utils/sub.rb +6 -0
- data/lib/chem/utils/transform.rb +9 -8
- data/lib/chem/utils/ullmann.rb +138 -95
- data/lib/graph.rb +5 -6
- data/lib/graph/utils.rb +8 -0
- data/sample/calc_maximum_common_subgraph.rb +27 -0
- data/sample/calc_properties.rb +9 -0
- data/sample/data/atp.mol +69 -0
- data/sample/data/pioglitazone.mol +58 -0
- data/sample/data/rosiglitazone.mol +55 -0
- data/sample/data/troglitazone.mol +70 -0
- data/sample/find_compound_by_keggapi.rb +19 -0
- data/sample/generate_inchi.rb +7 -0
- data/sample/generate_substructurekey.rb +11 -0
- data/sample/images/ex6.rb +17 -0
- data/sample/images/ex7.rb +18 -0
- data/sample/iupac2mol.rb +8 -0
- data/sample/kekule.rb +13 -0
- data/sample/logp.rb +4 -0
- data/sample/mcs.rb +13 -0
- data/sample/mol2pdf.rb +8 -0
- data/sample/pubchem_fetch.rb +8 -0
- data/sample/pubchem_search.rb +12 -0
- data/sample/rosiglitazone.mol +57 -0
- data/sample/smarts.rb +10 -0
- data/sample/structure_match.rb +8 -0
- data/sample/structure_match_color.rb +22 -0
- data/sample/thiazolidinedione.mol +19 -0
- data/sample/troglitazone.mol +232 -0
- data/sample/vicinity.rb +8 -0
- data/test/data/CID_704.sdf +236 -0
- data/test/data/CID_994.sdf +146 -0
- data/test/data/db_EXPT03276.txt +321 -0
- data/test/data/pioglitazone.mol +58 -0
- data/test/data/rosiglitazone.mol +55 -0
- data/test/data/thiazolidinedione.mol +19 -0
- data/test/data/troglitazone.mol +70 -0
- data/test/{test_adj.rb → tc_adj.rb} +0 -0
- data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
- data/test/tc_casrn.rb +17 -0
- data/test/tc_cdk.rb +89 -0
- data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
- data/test/{test_chem.rb → tc_chem.rb} +0 -0
- data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
- data/test/{test_db.rb → tc_db.rb} +0 -0
- data/test/tc_develop.rb +38 -0
- data/test/tc_drugbank.rb +13 -0
- data/test/{test_eps.rb → tc_eps.rb} +0 -0
- data/test/tc_gd.rb +8 -0
- data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
- data/test/tc_graph.rb +15 -0
- data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
- data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
- data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
- data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
- data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
- data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
- data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
- data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
- data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
- data/test/tc_net.rb +5 -0
- data/test/tc_once.rb +29 -0
- data/test/tc_openbabel.rb +57 -0
- data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
- data/test/{test_prop.rb → tc_prop.rb} +1 -1
- data/test/tc_pubchem.rb +32 -0
- data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
- data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
- data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
- data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
- data/test/tc_sssr.rb +1 -0
- data/test/{test_sub.rb → tc_sub.rb} +0 -0
- data/test/tc_subcomp.rb +59 -0
- data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
- data/test/{test_writer.rb → tc_writer.rb} +0 -0
- data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
- data/test/ts_current.rb +11 -0
- data/test/ts_image.rb +6 -0
- data/test/ts_main.rb +12 -0
- metadata +259 -194
- data/lib/chem/utils/graph_db.rb +0 -146
- data/test/test_sssr.rb +0 -18
- data/test/test_subcomp.rb +0 -37
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
class Integer

  # Returns the zero-based positions of the bits that are set in this
  # integer, in ascending order (for example 5 => [0, 2]).
  #
  # Zero and negative integers yield an empty Array.
  def to_bit_positions
    return [] unless self > 0
    # For a positive integer, to_s(2).length is its number of significant bits.
    (0...to_s(2).length).select { |position| self[position] == 1 }
  end

end
|
20
|
+
|
21
|
+
|
22
|
+
module Chem

  module Atom
    # Sizes of the rings (as reported by Molecule#find_sssr) this atom is a
    # member of; nil when the atom is in no ring or Molecule#fingerprint has
    # not been called yet.
    attr_accessor :rings
  end

  module Molecule

    # Depth-first enumeration of simple paths that start at +node+.
    #
    # node  :: atom the current path ends at
    # path  :: Array of atoms visited so far (mutated in place while recursing)
    # max   :: maximum number of atoms in a yielded path
    # block :: receives +path+ for every path of at most +max+ atoms
    #
    # Hydrogen neighbours and atoms already on the path are skipped.  The
    # yielded Array is the live working path, so callers that need to keep it
    # must copy it.
    def f_dfs(node, path, max, &block)
      return if path.length > max

      yield path
      adjacent_to(node).each do |bond, n|
        next if n.element == :H
        next if path.include?(n)

        path.push(n)
        f_dfs(n, path, max, &block)
        path.pop
      end
    end

    # ELEMNUM = {
    # :C => 0,
    # :N => 1,
    # :O => 2,
    # :P => 4}
    # ELEMNUM.default = 32

    # Maps an element symbol to 1 << Element2Number[element].
    ELEMNUM = Element2Number.inject({}) do |ret, (elem, num)|
      ret[elem] = 1 << num
      ret
    end
    # NOTE(review): 32 equals 1 << 5, so unknown elements share the value of
    # whichever element Element2Number maps to 5 -- confirm this is intended.
    ELEMNUM.default = 32

    # Computes a path-based fingerprint of the molecule as an Integer bit set.
    #
    # max    :: maximum number of atoms per enumerated path (default 3)
    # n_bits :: width of the fingerprint in bits (default 32)
    #
    # Every distinct element sequence (a path and its reverse count as one)
    # sets one bit whose position is drawn from a pseudo random generator
    # seeded with a value derived from the elements and ring counts on the path.
    #
    # Side effects: overwrites Atom#rings on every node, and reseeds the
    # process-wide random generator through Kernel#srand.
    def fingerprint(max = 3, n_bits = 32)

      # Ring membership is rebuilt from scratch.  Appending to the values left
      # behind by a previous call would inflate the ring counts and make a
      # second call on the same molecule return a different fingerprint.
      nodes.each { |atom| atom.rings = nil }
      find_sssr.each do |ring|
        len = ring.length
        ring.each do |atom|
          (atom.rings ||= []) << len
        end
      end

      fp = 0
      set = Set.new

      nodes.each do |node|
        f_dfs(node, [node], max) do |path|
          # Exclude unwanted path: each element sequence is handled only once
          key = path.collect{|atom| atom.element.to_s}.join(".")
          next if set.include?(key)

          set.add(key)
          set.add(path.reverse.collect{|atom| atom.element.to_s}.join("."))
          # seed calculation
          seed = 0
          path.each_with_index do |atom, idx|
            seed += (1 << ( 5 * idx)) *
              ELEMNUM[atom.element] *
              (atom.rings.nil? ? 1 : (1 << atom.rings.length))
          end
          # NOTE(review): srand reseeds the global generator for the whole
          # process; kept so that the bit positions stay exactly as before.
          srand(seed)
          fp |= 1 << rand(n_bits)
        end
      end
      fp
    end

  end

end
|
97
|
+
|
98
|
+
|
data/lib/chem/utils/geometry.rb
CHANGED
@@ -3,6 +3,14 @@ module Chem
|
|
3
3
|
|
4
4
|
module Molecule
|
5
5
|
|
6
|
+
# Returns the extent of the molecule along each axis as an Array
# [size_x, size_y, size_z], i.e. the largest minus the smallest coordinate
# found among the atoms returned by nodes().
def box_size
atoms = nodes()
[:x, :y, :z].collect do |axis|
  # One pass per axis yields both extremes at once.
  lowest, highest = atoms.collect { |atom| atom.send(axis) }.minmax
  highest - lowest
end
end
|
13
|
+
|
6
14
|
# Automatically assigns 2-dimensional geometry
|
7
15
|
# This method may be implicitly called from ChemRuby
|
8
16
|
# if nil is assigned to Atom#x
|
@@ -0,0 +1,303 @@
|
|
1
|
+
# Copyright (C) 2005, 2006 KADOWAKI Tadashi <tadakado@gmail.com>
|
2
|
+
# TANAKA Nobuya <nobuya.tanaka@gmail.com>
|
3
|
+
# APODACA Richard <r_apodaca@users.sf.net>
|
4
|
+
|
5
|
+
require 'net/http'
|
6
|
+
require 'net/ftp'
|
7
|
+
require 'date'
|
8
|
+
require 'rexml/document'
|
9
|
+
require 'cgi'
|
10
|
+
|
11
|
+
module Chem
|
12
|
+
|
13
|
+
module NetUtils

  # Issues an HTTP GET for the URL given as a String and returns what
  # Net::HTTP.get returns for it.
  def http_get(str)
    target = URI.parse(str)
    Net::HTTP.get(target)
  end

end
|
20
|
+
|
21
|
+
# Searches an NCBI Entrez database for +term+.
#
# term    :: search term
# options :: Hash of ESearch parameters; options[:db] selects the database
#            (:pubmed or :pubchem)
#
# Returns the result Hash of Chem::NCBI::ESearch.search, or nil when
# options[:db] is neither :pubmed nor :pubchem.
def self.search_net(term, options)
  db = case options[:db]
       when :pubmed
         :pubmed
       when :pubchem
         # NOTE(review): :pubchem is mapped to the compound database
         # (pccompound) -- confirm that pcsubstance is not wanted here.
         :pccompound
       else
         return nil
       end
  # ESearch only offers search(params); the term travels as the :term parameter.
  Chem::NCBI::ESearch.search(options.merge(:db => db, :term => term))
end
|
29
|
+
|
30
|
+
class NCBI
|
31
|
+
EUtilsURI = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
|
32
|
+
PubChemURI = 'http://pubchem.ncbi.nlm.nih.gov/'
|
33
|
+
|
34
|
+
# EInfo
|
35
|
+
module EInfo
  extend Chem::NetUtils

  EInfoURI = EUtilsURI + 'einfo.fcgi?'

  # Queries the EInfo service.
  #
  # Without parameters, returns an Array with the text of every DbName
  # element of the response.  With params[:db], returns a DbInfo built from
  # the response for that database.  Returns nil for any other non-empty
  # parameter Hash.
  def self.query(params = {})
    if params.empty?
      xml = REXML::Document.new(http_get(EInfoURI))
      dbs = []
      xml.elements.each("eInfoResult/DbList/DbName") do |element|
        dbs << element.text
      end
      dbs
    elsif params[:db]
      DbInfo.new(REXML::Document.new(http_get(EInfoURI + "db=" + params[:db].to_s)))
    end
  end

  # Values read from the eInfoResult/DbInfo element of an EInfo response.
  class DbInfo
    # +fields+ is an Array with one Hash per FieldList/Field element; it used
    # to be collected without any way to read it.
    attr_reader :db_name, :menu_name, :description, :count, :last_update, :fields

    # xml :: REXML::Document holding an EInfo response for a single database
    def initialize(xml)
      @db_name     = xml.elements["eInfoResult/DbInfo/DbName"].text
      @menu_name   = xml.elements["eInfoResult/DbInfo/MenuName"].text
      @description = xml.elements["eInfoResult/DbInfo/Description"].text
      @count       = xml.elements["eInfoResult/DbInfo/Count"].text
      @last_update = xml.elements["eInfoResult/DbInfo/LastUpdate"].text
      @fields = []
      xml.elements.each("eInfoResult/DbInfo/FieldList/Field") do |element|
        @fields << {
          :name         => element.elements["Name"       ].text,
          :full_name    => element.elements["FullName"   ].text,
          :description  => element.elements["Description"].text,
          :term_count   => element.elements["TermCount"  ].text,
          # The remaining entries are "Y"/"N" flags converted to booleans.
          :is_date      => element.elements["IsDate"     ].text == "Y",
          :is_numerical => element.elements["IsNumerical"].text == "Y",
          :single_token => element.elements["SingleToken"].text == "Y",
          :hierarchy    => element.elements["Hierarchy"  ].text == "Y",
          :is_hidden    => element.elements["IsHidden"   ].text == "Y",
        }
      end
    end
  end

end # EInfo module
|
79
|
+
|
80
|
+
module ESearch
  extend Chem::NetUtils

  ESearchURI = EUtilsURI + 'esearch.fcgi?'

  # Runs an ESearch request.
  #
  # params :: Hash of request parameters; every value is CGI-escaped
  #
  # Returns a Hash with :count, :retmax and :retstart (Integers) and :id_list
  # (Array of Integer ids).  Raises RuntimeError when the response contains
  # an eSearchResult/ERROR element.
  def self.search(params)
    query_string = params.collect { |key, value|
      key.to_s + "=" + CGI.escape(value.to_s)
    }.join("&")
    xml = REXML::Document.new(http_get(ESearchURI + query_string))
    raise "Error no result" unless xml.elements["eSearchResult/ERROR"].nil?

    ids = []
    result = {
      :count    => xml.elements["eSearchResult/Count"   ].text.to_i,
      :retmax   => xml.elements["eSearchResult/RetMax"  ].text.to_i,
      :retstart => xml.elements["eSearchResult/RetStart"].text.to_i,
      :id_list  => ids
    }
    xml.elements.each("eSearchResult/IdList/Id") { |element| ids << element.text.to_i }
    result
  end

end
|
104
|
+
|
105
|
+
module PCFetch

  extend Chem::NetUtils

  PCFetchURI = PubChemURI + 'pc_fetch/pc_fetch.cgi?'

  # How often the FTP download is attempted before the error is re-raised,
  # and how many seconds to wait between two attempts.
  MAX_FTP_ATTEMPTS = 10
  FTP_RETRY_WAIT   = 1

  # Requests a file through pc_fetch and downloads the resulting SDF file
  # from ftp.ncbi.nih.gov.
  #
  # params :: Hash of pc_fetch parameters; :retmode is mandatory.
  #           :localfilename names the local file and defaults to
  #           "<db><id>.sdf" (the default is stored back into +params+).
  #
  # Raises RuntimeError when :retmode is missing or the response contains
  # neither a fetch file name nor a request id, and Net::FTPPermError when the
  # download still fails after MAX_FTP_ATTEMPTS attempts.
  def self.fetch(params)
    raise "You need to specify :retmode" if params[:retmode].nil?

    uri = PCFetchURI + params.collect{|key, value| key.to_s + "=" + value.to_s}.join("&")
    doc = http_get(uri)
    if m = /pubchem\/\.fetch\/(\d+)\.sdf/.match(doc)
      puts 'ftp'
    elsif m = /reqid=(\d+)/.match(doc)
      puts 'http'
    else
      raise "Cannot retrieve file"
    end
    num = m[1].to_i

    params[:localfilename] ||= "%s%d.sdf" % [params[:db], params[:id]]

    attempts = 0
    begin
      attempts += 1
      ftp = Net::FTP.open("ftp.ncbi.nih.gov")
      begin
        ftp.login
        ftp.gettextfile("pubchem/.fetch/%d.sdf" % num, params[:localfilename])
      ensure
        # Every attempt opens its own connection, so close it again.
        ftp.close
      end
    rescue Net::FTPPermError
      puts "error : #{num}"
      # Give up eventually instead of hammering the server forever.
      raise if attempts >= MAX_FTP_ATTEMPTS
      sleep FTP_RETRY_WAIT
      retry
    end

  end

end
|
141
|
+
|
142
|
+
module ESummary

  extend Chem::NetUtils
  ESummaryURI = EUtilsURI + 'esummary.fcgi?'

  # Sends an ESummary request built from the +params+ Hash and returns the
  # raw response as delivered by http_get.
  def self.get(params)
    pairs = params.collect { |key, value| key.to_s + "=" + value.to_s }
    http_get(ESummaryURI + pairs.join("&"))
  end

  # Fetches a summary with ESummary.get and converts the
  # eSummaryResult/DocSum/Item elements into a Hash keyed by their Name
  # attribute.
  #
  # "String" and "Date" items keep their text, "Integer" items are converted
  # with to_i, and "List" items become an Array of their child items (text for
  # "String", Integer for "Integer", "" for anything else).  Items of any
  # other type map to nil.
  def self.get_parsed(params)
    xml = REXML::Document.new(get(params))
    tree = {}
    xml.elements.each("eSummaryResult/DocSum/Item") do |element|
      name = element.attributes["Name"]
      tree[name] =
        case element.attributes["Type"]
        when "String", "Date"
          element.text
        when "Integer"
          element.text.to_i
        when "List"
          members = []
          element.elements.each("Item") do |el|
            type = el.attributes["Type"]
            members << if type == "String"
                         el.text
                       elsif type == "Integer"
                         el.text.to_i
                       else
                         ""
                       end
          end
          members
        end
    end
    tree
  end
end
|
182
|
+
|
183
|
+
# obsolete
class EFetch

  include Chem::NetUtils

  EFetchURI = EUtilsURI + 'efetch.fcgi' + '?'

  # Requests the compounds of a stored Entrez query (identified by
  # +query_key+ and +web_env+) through pc_fetch as uncompressed SDF and, when
  # the response links to a result page, downloads the FTP file named there.
  #
  # Raises RuntimeError when the result page contains no ftp:// link.
  def initialize(query_key, web_env)
    # PCFetchURI lives in the sibling PCFetch module and cannot be resolved
    # from this class without the qualification.
    uri = [PCFetch::PCFetchURI]
    uri << 'db=pccompound'
    uri << '&WebEnv=' + web_env.to_s
    uri << '&query_key=' + query_key.to_s
    uri << '&retmode=sdf'
    uri << '&compression=none'
    #"retmode=xml&"
    #uri = EFetchURI + "&db=pccompound&retmode=xml&WebEnv=" + web_env + "&query_key=" + query_key + "&tool=oscar3&email=nobuya.tanaka%40gmail.com"
    p uri.join
    doc = http_get(uri.join)
    if m = /bookmarking this page or by going to<\/p><p><a href=\"([^"]+)/.match(doc)
      sleep 1
      p m[1]
      d = http_get(m[1])
      m = /"ftp:\/\/([^"]+)/.match(d)
      raise "Cannot retrieve file" if m.nil?
      p m[1]
      # NOTE(review): the link is assumed to look like "host/path/to/file.sdf";
      # previously a fixed request id was downloaded whatever the query was.
      host, remote_path = m[1].split('/', 2)
      begin
        sleep 0.5
        ftp = Net::FTP.open(host)
        begin
          ftp.login
          ftp.gettextfile(remote_path)
        ensure
          ftp.close
        end
      rescue Net::FTPPermError
        # NOTE(review): a permission error is only reported as 'OK' and then
        # ignored, exactly as before -- confirm this is intended.
        sleep 1
        puts 'OK'
      end

    end
  end

  # Convenience wrapper that runs the download by instantiating EFetch.
  def self.fetch_all(query_key, web_env)
    new(query_key, web_env)
  end

end
|
226
|
+
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
if __FILE__ == $0
  # Demo of the NCBI helpers; every call below talks to the NCBI servers.
  # pp is used further down and is not loaded automatically on older Rubies.
  require 'pp'

  # search PubChem compounds using InChI
  query = {
    :db => :pccompound,
    :term => '"InChI=1/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)/f/h11H"'
  }

  p Chem::NCBI::ESearch.search(query)

  # search PubChem substance with term

  query = {
    :db => "pcsubstance",
    :term => 'benzene'
  }

  p Chem::NCBI::ESearch.search(query)

  # search PubChem substance with complete synonyms

  query = {
    :db => "pcsubstance",
    :term => 'benzene',
    :field => 'CSYN',
  }

  p Chem::NCBI::ESearch.search(query)

  # search PubMed
  # ESearch.search CGI-escapes every value, so the words are separated by
  # plain spaces here; a literal '+' would be sent as %2B.
  query = {
    :db => "pubmed",
    :term => "asthma[mh] OR hay fever[mh]",
  }

  p query
  p Chem::NCBI::ESearch.search(query)

  # Retrieving more entries

  query = {
    :db => "pubmed",
    :term => "cancer",
    :reldate => 60,
    :datetype => "edat",
    :retmax => 100,
    :retstart => 300,
  }

  p Chem::NCBI::ESearch.search(query)

  # Retrieving Eutils database information
  p Chem::NCBI::EInfo.query


  # Retrieving information about PubChem Compounds

  pp Chem::NCBI::EInfo.query(:db => :pccompound)

  # Retrieving pccompound using PC_Fetch
  # Not recommended
  # It seems that EFetch does not accept db=pccompound
  # PCFetch could be alternatives for EFetch.

  100.upto(110) do |n|
    puts n
    Chem::NCBI::PCFetch::fetch({:db => :pccompound, :id => n, :retmode => :sdf})
  end

  # Retrieving parsed summary for entries
  # CID:100
  p Chem::NCBI::ESummary::get_parsed({:db => :pccompound, :id => 100})

end
|
303
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Mixin that adds a class-level +once+ macro for memoizing methods.
#
#   class Foo
#     include Once
#     def expensive; ...; end
#     once :expensive
#   end
module Once

  # Hook run on `include Once`: besides the normal mixin behaviour, the
  # including class is extended with ClassMethods.
  def self.append_features(base)
    super
    base.extend(ClassMethods)
  end

  module ClassMethods
    # Wraps each method named in +ids+ so that its first result is stored in
    # an instance variable and returned by every later call; the arguments of
    # later calls are ignored.  Frozen receivers cannot store the result, so
    # for them the original method runs on every call.
    def once(*ids) # :nodoc:
      ids.each do |id|
        # Names such as "ready?" are not valid instance variable names, so the
        # hidden alias and the cache variable are named after the symbol's
        # object id (Symbol#to_i no longer exists since Ruby 1.9).
        key = "__#{id.to_sym.object_id}__"
        module_eval <<-"end;", __FILE__, __LINE__ + 1
          alias_method :#{key}, :#{id.to_s}
          private :#{key}
          def #{id.to_s}(*args, &block)
            if defined? @#{key}
              @#{key}
            elsif ! self.frozen?
              @#{key} ||= #{key}(*args, &block)
            else
              #{key}(*args, &block)
            end
          end
        end;
      end
    end
  end

end
|
@@ -0,0 +1,204 @@
|
|
1
|
+
# Copyright (C) 2006 Richard L. Apodaca
|
2
|
+
# Nobuya Tanaka
|
3
|
+
|
4
|
+
require 'chem'
|
5
|
+
|
6
|
+
module Chem
|
7
|
+
|
8
|
+
module Atom
  # Read/write slot for the OpenBabel atom that belongs to this node.
  attr_reader :ob_atom
  attr_writer :ob_atom
end
|
11
|
+
|
12
|
+
module Molecule

  # OpenBabel OBMol built by use_open_babel (nil until it has been called).
  attr_reader :ob_mol

  # Writes the molecule to +path+ in the OpenBabel output format +filetype+.
  # Builds the OBMol first when it does not exist yet.
  def ob_save_as(path, filetype)
    use_open_babel if @ob_mol.nil?
    conv = ::OpenBabel::OBConversion.new
    conv.set_out_format(filetype.to_s)
    conv.write_file(@ob_mol, path)
  end

  # Returns the molecule as a String in the OpenBabel output format
  # +filetype+.  Builds the OBMol first when it does not exist yet.
  def ob_export_as(filetype)
    use_open_babel if @ob_mol.nil?
    conv = ::OpenBabel::OBConversion.new
    conv.set_out_format(filetype.to_s)
    conv.write_string(@ob_mol)
  end

  # Returns the "inchi" export of the molecule without its last character.
  # The OBMol is always rebuilt before exporting.
  def to_inchi
    use_open_babel
    ob_export_as("inchi").chop
  end

  # set OpenBabel OBMol object to instance variable @ob_mol
  #
  # Every node gets a new OBAtom (stored in node.ob_atom) carrying its atomic
  # number and coordinates; every edge becomes a bond between the
  # corresponding OBAtoms with order bond.v.to_i.
  def use_open_babel
    begin
      require 'openbabel'
    rescue LoadError
      # The binding is installed under a capitalised name on some systems.
      # Only a failed load triggers the fallback; other errors propagate.
      require 'OpenBabel'
    end
    @ob_mol = ::OpenBabel::OBMol.new
    nodes.each do |node|
      atom = @ob_mol.new_atom
      atom.set_atomic_num(Element2Number[node.element])
      atom.set_vector(node.x.to_f, node.y.to_f, node.z.to_f)
      node.ob_atom = atom
    end
    edges.each do |bond, atom1, atom2|
      @ob_mol.add_bond(
        atom1.ob_atom.get_idx,
        atom2.ob_atom.get_idx,
        bond.v.to_i
      )
    end
  end

end
|
58
|
+
|
59
|
+
module OpenBabel

  # Parses a SMILES String with OpenBabel and returns it wrapped in an
  # OBMolecule.
  def self.parse_smiles(smiles)

    require 'openbabel'

    converter = ::OpenBabel::OBConversion.new
    converter.set_in_format("smi")
    mol = ::OpenBabel::OBMol.new
    converter.read_string(mol, smiles)
    OBMolecule.new(mol)
  end

  # Reads the molecules of the SDF file at +path+ and returns them as an
  # Array of OBMolecule.  The Array is empty when not even the first molecule
  # can be read.
  def self.load_sdf(path)
    require 'openbabel'

    conv = ::OpenBabel::OBConversion.new
    conv.set_in_format("sdf")
    mols = []
    mol = ::OpenBabel::OBMol.new
    cond = conv.read_file(mol, path)
    # A molecule is only kept when its read succeeded; this includes the
    # first one, which used to be added unconditionally.
    while cond
      mols << OBMolecule.new(mol)
      mol = ::OpenBabel::OBMol.new
      cond = conv.read(mol)
    end
    mols
  end

  # Wrapper around an OpenBabel SMARTS pattern.
  class OBSmarts

    # smarts :: SMARTS String the pattern is initialised with
    def initialize(smarts)
      require 'openbabel'
      @pat = ::OpenBabel::OBSmartsPattern.new
      @pat.init(smarts)
    end

    # Matches the pattern against +mol+, building its OBMol first when
    # needed, and returns the result of OBSmartsPattern#match.
    def match(mol)
      mol.use_open_babel if mol.ob_mol.nil?
      @pat.match(mol.ob_mol)
    end

    # Returns the pattern's umap list as an Array of Arrays of Integer.
    def get_umap_list
      @pat.get_umap_list.collect{|ary| ary.collect{|i| i.to_i}}
    end

  end

  # Builds an OBSmarts for the SMARTS String +smarts+.
  def self.parse_smarts(smarts)
    OBSmarts.new(smarts)
  end

  # load_as(path, filetype)
  # path : path to input file
  # filetype : "alc", "bgf"
  # see http://openbabel.sourceforge.net/wiki/Babel
  #
  # Returns the first molecule read from the file as an OBMolecule.
  def self.load_as(path, filetype)
    # Loaded lazily like in the sibling loaders, so this also works when it
    # is the first OpenBabel call.
    require 'openbabel'

    conv = ::OpenBabel::OBConversion.new
    conv.set_in_format(filetype.to_s)
    mol = ::OpenBabel::OBMol.new
    conv.read_file(mol, path)
    OBMolecule.new(mol)
  end

  # Chem::Atom behaviour mixed into the OpenBabel atoms held by OBMolecule.
  module OBAtom
    include Atom
  end

  # Chem::Molecule facade over an existing OpenBabel OBMol.
  class OBMolecule
    include Molecule

    attr_reader :ob_mol
    attr_reader :nodes

    # ob_mol :: OpenBabel OBMol to wrap; each of its atoms (indices start at
    #           1) is extended with OBAtom and collected in +nodes+.
    def initialize(ob_mol)
      @ob_mol = ob_mol
      @nodes = []
      1.upto(@ob_mol.num_atoms) do |n|
        atom = @ob_mol.get_atom(n)
        @nodes << atom.extend(OBAtom)
      end
    end

  end
end # OpenBabel module
|
144
|
+
|
145
|
+
end
|
146
|
+
|
147
|
+
if __FILE__ == $0
  # Small demo that runs when this file is executed directly.
  mol = SMILES("CCC")
  mol.use_open_babel
  # NOTE(review): the calls below are OBMol-style methods invoked on the
  # ChemRuby molecule itself -- confirm that they are available there.
  p mol.num_atoms
  p mol.num_bonds
  p mol.get_mol_wt
  p mol.get_exact_mass
  # p mol.add_hydrogens # BUS Error !?
elsif false
  # Never executed: kept as notes on the OpenBabel Ruby API.
  ob = Chem::OpenBabel.parse_smiles('CC(C)CCCC(C)C1CCC2C1(CCC3C2CC=C4C3(CCC(C4)O)C)C')
  p ob.get_mol_wt
  p ob.num_atoms
  p ob.num_bonds
  # read
  # is_last

  atom = ob.new_atom

  # creating new molecule

  mol = OBMol.new
  atom1 = mol.add_atom
  atom2 = mol.add_atom

  # Atom

  atom1 = mol.get_first_atom
  atom1 = mol.get_atom(1)

  # Atom setter and getter

  atom1.set_atomic_num(6) # Carbon

  atom1.get_atomic_mass # Carbon : 12.0107

  atom1.set_aromatic # aromatic
  atom1.unset_aromatic # not aromatic
  atom1.is_aromatic # return true or false

  atom1.is_amide_nitrogen # return true or false


  # atom count starts from 1 (not 0)
  # mol.add_bond(0, 1, 1) fails!
  mol.add_bond(1, 2, 1) # from, to, bond_order
  # bond count starts from 0 (not 1)

  bond = mol.get_bond(0)
  bond.is_double
  bond.is_single
  bond.is_amide
  bond.get_bond_order # bond.get_bo

  # bond length

  bond.get_length
  bond.set_length # arguments ?
end
|