bio-chembl 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ require 'cgi'
2
+ require 'uri'
3
+ require 'bio/version'
4
+ require 'curb'
5
+ require 'nokogiri'
6
+
7
+ #
8
module BioChEMBL
  class << self
    # Returns the URL of the ChEMBL web site as a String.
    def website
      "https://www.ebi.ac.uk/chembl/"
    end

    # Splits a "; "-delimited value into an Array of Strings.
    # The argument is converted with +to_s+ first, so +nil+ yields [].
    #
    #   BioChEMBL.to_array(aTarget.synonyms) #=> []
    def to_array(str)
      text = str.to_s
      text.split('; ')
    end
  end
end
20
+
21
+
22
+ require 'bio-chembl/chemblid.rb'
23
+ require 'bio-chembl/rest_client.rb'
24
+
25
+ require 'bio-chembl/datamodel.rb'
26
+ require 'bio-chembl/compound.rb'
27
+ require 'bio-chembl/target.rb'
28
+ require 'bio-chembl/assay.rb'
29
+ require 'bio-chembl/bioactivity.rb'
30
+
@@ -0,0 +1,102 @@
1
+ require 'cgi'
2
+ require 'uri'
3
+ require 'bio/version'
4
+ require 'curb'
5
+ require 'nokogiri'
6
+
7
+
8
+
9
+
10
+ module BioChEMBL
11
+
12
+ # ChEMBL ID
13
+ #
14
+ # CHEMBL1
15
+ #
16
+ # cid = BioChEMBL::ChEMBLID.new("CHEMBL1")
17
+ # cid.is_compound? #=> true
18
+ # cid.resolve #=> aBioChEMBL::Compound
19
+ #
20
class ChEMBLID < String

  # Data model class (Compound, Target or Assay) this ID is known to refer
  # to, or nil while that has not been determined.
  attr_accessor :data_type

  # Checks that +str+ is "CHEMBL" followed by one or more digits.
  # The whole string must match (\A ... \z), so embedded newlines are
  # rejected. Raises ArgumentError when the check fails.
  def self.validate_chemblId(str)
    unless str =~ /\ACHEMBL\d+\z/
      raise ArgumentError, "Invalid ChEMBL ID."
    end
  end

  # Builds a ChEMBL ID from +str+ after validating it.
  # Raises ArgumentError for a malformed ID.
  def initialize(str)
    # validate_chemblId is a class method; it must be called on the class.
    self.class.validate_chemblId(str)
    super(str)
    @data_type = nil
  end

  # Fetches the record this ID refers to.
  #
  # When data_type is one of Compound, Target or Assay, that class is
  # queried directly and any error it raises propagates. Otherwise each
  # class is tried in that order and the first truthy result is returned;
  # StandardErrors raised by an individual lookup are ignored (best effort).
  # Returns nil when nothing could be resolved.
  def resolve
    id = to_s
    # Compare with ==: `case ... when Compound` would test instance-of.
    known = resolvable_types.detect { |klass| klass == @data_type }
    return known.find(id) if known

    resolvable_types.each do |klass|
      begin
        found = klass.find(id)
        return found if found
      rescue StandardError
        # This type did not resolve; try the next one.
      end
    end
    nil
  end

  # True when the ID is known to be, or can be looked up as, a Compound.
  def is_compound?
    data_type_is?(Compound)
  end

  # True when the ID is known to be, or can be looked up as, a Target.
  def is_target?
    data_type_is?(Target)
  end

  # True when the ID is known to be, or can be looked up as, an Assay.
  def is_assay?
    data_type_is?(Assay)
  end

  private

  # Data model classes in lookup order. Resolved lazily because these
  # classes may be loaded after this one.
  def resolvable_types
    [Compound, Target, Assay]
  end

  # Shared predicate body: returns true if +klass+ is already recorded as
  # the data type, otherwise looks the ID up with klass.find and records
  # +klass+ on success. Errors raised by klass.find are not rescued.
  def data_type_is?(klass)
    return true if @data_type == klass
    return false unless klass.find(to_s)
    @data_type = klass
    true
  end

end
101
+
102
+ end
@@ -0,0 +1,157 @@
1
+ require 'nokogiri'
2
+ require 'bio-chembl/datamodel.rb'
3
+
4
+ module BioChEMBL
5
+
6
+ # ChEMBL Compound Data Container and Parser
7
+ #
8
+ # XML Data string
9
+ # <compound>
10
+ # <chemblId>CHEMBL1</chemblId>
11
+ # <knownDrug>No</knownDrug>
12
+ # <medChemFriendly>Yes</medChemFriendly>
13
+ # <passesRuleOfThree>No</passesRuleOfThree>
14
+ # <molecularFormula>C32H32O8</molecularFormula>
15
+ # <smiles>COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56</smiles>
16
+ # <stdInChiKey>GHBOEFUAGSHXPO-XZOTUCIWSA-N</stdInChiKey>
17
+ # <numRo5Violations>1</numRo5Violations>
18
+ # <rotatableBonds>2</rotatableBonds>
19
+ # <molecularWeight>544.59167</molecularWeight>
20
+ # <alogp>3.627</alogp>
21
+ # <acdLogp>7.669</acdLogp>
22
+ # <acdLogd>7.669</acdLogd>
23
+ # </compound>
24
+ #
25
+ # Usage
26
+ # ```cpd = BioChEMBL::Compound.find("CHEMBL1")
27
+ # cpd.chemblId #=> "CHEMBL1"
28
+ # cpd.smiles
29
+ #
30
+ # cpd2 = BioChEMBL::Compound.find_all_by_smiles(cpd.smiles)
31
+ #
32
+ # cpd3 = BioChEMBL::Compound.parse(xml)
33
+ # ```
34
class Compound
  extend BioChEMBL::DataModel

  # Child element names read from a <compound> record; each one becomes
  # an attribute accessor on instances.
  ATTRIBUTES = [
    :chemblId,
    :knownDrug,
    :medChemFriendly,
    :passesRuleOfThree,
    :molecularFormula,
    :smiles,
    :stdInChiKey,
    :species,
    :numRo5Violations,
    :rotatableBonds,
    :molecularWeight,
    :alogp,
    :acdAcidicPka,
    :acdLogp,
    :acdLogd
  ].freeze

  # aBioChEMBL::Compound instances have attribute accessors.
  # Values parsed from XML are Strings.
  set_attr_accessors(ATTRIBUTES)

  # BioChEMBL::Compound.parse(doc)
  #
  # Detects the format of +str+ (a line starting with "<" means XML, a
  # line starting with "{" means JSON) and dispatches to the matching
  # parse_* method.
  # Raises ArgumentError for any other input and NoMethodError when the
  # selected parser fails with a StandardError.
  def self.parse(str)
    format = case str
             when /^</  then 'xml'
             when /^\{/ then 'json'
             else
               raise ArgumentError, "Unexpected file format: #{str.inspect}"
             end
    begin
      # Direct dispatch; format is one of the two literals above.
      send("parse_#{format}", str)
    rescue => e
      raise NoMethodError, "parse_#{format} failed: #{e.message}"
    end
  end

  # XML
  # <compound>
  #
  # Builds a Compound from one <compound> document, copying the text of
  # every element named in ATTRIBUTES into the matching accessor.
  def self.parse_xml(str)
    xml = Nokogiri::XML(str)
    # Root element name is derived from the class name ("compound").
    root = to_s.split('::').last.downcase
    this = new
    ATTRIBUTES.each do |attr|
      this.public_send("#{attr}=", xml.xpath("/#{root}/#{attr}").text)
    end
    this
  end

  # XML
  # <list><compound> ...
  #
  # Returns an Array with one Compound per /list/compound element.
  def self.parse_list_xml(str)
    xmls = Nokogiri::XML(str)
    xmls.xpath("/list/compound").map do |cpd|
      parse_xml(cpd.to_s)
    end
  end

  # JSON (not implemented)
  def self.parse_json(str)
    raise NotImplementedError
  end

  # RDF (not implemented)
  def self.parse_rdf(str)
    raise NotImplementedError
  end


  # Compound.find(chemblId)
  # Find a compound data by a ChEMBL ID
  def self.find(chemblId)
    parse_xml(REST.new.compounds(chemblId))
  end

  # Compound.find_by_smiles(smiles)
  # Find a compound data by a SMILES (first hit of find_all_by_smiles)
  def self.find_by_smiles(smiles)
    find_all_by_smiles(smiles).first
  end

  # Compound.find_all_by_smiles(smiles)
  # Find compounds by a SMILES.
  def self.find_all_by_smiles(smiles)
    parse_list_xml(REST.new.compounds_smiles(smiles))
  end

  # Compound.find_by_stdinchikey(stdinchikey)
  # Find a compound data by a StdInChiKey
  def self.find_by_stdinchikey(stdinchikey)
    parse_xml(REST.new.compounds_stdinchikey(stdinchikey))
  end

  # Compound.find_all_by_substructure(smiles)
  # Substructure Search by a SMILES
  def self.find_all_by_substructure(smiles)
    parse_list_xml(REST.new.compounds_substructure(smiles))
  end

  # Compound.find_all_by_similarity(smiles_with_similarity)
  # Search compounds by a SMILES with similarity
  def self.find_all_by_similarity(smiles_with_similarity)
    parse_list_xml(REST.new.compounds_similarity(smiles_with_similarity))
  end


  # new
  # Creates an unresolved compound that only carries its ChEMBL ID.
  def initialize(chemblId = nil)
    @chemblId = chemblId
  end

  # Resolve the compound data by given ChEMBL ID: fetches the record with
  # Compound.find and copies every attribute onto this instance.
  def resolve
    resolved = self.class.find(@chemblId)
    ATTRIBUTES.each do |attr|
      instance_variable_set("@#{attr}", resolved.public_send(attr))
    end
  end
end
156
+
157
+ end
@@ -0,0 +1,25 @@
1
+ require 'nokogiri'
2
+
3
+
4
+ module BioChEMBL
5
+
6
+ # BioChEMBL::DataModel
7
+ #
8
module DataModel

  # Defines a reader/writer pair on the extending class for every name
  # in +attributes+ (Symbols or Strings). Returns +attributes+.
  def set_attr_accessors(attributes)
    attributes.each do |attr|
      # send keeps this working where attr_accessor is a private method.
      send(:attr_accessor, attr)
    end
  end


  # Returns Ruby source text, one assignment per attribute, of the form
  #   this.<attr> = xml.xpath('/<element>/<attr>').text
  # where <element> is the extending class's unqualified name in lower
  # case. Callers evaluate it with locals +this+ and +xml+ in scope, so
  # the generated text must stay exactly in this shape.
  def set_attr_values(attributes)
    element = to_s.split('::').last.downcase
    attributes.map do |attr|
      "this.#{attr} = xml.xpath('/#{element}/#{attr}').text"
    end.join("\n")
  end

end
24
+
25
+ end
@@ -0,0 +1,40 @@
1
+ require 'cgi'
2
+ require 'uri'
3
+ require 'nokogiri'
4
+
5
+
6
+ module BioChEMBL
7
class REST
  # Skeleton of a server-style query interface. Every method body is
  # still empty and returns nil.
  #
  #   serv = BioChEMBL::REST::Server.new
  #
  class Server
    def initialize
    end

    # serv.query(:status)
    # serv.query(:compounds, 'CHEMBL1')
    #
    # +args+ and +options+ are optional so the one- and two-argument
    # calls shown above are accepted.
    def query(action, args = nil, options = nil)
    end

    # /compounds/CHEMBL1
    def compounds(chemblId)
    end

    # /compounds/CHEMBL1/image
    def compounds_image(chemblId)
    end

    # /compounds/CHEMBL1/bioactivities
    def compounds_bioactivities(chemblId)
    end
    # Original (misspelled) name, kept so existing callers keep working.
    alias_method :comopunds_bioactivities, :compounds_bioactivities

  end

end
33
+
34
class DB
  # Skeleton of a database query object; it takes no arguments and
  # holds no state yet.
  class Query
    def initialize; end
  end
end
40
+ end
@@ -0,0 +1,264 @@
1
+ require 'cgi'
2
+ require 'uri'
3
+ require 'bio/version'
4
+ require 'curb'
5
+ require 'nokogiri'
6
+
7
+ require 'bio-chembl/compound.rb'
8
+ require 'bio-chembl/target.rb'
9
+ require 'bio-chembl/assay.rb'
10
+ require 'bio-chembl/bioactivity.rb'
11
+
12
+ module BioChEMBL
13
+
14
class REST

  HOST_NAME = "www.ebi.ac.uk"
  API_ROOT = "chemblws"
  BASE_URI = "https://" + HOST_NAME + "/" + API_ROOT


  # BioChEMBL::REST::ChEMBL_URI module
  #
  # Builds request URLs for the ChEMBL web service. Every method returns
  # a String below BASE_URI. Argument combinations that do not map to a
  # known endpoint raise ArgumentError.
  # Identifiers and SMILES strings are inserted into the path as given.
  module ChEMBL_URI
    # Joins +path+ onto BASE_URI.
    def self.address(path)
      "#{BASE_URI}/#{path}"
    end


    # BioChEMBL::REST::ChEMBL_URI.status
    def self.status
      # Example URL: http://www.ebi.ac.uk/chemblws/status/
      address("status/")
    end

    # BioChEMBL::REST::ChEMBL_URI.compounds()
    #   compounds("CHEMBL1")
    #   compounds("CHEMBL192", 'image')
    #   compounds("CHEMBL192", 'image', 'dimensions' => 200)
    #   compounds("CHEMBL2", 'bioactivities')
    #
    # +params+ (a Hash or list of pairs) is only accepted together with
    # 'image' and is encoded as the query string.
    def self.compounds(chemblId = nil, arg = nil, params = nil)
      if chemblId && arg.nil? && params.nil?
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/compounds/CHEMBL1
        address("compounds/#{chemblId}")
      elsif chemblId && arg == 'image' && params.nil?
        # Example URL: http://www.ebi.ac.uk/chemblws/compounds/CHEMBL192/image
        address("compounds/#{chemblId}/#{arg}")
      elsif chemblId && arg == 'image' && params
        # Example URL with dimensions parameter: http://www.ebi.ac.uk/chemblws/compounds/CHEMBL192/image?dimensions=200
        address("compounds/#{chemblId}/#{arg}?" + ::URI.encode_www_form(params))
      elsif chemblId && arg == 'bioactivities' && params.nil?
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/compounds/CHEMBL2/bioactivities
        address("compounds/#{chemblId}/#{arg}")
      else
        raise ArgumentError, "Undefined address. ID: #{chemblId}, arg: #{arg}, params: #{params.inspect}"
      end
    end

    # BioChEMBL::REST::ChEMBL_URI.compounds_stdinchikey()
    def self.compounds_stdinchikey(stdinchikey)
      # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/compounds/stdinchikey/QFFGVLORLPOAEC-SNVBAGLBSA-N
      address("compounds/stdinchikey/#{stdinchikey}")
    end

    # BioChEMBL::REST::ChEMBL_URI.compounds_smiles()
    def self.compounds_smiles(smiles)
      # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/compounds/smiles/COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56
      address("compounds/smiles/#{smiles}")
    end

    # BioChEMBL::REST::ChEMBL_URI.compounds_substructure()
    def self.compounds_substructure(smiles)
      # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/compounds/substructure/COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56
      address("compounds/substructure/#{smiles}")
    end

    # BioChEMBL::REST::ChEMBL_URI.compounds_similarity()
    # +smiles+ carries the similarity cut-off as a trailing "/NN".
    def self.compounds_similarity(smiles)
      # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/compounds/similarity/COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56/70
      address("compounds/similarity/#{smiles}")
    end

    # BioChEMBL::REST::ChEMBL_URI.targets()
    # Without arguments this is the address of the full target list.
    def self.targets(chemblId = nil, arg = nil)
      if chemblId && arg.nil?
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/targets/CHEMBL2477
        address("targets/#{chemblId}")
      elsif chemblId && arg == 'bioactivities'
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/targets/CHEMBL240/bioactivities
        address("targets/#{chemblId}/bioactivities")
      elsif chemblId.nil? && arg.nil?
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/targets
        address("targets")
      else
        raise ArgumentError, "Undefined."
      end
    end

    # BioChEMBL::REST::ChEMBL_URI.targets_uniprot()
    def self.targets_uniprot(uniprot_id)
      # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/targets/uniprot/Q13936
      address("targets/uniprot/#{uniprot_id}")
    end

    # BioChEMBL::REST::ChEMBL_URI.targets_refseq()
    def self.targets_refseq(refseq_id)
      # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/targets/refseq/NP_001128722
      address("targets/refseq/#{refseq_id}")
    end

    # BioChEMBL::REST::ChEMBL_URI.assays()
    def self.assays(chemblId, arg = nil)
      if chemblId && arg.nil?
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/assays/CHEMBL1217643
        address("assays/#{chemblId}")
      elsif chemblId && arg == 'bioactivities'
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/assays/CHEMBL1217643/bioactivities
        address("assays/#{chemblId}/bioactivities")
      else
        raise ArgumentError, "Undefined. ChEMBL ID: #{chemblId}, arg: #{arg}"
      end
    end
  end


  # BioChEMBL::REST.website
  def self.website
    "https://www.ebi.ac.uk/chembldb/index.php/ws"
  end

  # BioChEMBL::REST.usage
  # Returns a short multi-line usage text.
  def self.usage
    ["a = BioChEMBL::REST.new",
     "a.status => https://www.ebi.ac.uk/chembldb/index.php/ws#serviceStatus",
     'a.compounds("CHEMBL1") => http://www.ebi.ac.uk/chemblws/compounds/CHEMBL1',
     'a.compounds_stdinchikey("QFFGVLORLPOAEC-SNVBAGLBSA-N") => http://www.ebi.ac.uk/chemblws/compounds/stdinchikey/QFFGVLORLPOAEC-SNVBAGLBSA-N'
    ].join("\n")
  end

  # BioChEMBL::REST.up? #=> true/false
  # True when the status endpoint answers with the body "UP".
  def self.up?
    new.status == "UP"
  end


  # new
  #
  # +uri+ is parsed (so a malformed value raises URI::InvalidURIError),
  # but requests are always built from BASE_URI.
  # NOTE(review): a custom base URI is not honoured by ChEMBL_URI yet.
  def initialize(uri = BASE_URI)
    ::URI.parse(uri) unless uri.kind_of?(::URI)
    @header = {
      'User-Agent' => "BioChEMBL, BioRuby/#{Bio::BIORUBY_VERSION_ID}"
    }
    @debug = false
  end

  # If true, shows debug information to $stderr.
  attr_accessor :debug

  # get HTTP GET URL
  # Performs the request with the configured headers and returns the
  # Curl::Easy handle after it has been performed.
  def get(url)
    easy = Curl::Easy.new(url) do |c|
      @header.each do |k, v|
        c.headers[k] = v
      end
    end
    easy.perform
    easy
  end

  # Maps a performed response to the value handed back to the caller:
  # the body for 200, a RuntimeError for 400/404/500, nil for any other
  # status code.
  def prepare_return_value(response)
    $stderr.puts "ChEMBL: #{response.inspect}" if @debug

    case response.response_code
    when 200
      response.body_str
    when 400
      raise RuntimeError, "400 Bad request #{response.inspect}"
    when 404
      raise RuntimeError, "404 Not found #{response.inspect}"
    when 500
      raise RuntimeError, "500 Service unavailable"
    else
      nil
    end
  end

  # uri
  # The URL builder module used by the API methods.
  def uri
    ChEMBL_URI
  end

  # address
  def address(path)
    "#{BASE_URI}/#{path}"
  end



  # API methods

  # Builds the URL with the ChEMBL_URI method named +method+, called with
  # +args+, performs the GET and converts the response with
  # prepare_return_value.
  def get_body(method, args = [])
    # Direct dispatch instead of evaluating generated source text.
    response = get(uri.public_send(method, *args))
    prepare_return_value(response)
  end
  private :get_body

  # Name of the calling method, taken from the backtrace. Accepts both
  # the "in `name'" and the "in 'Class#name'" frame formats.
  def current_method_name
    label = caller(1).first[/:in [`'](.*)'\z/, 1].to_s
    label.split(/[#.]/).last.to_s
  end
  private :current_method_name


  # Each API method forwards to the ChEMBL_URI builder of the same name
  # and returns the response body (see prepare_return_value).

  def status
    get_body(__method__)
  end

  def compounds(chemblId = nil, action = nil, params = nil)
    get_body(__method__, [chemblId, action, params])
  end

  def compounds_stdinchikey(stdinchikey)
    get_body(__method__, [stdinchikey])
  end

  def compounds_smiles(smiles)
    get_body(__method__, [smiles])
  end

  def compounds_substructure(smiles)
    get_body(__method__, [smiles])
  end

  def compounds_similarity(smiles)
    get_body(__method__, [smiles])
  end

  def targets(chemblId = nil, action = nil)
    get_body(__method__, [chemblId, action])
  end

  def targets_uniprot(uniprot_id)
    get_body(__method__, [uniprot_id])
  end

  def targets_refseq(refseq_id)
    get_body(__method__, [refseq_id])
  end

  def assays(chemblId = nil, action = nil)
    get_body(__method__, [chemblId, action])
  end
end
263
+
264
+ end