bio-chembl 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,30 @@
1
+ require 'cgi'
2
+ require 'uri'
3
+ require 'bio/version'
4
+ require 'curb'
5
+ require 'nokogiri'
6
+
7
+ #
8
# Top-level namespace of the bio-chembl library.
module BioChEMBL

  # URL of the ChEMBL web site.
  #
  # @return [String]
  def self.website
    "https://www.ebi.ac.uk/chembl/"
  end

  # Splits a "; "-delimited value into its items.
  #
  #   BioChEMBL.to_array(aTarget.synonyms) #=> []
  #   BioChEMBL.to_array("a; b")           #=> ["a", "b"]
  #
  # The argument is converted with +to_s+ first, so +nil+ yields an empty
  # array.
  #
  # @param str [#to_s] delimited text
  # @return [Array<String>]
  def self.to_array(str)
    str.to_s.split('; ')
  end

end
20
+
21
+
22
+ require 'bio-chembl/chemblid.rb'
23
+ require 'bio-chembl/rest_client.rb'
24
+
25
+ require 'bio-chembl/datamodel.rb'
26
+ require 'bio-chembl/compound.rb'
27
+ require 'bio-chembl/target.rb'
28
+ require 'bio-chembl/assay.rb'
29
+ require 'bio-chembl/bioactivity.rb'
30
+
@@ -0,0 +1,102 @@
1
+ require 'cgi'
2
+ require 'uri'
3
+ require 'bio/version'
4
+ require 'curb'
5
+ require 'nokogiri'
6
+
7
+
8
+
9
+
10
module BioChEMBL

  # ChEMBL ID
  #
  # A String holding an identifier of the form "CHEMBL" followed by digits,
  # e.g. CHEMBL1.
  #
  #   cid = BioChEMBL::ChEMBLID.new("CHEMBL1")
  #   cid.is_compound? #=> true
  #   cid.resolve      #=> aBioChEMBL::Compound
  #
  class ChEMBLID < String

    # Whole-string pattern of a valid identifier. \A and \z are used rather
    # than ^ and $ so that multi-line input cannot pass validation.
    PATTERN = /\ACHEMBL\d+\z/

    # The entity class (Compound, Target or Assay) this ID is known to
    # belong to, or nil when it has not been determined yet.
    attr_accessor :data_type

    # Checks that +str+ looks like a ChEMBL ID.
    #
    # @param str [#to_s] candidate identifier
    # @return [nil]
    # @raise [ArgumentError] when +str+ is not "CHEMBL" followed by digits
    def self.validate_chemblId(str)
      raise ArgumentError, "Invalid ChEMBL ID." unless str.to_s =~ PATTERN
    end

    # @param str [String] a ChEMBL ID such as "CHEMBL1"
    # @raise [ArgumentError] when +str+ is not a valid ChEMBL ID
    def initialize(str)
      # The validator is a class-level method, so it has to be reached
      # through the class, not through the instance.
      self.class.validate_chemblId(str)
      super(str)
      @data_type = nil
    end

    # Fetches the entity this ID refers to.
    #
    # When data_type is one of Compound, Target or Assay, only that class is
    # queried. Otherwise the three classes are tried in that order and the
    # first successful lookup is returned; a lookup that raises a
    # StandardError is treated as "not this type".
    #
    # @return [Object, nil] whatever the matching class's +find+ returns, or
    #   nil when no class resolves the ID
    def resolve
      id = to_s
      types = candidate_types
      return @data_type.find(id) if types.include?(@data_type)

      types.each do |type|
        begin
          found = type.find(id)
          return found if found
        rescue StandardError
          # This type could not resolve the ID; try the next one.
        end
      end
      nil
    end

    # @return [Boolean] true when Compound.find resolves this ID
    def is_compound?
      type_is?(Compound)
    end

    # @return [Boolean] true when Target.find resolves this ID
    def is_target?
      type_is?(Target)
    end

    # @return [Boolean] true when Assay.find resolves this ID
    def is_assay?
      type_is?(Assay)
    end

    private

    # Entity classes probed by #resolve, in lookup order. They are referenced
    # lazily (inside a method) because they are defined in other files.
    def candidate_types
      [Compound, Target, Assay]
    end

    # Shared implementation of the is_*? predicates: answers from the cached
    # data_type when it already matches, otherwise performs the lookup and
    # remembers the type on success. Errors raised by +find+ propagate.
    def type_is?(type)
      return true if @data_type == type
      return false unless type.find(to_s)

      @data_type = type
      true
    end

  end

end
@@ -0,0 +1,157 @@
1
+ require 'nokogiri'
2
+ require 'bio-chembl/datamodel.rb'
3
+
4
module BioChEMBL

  # ChEMBL Compound Data Container and Parser
  #
  # XML Data string
  #  <compound>
  #    <chemblId>CHEMBL1</chemblId>
  #    <knownDrug>No</knownDrug>
  #    <medChemFriendly>Yes</medChemFriendly>
  #    <passesRuleOfThree>No</passesRuleOfThree>
  #    <molecularFormula>C32H32O8</molecularFormula>
  #    <smiles>COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56</smiles>
  #    <stdInChiKey>GHBOEFUAGSHXPO-XZOTUCIWSA-N</stdInChiKey>
  #    <numRo5Violations>1</numRo5Violations>
  #    <rotatableBonds>2</rotatableBonds>
  #    <molecularWeight>544.59167</molecularWeight>
  #    <alogp>3.627</alogp>
  #    <acdLogp>7.669</acdLogp>
  #    <acdLogd>7.669</acdLogd>
  #  </compound>
  #
  # Usage
  #   cpd = BioChEMBL::Compound.find("CHEMBL1")
  #   cpd.chemblId #=> "CHEMBL1"
  #   cpd.smiles
  #
  #   cpd2 = BioChEMBL::Compound.find_all_by_smiles(cpd.smiles)
  #
  #   cpd3 = BioChEMBL::Compound.parse(xml)
  #
  class Compound
    extend BioChEMBL::DataModel

    # Names of the child elements of <compound> that are exposed as
    # attributes of an instance.
    ATTRIBUTES = [
      :chemblId,
      :knownDrug,
      :medChemFriendly,
      :passesRuleOfThree,
      :molecularFormula,
      :smiles,
      :stdInChiKey,
      :species,
      :numRo5Violations,
      :rotatableBonds,
      :molecularWeight,
      :alogp,
      :acdAcidicPka,
      :acdLogp,
      :acdLogd
    ].freeze

    # aBioChEMBL::Compound instance have attribute accessors.
    # Values of all attributes are in String.
    set_attr_accessors(ATTRIBUTES)

    # BioChEMBL::Compound.parse(doc)
    #
    # Detects the format of +str+ from its first non-blank character ("<"
    # for XML, "{" for JSON) and hands it to the matching parse_* method.
    #
    # @param str [String] serialized compound data
    # @return [Compound]
    # @raise [ArgumentError] when the format is not recognised
    # @raise [NotImplementedError] for JSON input (parser not implemented)
    # @raise [NoMethodError] when the format-specific parser fails with a
    #   StandardError; the original message is carried over
    def self.parse(str)
      format =
        case str
        when /\A\s*</
          'xml'
        when /\A\s*\{/
          'json'
        else
          raise ArgumentError, "Unexpected file format: #{str.inspect}"
        end

      begin
        # +format+ can only be 'xml' or 'json' here, so dynamic dispatch is
        # safe and no source text has to be evaluated.
        public_send("parse_#{format}", str)
      rescue StandardError => e
        raise NoMethodError, e.message
      end
    end

    # XML
    #  <compound>
    #
    # Builds a Compound from a single <compound> document. Each attribute is
    # set to the text of /compound/<attribute>; an absent element yields "".
    #
    # @param str [String] XML text
    # @return [Compound]
    def self.parse_xml(str)
      xml = Nokogiri::XML(str)
      this = new
      ATTRIBUTES.each do |attr|
        this.public_send("#{attr}=", xml.xpath("/compound/#{attr}").text)
      end
      this
    end

    # XML
    #  <list><compound> ...
    #
    # @param str [String] XML text whose root is <list>
    # @return [Array<Compound>] one entry per /list/compound element
    def self.parse_list_xml(str)
      xmls = Nokogiri::XML(str)
      xmls.xpath("/list/compound").map do |cpd|
        parse_xml(cpd.to_s)
      end
    end

    # JSON (not implemented)
    def self.parse_json(str)
      raise NotImplementedError
    end

    # RDF (not implemented)
    def self.parse_rdf(str)
      raise NotImplementedError
    end


    # Compound.find(chemblId)
    # Find a compound data by a ChEMBL ID
    def self.find(chemblId)
      parse_xml(REST.new.compounds(chemblId))
    end

    # Compound.find_by_smiles(smiles)
    # Find a compound data by a SMILES; the first hit of
    # find_all_by_smiles, or nil when there is none.
    def self.find_by_smiles(smiles)
      find_all_by_smiles(smiles).first
    end

    # Compound.find_all_by_smiles(smiles)
    # Find compounds by a SMILES.
    def self.find_all_by_smiles(smiles)
      parse_list_xml(REST.new.compounds_smiles(smiles))
    end

    # Compound.find_by_stdinchikey(stdinchikey)
    # Find a compound data by a StdInChiKey
    def self.find_by_stdinchikey(stdinchikey)
      parse_xml(REST.new.compounds_stdinchikey(stdinchikey))
    end

    # Compound.find_all_by_substructure(smiles)
    # Substructure Search by a SMILES
    def self.find_all_by_substructure(smiles)
      parse_list_xml(REST.new.compounds_substructure(smiles))
    end

    # Compound.find_all_by_similarity(smiles_with_similarity)
    # Search compounds by a SMILES with similarity
    def self.find_all_by_similarity(smiles_with_similarity)
      parse_list_xml(REST.new.compounds_similarity(smiles_with_similarity))
    end


    # new
    #
    # @param chemblId [String, nil] ChEMBL ID to remember for #resolve
    def initialize(chemblId = nil)
      @chemblId = chemblId
    end

    # Resolve the compound data by given ChEMBL ID
    #
    # Looks the stored ID up with Compound.find and copies every attribute
    # of the result into this instance.
    #
    # @return [Array<Symbol>] ATTRIBUTES
    def resolve
      resolved = self.class.find(@chemblId)
      ATTRIBUTES.each do |attr|
        instance_variable_set("@#{attr}", resolved.public_send(attr))
      end
    end
  end

end
@@ -0,0 +1,25 @@
1
+ require 'nokogiri'
2
+
3
+
4
module BioChEMBL

  # BioChEMBL::DataModel
  #
  # Class-level helpers for the data container classes. The module is meant
  # to be used with +extend+, so inside these methods +self+ is the
  # extending class.
  module DataModel

    # Defines a reader and a writer on the extending class for every name
    # in +attributes+.
    #
    # @param attributes [Array<Symbol, String>] attribute names
    # @return [Array] +attributes+, unchanged
    def set_attr_accessors(attributes)
      attr_accessor(*attributes)
      attributes
    end


    # Generates Ruby source text, one assignment per attribute, of the form
    #
    #   this.NAME = xml.xpath('/ROOT/NAME').text
    #
    # where ROOT is the extending class's unqualified name in lower case.
    # The text refers to local variables named +this+ and +xml+, which the
    # code evaluating it has to provide.
    #
    # @param attributes [Array<Symbol, String>] attribute names
    # @return [String] newline-separated assignment statements
    def set_attr_values(attributes)
      # The root element name is the same for every attribute; derive it once.
      root = to_s.split('::').last.downcase
      attributes.map do |attr|
        "this.#{attr} = xml.xpath('/#{root}/#{attr}').text"
      end.join("\n")
    end

  end

end
@@ -0,0 +1,40 @@
1
+ require 'cgi'
2
+ require 'uri'
3
+ require 'nokogiri'
4
+
5
+
6
module BioChEMBL
  class REST
    # Placeholder for a server-side facade; every method body is still
    # empty and returns nil.
    #
    #  serv = BioChEMBL::REST::Server.new
    #
    class Server
      def initialize
      end

      #  serv.query(:status)
      #  serv.query(:compounds, 'CHEMBL1')
      #
      # +args+ and +options+ are optional so that the calls shown above are
      # valid.
      def query(action, args = nil, options = nil)
      end

      # /compounds/CHEMBL1
      def compounds(chemblId)
      end

      # /compounds/CHEMBL1/image
      def compounds_image(chemblId)
      end

      # /compounds/CHEMBL1/bioactivities
      def compounds_bioactivities(chemblId)
      end

      # The method was originally published under this misspelled name;
      # keep it callable.
      alias_method :comopunds_bioactivities, :compounds_bioactivities

    end

  end

  # Placeholder namespace for database access; nothing is implemented yet.
  class DB
    class Query
      def initialize
      end
    end
  end
end
@@ -0,0 +1,264 @@
1
+ require 'cgi'
2
+ require 'uri'
3
+ require 'bio/version'
4
+ require 'curb'
5
+ require 'nokogiri'
6
+
7
+ require 'bio-chembl/compound.rb'
8
+ require 'bio-chembl/target.rb'
9
+ require 'bio-chembl/assay.rb'
10
+ require 'bio-chembl/bioactivity.rb'
11
+
12
module BioChEMBL

  # Client for the ChEMBL REST web service.
  #
  # Each API method builds the request URL with the ChEMBL_URI method of the
  # same name, performs an HTTP GET and returns the response body.
  class REST

    HOST_NAME = "www.ebi.ac.uk"
    API_ROOT = "chemblws"
    BASE_URI = "https://#{HOST_NAME}/#{API_ROOT}"

    # Base class of the errors raised for HTTP failures. These derive from
    # StandardError so that an ordinary +rescue+ catches them.
    class Error < StandardError; end
    # Raised on HTTP status 400.
    class BadRequest < Error; end
    # Raised on HTTP status 404.
    class NotFound < Error; end
    # Raised on HTTP status 500.
    class ServiceUnavailable < Error; end


    # BioChEMBL::REST::ChEMBL_URI module
    #
    # Builds the URL of every supported end point. All methods return a
    # String and perform no I/O.
    module ChEMBL_URI
      # Joins +path+ onto BASE_URI.
      def self.address(path)
        "#{BASE_URI}/#{path}"
      end


      # BioChEMBL::REST::ChEMBL_URI.status
      def self.status
        # Example URL: http://www.ebi.ac.uk/chemblws/status/
        address("status/")
      end

      # BioChEMBL::REST::ChEMBL_URI.compounds()
      #  compounds("CHEMBL1")
      #
      # @param chemblId [String] compound ID (required)
      # @param arg [String, nil] nil, 'image' or 'bioactivities'
      # @param params [Hash, nil] query parameters, accepted for 'image' only
      # @raise [ArgumentError] for any other combination of arguments
      def self.compounds(chemblId = nil, arg = nil, params = nil)
        if chemblId && arg.nil? && params.nil?
          # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/compounds/CHEMBL1
          address("compounds/#{chemblId}")
        elsif chemblId && arg == 'image' && params.nil?
          # Example URL: http://www.ebi.ac.uk/chemblws/compounds/CHEMBL192/image
          address("compounds/#{chemblId}/#{arg}")
        elsif chemblId && arg == 'image' && params
          # Example URL with dimensions parameter: http://www.ebi.ac.uk/chemblws/compounds/CHEMBL192/image?dimensions=200
          # encode_www_form escapes keys and values, which plain
          # concatenation does not.
          address("compounds/#{chemblId}/#{arg}?" + URI.encode_www_form(params))
        elsif chemblId && arg == 'bioactivities' && params.nil?
          # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/compounds/CHEMBL2/bioactivities
          address("compounds/#{chemblId}/#{arg}")
        else
          raise ArgumentError, "Undefined address. ID: #{chemblId}, arg: #{arg}, params: #{params.inspect}"
        end
      end

      # BioChEMBL::REST::ChEMBL_URI.compounds_stdinchikey()
      def self.compounds_stdinchikey(stdinchikey)
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/compounds/stdinchikey/QFFGVLORLPOAEC-SNVBAGLBSA-N
        address("compounds/stdinchikey/#{stdinchikey}")
      end

      # BioChEMBL::REST::ChEMBL_URI.compounds_smiles()
      # NOTE(review): +smiles+ is placed in the path unescaped; characters
      # such as "#" or "/" in a SMILES presumably need escaping - confirm
      # what the service expects.
      def self.compounds_smiles(smiles)
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/compounds/smiles/COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56
        address("compounds/smiles/#{smiles}")
      end

      # BioChEMBL::REST::ChEMBL_URI.compounds_substructure()
      def self.compounds_substructure(smiles)
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/compounds/substructure/COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56
        address("compounds/substructure/#{smiles}")
      end

      # BioChEMBL::REST::ChEMBL_URI.compounds_similarity()
      # +smiles+ carries the similarity threshold as a trailing "/NN".
      def self.compounds_similarity(smiles)
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/compounds/similarity/COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56/70
        address("compounds/similarity/#{smiles}")
      end

      # BioChEMBL::REST::ChEMBL_URI.targets()
      #
      # @param chemblId [String, nil] target ID; nil addresses the full list
      # @param arg [String, nil] nil or 'bioactivities'
      # @raise [ArgumentError] for any other combination of arguments
      def self.targets(chemblId = nil, arg = nil)
        if chemblId && arg.nil?
          # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/targets/CHEMBL2477
          address("targets/#{chemblId}")
        elsif chemblId && arg == 'bioactivities'
          # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/targets/CHEMBL240/bioactivities
          address("targets/#{chemblId}/bioactivities")
        elsif chemblId.nil? && arg.nil?
          # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/targets
          address("targets")
        else
          raise ArgumentError, "Undefined."
        end
      end

      # BioChEMBL::REST::ChEMBL_URI.targets_uniprot()
      def self.targets_uniprot(uniprot_id)
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/targets/uniprot/Q13936
        address("targets/uniprot/#{uniprot_id}")
      end

      # BioChEMBL::REST::ChEMBL_URI.targets_refseq()
      def self.targets_refseq(refseq_id)
        # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/targets/refseq/NP_001128722
        address("targets/refseq/#{refseq_id}")
      end

      # BioChEMBL::REST::ChEMBL_URI.assays()
      #
      # @param chemblId [String] assay ID (required)
      # @param arg [String, nil] nil or 'bioactivities'
      # @raise [ArgumentError] for any other combination of arguments
      def self.assays(chemblId, arg = nil)
        if chemblId && arg.nil?
          # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/assays/CHEMBL1217643
          address("assays/#{chemblId}")
        elsif chemblId && arg == 'bioactivities'
          # Example URL (XML Output): http://www.ebi.ac.uk/chemblws/assays/CHEMBL1217643/bioactivities
          address("assays/#{chemblId}/bioactivities")
        else
          raise ArgumentError, "Undefined. ChEMBL ID: #{chemblId}, arg: #{arg}"
        end
      end
    end


    # BioChEMBL::REST.website
    def self.website
      "https://www.ebi.ac.uk/chembldb/index.php/ws"
    end

    # BioChEMBL::REST.usage
    #
    # @return [String] a short multi-line usage summary
    def self.usage
      ["a = BioChEMBL::REST.new",
       "a.status => https://www.ebi.ac.uk/chembldb/index.php/ws#serviceStatus",
       'a.compounds("CHEMBL1") => http://www.ebi.ac.uk/chemblws/compounds/CHEMBL1',
       'a.compounds_stdinchikey("QFFGVLORLPOAEC-SNVBAGLBSA-N") => http://www.ebi.ac.uk/chemblws/compounds/stdinchikey/QFFGVLORLPOAEC-SNVBAGLBSA-N'
      ].join("\n")
    end

    # BioChEMBL::REST.up? #=> true/false
    #
    # True when the body returned by the status end point is exactly "UP".
    def self.up?
      new.status == "UP"
    end


    # new
    #
    # @param uri [String, URI] base address of the service
    # @raise [URI::InvalidURIError] when +uri+ is a String that cannot be parsed
    def initialize(uri = BASE_URI)
      # NOTE(review): +uri+ is only checked for being parseable; request
      # URLs are always built from BASE_URI. Confirm whether a custom base
      # address is meant to be honoured.
      URI.parse(uri) unless uri.kind_of?(URI)
      @header = {
        'User-Agent' => "BioChEMBL, BioRuby/#{Bio::BIORUBY_VERSION_ID}"
      }
      @debug = false
    end

    # If true, shows debug information to $stderr.
    attr_accessor :debug

    # Performs an HTTP GET of +url+ with the client's request headers.
    #
    # @param url [String]
    # @return [Curl::Easy] the handle after the request has been performed
    def get(url)
      easy = Curl::Easy.new(url) do |c|
        @header.each do |k, v|
          c.headers[k] = v
        end
      end
      easy.perform
      easy
    end

    # Turns a performed request into the value handed back to the caller.
    #
    # @param response [#response_code, #body_str]
    # @return [String, nil] the body for status 200; nil for any status that
    #   is not 200, 400, 404 or 500
    # @raise [BadRequest] on status 400
    # @raise [NotFound] on status 404
    # @raise [ServiceUnavailable] on status 500
    def prepare_return_value(response)
      $stderr.puts "ChEMBL: #{response.inspect}" if @debug

      case response.response_code
      when 200
        response.body_str
      when 400
        raise BadRequest, "400 Bad request #{response.inspect}"
      when 404
        raise NotFound, "404 Not found #{response.inspect}"
      when 500
        raise ServiceUnavailable, "500 Service unavailable"
      end
    end

    # uri
    #
    # @return [Module] the URL builder, ChEMBL_URI
    def uri
      ChEMBL_URI
    end

    # address
    def address(path)
      "#{BASE_URI}/#{path}"
    end



    # API methods

    # Builds the URL with ChEMBL_URI.<method>(*args), fetches it and returns
    # the prepared response.
    #
    # @param method [Symbol, String] name of a ChEMBL_URI method
    # @param args [Array] at most three arguments for that method
    # @raise [ArgumentError] when more than three arguments are given
    def get_body(method, args = [])
      if args.size > 3
        raise ArgumentError, "method=#{method}, args=#{args.inspect}"
      end

      # Direct dispatch; no source text is assembled and evaluated.
      prepare_return_value(get(uri.public_send(method, *args)))
    end
    private :get_body

    # Name of the method that called this one, taken from the call stack.
    # Both backtrace label styles are handled: "in `name'" and
    # "in 'Class#name'". The API methods below use __method__ instead; this
    # helper is retained for code that still calls it.
    def current_method_name
      label = caller(1).first[/[`']([^']*)'\z/, 1].to_s
      label.split(/[#.]/).last.to_s
    end
    private :current_method_name


    def status
      get_body(__method__)
    end

    def compounds(chemblId = nil, action = nil, params = nil)
      get_body(__method__, [chemblId, action, params])
    end

    def compounds_stdinchikey(stdinchikey)
      get_body(__method__, [stdinchikey])
    end

    def compounds_smiles(smiles)
      get_body(__method__, [smiles])
    end

    def compounds_substructure(smiles)
      get_body(__method__, [smiles])
    end

    def compounds_similarity(smiles)
      get_body(__method__, [smiles])
    end

    def targets(chemblId = nil, action = nil)
      get_body(__method__, [chemblId, action])
    end

    def targets_uniprot(uniprot_id)
      get_body(__method__, [uniprot_id])
    end

    def targets_refseq(refseq_id)
      get_body(__method__, [refseq_id])
    end

    def assays(chemblId = nil, action = nil)
      get_body(__method__, [chemblId, action])
    end
  end

end