pubchem_api 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/pubchem_api/version.rb +3 -0
- data/lib/pubchem_api.rb +357 -0
- metadata +73 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7059d71502bdb3c98af07f3db60a6abc7454d58cdaf491adbb9bf1de2d15b3e6
|
4
|
+
data.tar.gz: 2048c868d69f83eb7b08237bf3470a3cc72b2de3ad0dff639b7cc687b476616d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b127bc9ca837c5606159fb17013ad10f192be96dce0ab7386c23e3b58dc4e4097bcbb77337b9e48cb9a875a3d74c211abc5bcd4232afab24787e3f7d5ca08416
|
7
|
+
data.tar.gz: 980b8f88f0bb225e6d5198bf411351ec5bf6a3784fcf8e5784ce4a6fe1fdedec2210959e775f04dec03bfcd710b8a989c7828c71f9c1b942bb17700e77fc8763
|
data/lib/pubchem_api.rb
ADDED
@@ -0,0 +1,357 @@
|
|
1
|
+
require 'erb'
require 'httparty'
require 'nokogiri'
|
3
|
+
|
4
|
+
module PubChemAPI
|
5
|
+
# Error raised for failed API calls; carries an optional numeric/status code
# next to the usual exception message.
class APIError < StandardError
  # @param message [String] text forwarded to StandardError
  # @param code [Object, nil] code associated with the failure (nil when not given)
  def initialize(message, code = nil)
    @code = code
    super(message)
  end

  # @return [Object, nil] the code passed to the constructor
  attr_reader :code
end
|
14
|
+
|
15
|
+
# Base class for API responses.
#
# Keeps the payload it was built from so that callers can still reach fields
# that a subclass does not expose through a dedicated reader.
class APIResponse
  # @return [Object] the payload handed to the constructor, unmodified
  attr_reader :data

  # @param data [Object] parsed response payload (a Hash for JSON responses)
  def initialize(data)
    @data = data
  end
end
|
21
|
+
|
22
|
+
# Value object built from a compound record payload (root key 'PC_Compounds').
#
# Only the first compound of the payload is read. Its CID and a few entries of
# its 'props' array are exposed through readers; a property that is not present
# in the payload is reported as nil.
class CompoundRecord < APIResponse
  attr_reader :cid, :molecular_formula, :molecular_weight, :canonical_smiles, :inchi_key

  # @param data [Hash] parsed response containing a 'PC_Compounds' array
  # @raise [ArgumentError] if the payload holds no compound entry
  def initialize(data)
    super(data)
    compounds = data.is_a?(Hash) ? data['PC_Compounds'] : nil
    compound = compounds.is_a?(Array) ? compounds.first : nil
    raise ArgumentError, "Response contains no 'PC_Compounds' entry" unless compound

    @cid = compound.dig('id', 'id', 'cid')

    # Extract properties from the 'props' array (treated as empty when absent)
    props = compound['props'] || []
    @molecular_formula = extract_prop(props, 'Molecular Formula')
    # Convert only when a value exists: nil.to_f would turn a missing weight into 0.0
    weight = extract_prop(props, 'Molecular Weight')
    @molecular_weight = weight&.to_f
    @canonical_smiles = extract_prop(props, 'SMILES', 'Canonical')
    @inchi_key = extract_prop(props, 'InChIKey', 'Standard')
  end

  private

  # Finds the first property whose 'urn' matches +label+ (and +name+, when
  # given) and returns the first entry of its 'value' hash.
  #
  # @param props [Array<Hash>] entries of the compound's 'props' array
  # @param label [String] required value of urn['label']
  # @param name [String, nil] required value of urn['name']; nil matches any name
  # @return [Object, nil] the property value, or nil when nothing matches
  def extract_prop(props, label, name = nil)
    match = props.find do |entry|
      urn = entry['urn'] || {}
      urn['label'] == label && (name.nil? || urn['name'] == name)
    end
    value = match && match['value']
    value.is_a?(Hash) ? value.values.first : nil
  end
end
|
47
|
+
|
48
|
+
# Value object built from the first entry of a 'PC_Substances' payload.
class SubstanceRecord < APIResponse
  attr_reader :sid, :synonyms, :source_name

  # @param data [Hash] parsed response whose 'PC_Substances' value is a list
  def initialize(data)
    super
    entry = data['PC_Substances'].first
    @sid = entry['sid']['id']
    @synonyms = entry['synonyms']
    source_db = entry['source']['db']
    @source_name = source_db['name']
  end
end
|
59
|
+
|
60
|
+
# Value object built from the first entry of a 'PC_AssayContainer' payload;
# all exposed fields come from that entry's assay description ('descr').
class AssayRecord < APIResponse
  attr_reader :aid, :name, :description

  # @param data [Hash] parsed response whose 'PC_AssayContainer' value is a list
  def initialize(data)
    super
    container = data['PC_AssayContainer'].first
    summary = container['assay']['descr']
    @aid = summary['aid']['id']
    @name = summary['name']
    @description = summary['description']
  end
end
|
73
|
+
|
74
|
+
# Value object built from the first 'Information' entry of an
# 'InformationList' payload describing a gene.
class GeneSummary < APIResponse
  attr_reader :gene_id, :symbol, :name, :taxonomy_id, :description

  # Instance variable => key of the gene entry it is filled from,
  # listed in assignment order.
  FIELD_KEYS = {
    '@gene_id' => 'GeneID',
    '@symbol' => 'Symbol',
    '@name' => 'Name',
    '@taxonomy_id' => 'TaxID',
    '@description' => 'Description'
  }.freeze
  private_constant :FIELD_KEYS

  # @param data [Hash] parsed response with data['InformationList']['Information'] as a list
  def initialize(data)
    super
    entry = data['InformationList']['Information'].first
    FIELD_KEYS.each { |ivar, key| instance_variable_set(ivar, entry[key]) }
  end
end
|
87
|
+
|
88
|
+
# Value object built from the first entry of a 'TaxaInfo' payload.
class TaxonomySummary < APIResponse
  attr_reader :taxonomy_id, :scientific_name, :common_name, :rank, :synonyms

  # @param data [Hash] parsed response whose 'TaxaInfo' value is a list
  def initialize(data)
    super
    info = data['TaxaInfo'].first
    @taxonomy_id = info['TaxId']
    @scientific_name = info['ScientificName']
    # Common name and synonyms both live under the 'OtherNames' sub-hash
    alternates = info['OtherNames']
    @common_name = alternates['CommonName']
    @rank = info['Rank']
    @synonyms = alternates['Synonym']
  end
end
|
101
|
+
|
102
|
+
# Value object built from the first entry of a 'Pathway' payload.
class PathwaySummary < APIResponse
  attr_reader :pathway_accession, :source_name, :name, :category, :description, :taxonomy_id

  # Instance variable => key of the pathway entry it is filled from,
  # listed in assignment order.
  FIELD_KEYS = {
    '@pathway_accession' => 'PathwayId',
    '@source_name' => 'SourceName',
    '@name' => 'Name',
    '@category' => 'Category',
    '@description' => 'Description',
    '@taxonomy_id' => 'TaxId'
  }.freeze
  private_constant :FIELD_KEYS

  # @param data [Hash] parsed response whose 'Pathway' value is a list
  def initialize(data)
    super
    entry = data['Pathway'].first
    FIELD_KEYS.each { |ivar, key| instance_variable_set(ivar, entry[key]) }
  end
end
|
116
|
+
|
117
|
+
# Value object built from the first 'Information' entry of an
# 'InformationList' payload describing a protein.
class ProteinSummary < APIResponse
  attr_reader :accession, :name, :taxonomy_id, :synonyms

  # Instance variable => key of the protein entry it is filled from,
  # listed in assignment order. Note that #name is read from 'Title'.
  FIELD_KEYS = {
    '@accession' => 'Accession',
    '@name' => 'Title',
    '@taxonomy_id' => 'TaxId',
    '@synonyms' => 'Synonym'
  }.freeze
  private_constant :FIELD_KEYS

  # @param data [Hash] parsed response with data['InformationList']['Information'] as a list
  def initialize(data)
    super
    entry = data['InformationList']['Information'].first
    FIELD_KEYS.each { |ivar, key| instance_variable_set(ivar, entry[key]) }
  end
end
|
129
|
+
|
130
|
+
# HTTP client for the PubChem PUG REST API.
#
# Every public method validates its arguments against the whitelists below,
# builds a path relative to `base_uri`, issues a GET request through HTTParty
# and hands the response to #parse_response. Unless stated otherwise all
# public methods share these conventions:
#
# * `output` must be one of the allowed format strings (case-sensitive).
# * `options` is a Hash sent as the URL query string.
# * Return value: for 'JSON' the parsed body (wrapped in the method's record
#   class when it has one and the payload carries the matching root key),
#   for 'XML' a Nokogiri document with namespaces removed, for any other
#   format the raw response body.
# * Raises ArgumentError for a disallowed argument and APIError for a
#   non-success HTTP response.
class Client
  include HTTParty
  base_uri 'https://pubchem.ncbi.nlm.nih.gov/rest/pug'

  # Constants for allowed values (frozen so the whitelists cannot be mutated)
  COMPOUND_OPERATIONS = %w[record property synonyms sids aids classification description conformers].freeze
  SUBSTANCE_OPERATIONS = %w[record synonyms cids aids classification description].freeze
  ASSAY_OPERATIONS = %w[record concise description summary doseresponse targets].freeze
  OUTPUT_FORMATS = %w[XML JSON JSONP ASNT ASNB SDF CSV TXT PNG].freeze
  OUTPUT_FORMATS_SIMPLE = %w[XML JSON].freeze
  SEARCH_TYPES = %w[fastsubstructure fastsuperstructure fastsimilarity_2d fastidentity fastformula].freeze
  NAMESPACES = %w[smiles inchi sdf cid].freeze
  ID_TYPES = %w[cid sid aid patent geneid protein taxonomyid pathwayid cellid].freeze
  TARGET_TYPES = %w[ProteinGI ProteinName GeneID GeneSymbol].freeze
  DEFAULT_OUTPUT = 'JSON'.freeze

  # Record class name => top-level JSON key its constructor reads. A payload
  # without that key (e.g. the result of a non-'record' operation) is returned
  # as plain parsed data instead of being forced into the record class.
  SCHEMA_ROOT_KEYS = {
    'CompoundRecord' => 'PC_Compounds',
    'SubstanceRecord' => 'PC_Substances',
    'AssayRecord' => 'PC_AssayContainer',
    'GeneSummary' => 'InformationList',
    'TaxonomySummary' => 'TaxaInfo',
    'PathwaySummary' => 'Pathway',
    'ProteinSummary' => 'InformationList'
  }.freeze
  private_constant :SCHEMA_ROOT_KEYS

  # Retrieve compound data by CID.
  #
  # @param cid [#to_s] compound identifier, interpolated into the path as-is
  # @param operation [String] one of COMPOUND_OPERATIONS
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] query-string parameters
  def get_compound_by_cid(cid, operation, output = DEFAULT_OUTPUT, options = {})
    validate_operation(operation, COMPOUND_OPERATIONS)
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/compound/cid/#{cid}/#{operation}/#{output}", output, options, 'CompoundRecord')
  end

  # Retrieve compound data by name.
  #
  # @param name [#to_s] compound name; percent-encoded before use
  # @param operation [String] one of COMPOUND_OPERATIONS
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] query-string parameters
  def get_compound_by_name(name, operation, output = DEFAULT_OUTPUT, options = {})
    validate_operation(operation, COMPOUND_OPERATIONS)
    validate_output_format(output, OUTPUT_FORMATS)
    path = "/compound/name/#{encode_path_segment(name)}/#{operation}/#{output}"
    fetch_and_parse(path, output, options, 'CompoundRecord')
  end

  # Retrieve substance data by SID.
  #
  # @param sid [#to_s] substance identifier, interpolated into the path as-is
  # @param operation [String] one of SUBSTANCE_OPERATIONS
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] query-string parameters
  def get_substance_by_sid(sid, operation, output = DEFAULT_OUTPUT, options = {})
    validate_operation(operation, SUBSTANCE_OPERATIONS)
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/substance/sid/#{sid}/#{operation}/#{output}", output, options, 'SubstanceRecord')
  end

  # Retrieve assay data by AID.
  #
  # @param aid [#to_s] assay identifier, interpolated into the path as-is
  # @param operation [String] one of ASSAY_OPERATIONS
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] query-string parameters
  def get_assay_by_aid(aid, operation, output = DEFAULT_OUTPUT, options = {})
    validate_operation(operation, ASSAY_OPERATIONS)
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/assay/aid/#{aid}/#{operation}/#{output}", output, options, 'AssayRecord')
  end

  # Retrieve gene summary by GeneID.
  #
  # @param geneid [#to_s] gene identifier, interpolated into the path as-is
  # @param output [String] one of OUTPUT_FORMATS_SIMPLE
  # @param options [Hash] query-string parameters
  def get_gene_summary_by_geneid(geneid, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS_SIMPLE)
    fetch_and_parse("/gene/geneid/#{geneid}/summary/#{output}", output, options, 'GeneSummary')
  end

  # Retrieve taxonomy summary by TaxonomyID.
  #
  # @param taxid [#to_s] taxonomy identifier, interpolated into the path as-is
  # @param output [String] one of OUTPUT_FORMATS_SIMPLE
  # @param options [Hash] query-string parameters
  def get_taxonomy_summary_by_taxid(taxid, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS_SIMPLE)
    fetch_and_parse("/taxonomy/taxid/#{taxid}/summary/#{output}", output, options, 'TaxonomySummary')
  end

  # Retrieve pathway summary by pathway accession.
  #
  # @param pwacc [#to_s] pathway accession; percent-encoded before use
  # @param output [String] one of OUTPUT_FORMATS_SIMPLE
  # @param options [Hash] query-string parameters
  def get_pathway_summary_by_pwacc(pwacc, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS_SIMPLE)
    path = "/pathway/pwacc/#{encode_path_segment(pwacc)}/summary/#{output}"
    fetch_and_parse(path, output, options, 'PathwaySummary')
  end

  # Retrieve assay dose-response data (never wrapped in a record class).
  #
  # @param aid [#to_s] assay identifier, interpolated into the path as-is
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] query-string parameters
  def get_assay_doseresponse(aid, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/assay/aid/#{aid}/doseresponse/#{output}", output, options)
  end

  # Retrieve assay targets by target type (never wrapped in a record class).
  #
  # @param aid [#to_s] assay identifier, interpolated into the path as-is
  # @param target_type [String] one of TARGET_TYPES
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] query-string parameters
  def get_assay_targets(aid, target_type, output = DEFAULT_OUTPUT, options = {})
    validate_value(target_type, TARGET_TYPES, 'target type')
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/assay/aid/#{aid}/targets/#{target_type}/#{output}", output, options)
  end

  # Retrieve gene summary by synonym.
  #
  # @param synonym [#to_s] gene synonym; percent-encoded before use
  # @param output [String] one of OUTPUT_FORMATS_SIMPLE
  # @param options [Hash] query-string parameters
  def get_gene_summary_by_synonym(synonym, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS_SIMPLE)
    path = "/gene/synonym/#{encode_path_segment(synonym)}/summary/#{output}"
    fetch_and_parse(path, output, options, 'GeneSummary')
  end

  # Retrieve protein summary by synonym.
  #
  # @param synonym [#to_s] protein synonym; percent-encoded before use
  # @param output [String] one of OUTPUT_FORMATS_SIMPLE
  # @param options [Hash] query-string parameters
  def get_protein_summary_by_synonym(synonym, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS_SIMPLE)
    path = "/protein/synonym/#{encode_path_segment(synonym)}/summary/#{output}"
    fetch_and_parse(path, output, options, 'ProteinSummary')
  end

  # Retrieve compound conformers by CID (never wrapped in a record class).
  #
  # @param cid [#to_s] compound identifier, interpolated into the path as-is
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] query-string parameters
  def get_compound_conformers(cid, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/compound/cid/#{cid}/conformers/#{output}", output, options)
  end

  # Search within a previous result using cache key.
  #
  # @param smiles [#to_s] query structure; percent-encoded before use
  # @param cachekey [Object] sent as the 'cachekey' query parameter
  #   (overrides a 'cachekey' entry in +options+)
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] further query-string parameters; not mutated
  def compound_fastsubstructure_search(smiles, cachekey, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS)
    path = "/compound/fastsubstructure/smiles/#{encode_path_segment(smiles)}/cids/#{output}"
    fetch_and_parse(path, output, options.merge('cachekey' => cachekey))
  end

  # Retrieve classification nodes as cache key.
  #
  # @param hnid [#to_s] hierarchy node identifier, interpolated into the path as-is
  # @param idtype [String] one of ID_TYPES
  # @param list_return [Object] sent as the 'list_return' query parameter
  #   (overrides a 'list_return' entry in +options+)
  # @param output [String] one of OUTPUT_FORMATS_SIMPLE
  # @param options [Hash] further query-string parameters; not mutated
  def get_classification_nodes(hnid, idtype, list_return, output = DEFAULT_OUTPUT, options = {})
    validate_value(idtype, ID_TYPES, 'ID type')
    validate_output_format(output, OUTPUT_FORMATS_SIMPLE)
    path = "/classification/hnid/#{hnid}/#{idtype}/#{output}"
    fetch_and_parse(path, output, options.merge('list_return' => list_return))
  end

  # Retrieve compounds by listkey with pagination.
  #
  # @param listkey [#to_s] list key, interpolated into the path as-is
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] query-string parameters
  def get_compounds_by_listkey(listkey, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/compound/listkey/#{listkey}/cids/#{output}", output, options)
  end

  # Compound structure search operations.
  #
  # @param search_type [String] one of SEARCH_TYPES
  # @param namespace [String] one of NAMESPACES
  # @param identifier [#to_s] structure identifier; percent-encoded before use
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] query-string parameters
  def compound_structure_search(search_type, namespace, identifier, output = DEFAULT_OUTPUT, options = {})
    validate_value(search_type, SEARCH_TYPES, 'search type')
    validate_value(namespace, NAMESPACES, 'namespace')
    validate_output_format(output, OUTPUT_FORMATS)
    path = "/compound/#{search_type}/#{namespace}/#{encode_path_segment(identifier)}/cids/#{output}"
    fetch_and_parse(path, output, options)
  end

  private

  # Percent-encodes a value for use as a single URL path segment. Form
  # encoding (CGI.escape) is not suitable here: it turns a space into '+',
  # which is a literal plus sign inside a path.
  def encode_path_segment(value)
    ERB::Util.url_encode(value)
  end

  # Issues the GET request and converts the response (see #parse_response).
  def fetch_and_parse(path, output, options, schema_class = nil)
    response = self.class.get(path, query: options)
    parse_response(response, output, schema_class)
  end

  # Validate operation
  def validate_operation(operation, allowed_operations)
    return if allowed_operations.include?(operation)

    raise ArgumentError, "Invalid operation: #{operation}. Allowed operations: #{allowed_operations.join(', ')}"
  end

  # Validate output format
  def validate_output_format(output, allowed_formats)
    return if allowed_formats.include?(output)

    raise ArgumentError, "Invalid output format: #{output}. Allowed formats: #{allowed_formats.join(', ')}"
  end

  # Validate value against allowed values
  def validate_value(value, allowed_values, name)
    return if allowed_values.include?(value)

    raise ArgumentError, "Invalid #{name}: #{value}. Allowed values: #{allowed_values.join(', ')}"
  end

  # Parse API response and map to Ruby objects.
  #
  # @param response [HTTParty::Response]
  # @param output_format [String] format that was requested
  # @param schema_class [String, nil] name of a PubChemAPI record class
  # @raise [APIError] when the response is not successful
  def parse_response(response, output_format, schema_class = nil)
    handle_error_response(response) unless response.success?

    case output_format
    when 'JSON'
      build_record(response.parsed_response, schema_class)
    when 'XML'
      doc = Nokogiri::XML(response.body)
      doc.remove_namespaces!
      schema_class ? parse_xml_to_object(doc, schema_class) : doc
    else
      response.body
    end
  end

  # Wraps parsed JSON in the requested record class, but only when the payload
  # carries the root key that class reads; otherwise returns the data itself.
  def build_record(data, schema_class)
    return data unless schema_class

    root_key = SCHEMA_ROOT_KEYS[schema_class]
    return data if root_key && !(data.is_a?(Hash) && data.key?(root_key))

    PubChemAPI.const_get(schema_class).new(data)
  end

  # Handle error responses: always raises APIError carrying the HTTP code.
  # For JSON bodies the message is taken from Fault/Message when present.
  def handle_error_response(response)
    content_type = response.headers['Content-Type']
    unless content_type && content_type.include?('application/json')
      raise APIError.new("HTTP Error #{response.code}: #{response.message}", response.code)
    end

    body = response.parsed_response
    fault = body['Fault'] if body.is_a?(Hash)
    message = fault['Message'] if fault.is_a?(Hash)
    raise APIError.new(message || 'Unknown error', response.code)
  end

  # Parse XML to Ruby object
  def parse_xml_to_object(doc, schema_class)
    # TODO: For now, just return the raw Nokogiri document
    doc
  end
end
|
357
|
+
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pubchem_api
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- coderobe
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-11-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: httparty
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.22.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.22.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.16.7
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.16.7
|
41
|
+
description: PubChem PUG REST api wrapper module
|
42
|
+
email:
|
43
|
+
- git@coderobe.net
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- lib/pubchem_api.rb
|
49
|
+
- lib/pubchem_api/version.rb
|
50
|
+
homepage: https://github.com/coderobe/ruby-pubchem_api
|
51
|
+
licenses:
|
52
|
+
- MIT
|
53
|
+
metadata: {}
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
requirements: []
|
69
|
+
rubygems_version: 3.5.4
|
70
|
+
signing_key:
|
71
|
+
specification_version: 4
|
72
|
+
summary: PubChem PUG REST api wrapper
|
73
|
+
test_files: []
|