pubchem_api 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7059d71502bdb3c98af07f3db60a6abc7454d58cdaf491adbb9bf1de2d15b3e6
4
+ data.tar.gz: 2048c868d69f83eb7b08237bf3470a3cc72b2de3ad0dff639b7cc687b476616d
5
+ SHA512:
6
+ metadata.gz: b127bc9ca837c5606159fb17013ad10f192be96dce0ab7386c23e3b58dc4e4097bcbb77337b9e48cb9a875a3d74c211abc5bcd4232afab24787e3f7d5ca08416
7
+ data.tar.gz: 980b8f88f0bb225e6d5198bf411351ec5bf6a3784fcf8e5784ce4a6fe1fdedec2210959e775f04dec03bfcd710b8a989c7828c71f9c1b942bb17700e77fc8763
@@ -0,0 +1,3 @@
1
module PubChemAPI
  # Version string of the pubchem_api gem.
  # Frozen so the constant cannot be mutated in place by callers.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,357 @@
1
require 'uri'

require 'httparty'
require 'nokogiri'
3
+
4
+ module PubChemAPI
5
# Error raised when the PubChem API answers with a non-success response.
#
# Besides the usual exception message it carries an optional +code+
# (the value handed to the constructor, nil when omitted).
class APIError < StandardError
  # @return [Object, nil] the code supplied at construction time
  attr_reader :code

  # @param message [String] human-readable error description
  # @param code [Object, nil] optional status code associated with the error
  def initialize(message, code = nil)
    @code = code
    # Forward only the message; StandardError does not accept the code.
    super(message)
  end
end
14
+
15
# Base class for API responses.
#
# Keeps a reference to the raw payload a response object was built from so
# that fields not surfaced by a subclass's readers remain reachable.
class APIResponse
  # @return [Object] the unmodified payload passed to the constructor
  attr_reader :data

  # @param data [Object] raw payload (the subclasses in this file index it like a Hash)
  def initialize(data)
    @data = data
  end
end
21
+
22
# Wraps the first entry of a 'PC_Compounds' payload and exposes a handful of
# its fields as readers.
class CompoundRecord < APIResponse
  attr_reader :cid, :molecular_formula, :molecular_weight, :canonical_smiles, :inchi_key

  # @param data [Hash] payload whose 'PC_Compounds' value is a non-empty list
  def initialize(data)
    super
    record = data['PC_Compounds'].first
    @cid = record['id']['id']['cid']

    # The remaining fields live in the 'props' list and are looked up by
    # their URN label (and, where given, URN name).
    properties = record['props']
    @molecular_formula = extract_prop(properties, 'Molecular Formula')
    # nil.to_f is 0.0, so an absent weight property yields 0.0.
    @molecular_weight = extract_prop(properties, 'Molecular Weight').to_f
    @canonical_smiles = extract_prop(properties, 'SMILES', 'Canonical')
    @inchi_key = extract_prop(properties, 'InChIKey', 'Standard')
  end

  private

  # Finds the first property whose URN matches +label+ (and +name+ when one
  # is given) and returns the first entry of its 'value' hash.
  #
  # @param props [Array<Hash>] property entries with 'urn' and 'value' keys
  # @param label [String] required URN label
  # @param name [String, nil] required URN name; nil matches any name
  # @return [Object, nil] the property's first value, or nil when nothing matches
  def extract_prop(props, label, name = nil)
    match = props.find do |entry|
      urn = entry['urn']
      next false unless urn['label'] == label

      name.nil? || urn['name'] == name
    end
    return nil unless match

    match['value'].values.first
  end
end
47
+
48
# Wraps the first entry of a 'PC_Substances' payload.
class SubstanceRecord < APIResponse
  attr_reader :sid, :synonyms, :source_name

  # @param data [Hash] payload whose 'PC_Substances' value is a non-empty list
  def initialize(data)
    super
    entry = data['PC_Substances'].first
    sid_info, @synonyms, source = entry.values_at('sid', 'synonyms', 'source')
    @sid = sid_info['id']
    @source_name = source['db']['name']
  end
end
59
+
60
# Wraps the description section of the first entry of a 'PC_AssayContainer'
# payload.
class AssayRecord < APIResponse
  attr_reader :aid, :name, :description

  # @param data [Hash] payload whose 'PC_AssayContainer' value is a non-empty list
  def initialize(data)
    super
    container = data['PC_AssayContainer'].first
    details = container['assay']['descr']
    @aid = details['aid']['id']
    @name, @description = details.values_at('name', 'description')
  end
end
73
+
74
# Wraps the first 'Information' entry of an 'InformationList' payload and
# exposes its gene fields.
class GeneSummary < APIResponse
  attr_reader :gene_id, :symbol, :name, :taxonomy_id, :description

  # @param data [Hash] payload with a non-empty data['InformationList']['Information'] list
  def initialize(data)
    super
    info = data['InformationList']['Information'].first
    @gene_id, @symbol, @name, @taxonomy_id, @description =
      info.values_at('GeneID', 'Symbol', 'Name', 'TaxID', 'Description')
  end
end
87
+
88
# Wraps the first entry of a 'TaxaInfo' payload.
class TaxonomySummary < APIResponse
  attr_reader :taxonomy_id, :scientific_name, :common_name, :rank, :synonyms

  # @param data [Hash] payload whose 'TaxaInfo' value is a non-empty list
  def initialize(data)
    super
    info = data['TaxaInfo'].first
    @taxonomy_id, @scientific_name, @rank = info.values_at('TaxId', 'ScientificName', 'Rank')

    # Common name and synonyms are both nested under 'OtherNames'.
    alternates = info['OtherNames']
    @common_name = alternates['CommonName']
    @synonyms = alternates['Synonym']
  end
end
101
+
102
# Wraps the first entry of a 'Pathway' payload.
class PathwaySummary < APIResponse
  attr_reader :pathway_accession, :source_name, :name, :category, :description, :taxonomy_id

  # @param data [Hash] payload whose 'Pathway' value is a non-empty list
  def initialize(data)
    super
    entry = data['Pathway'].first
    @pathway_accession, @source_name, @name, @category, @description, @taxonomy_id =
      entry.values_at('PathwayId', 'SourceName', 'Name', 'Category', 'Description', 'TaxId')
  end
end
116
+
117
# Wraps the first 'Information' entry of an 'InformationList' payload and
# exposes its protein fields.
class ProteinSummary < APIResponse
  attr_reader :accession, :name, :taxonomy_id, :synonyms

  # @param data [Hash] payload with a non-empty data['InformationList']['Information'] list
  def initialize(data)
    super
    info = data['InformationList']['Information'].first
    # Note: the reader is called +name+ but is filled from the 'Title' key.
    @accession, @name, @taxonomy_id, @synonyms =
      info.values_at('Accession', 'Title', 'TaxId', 'Synonym')
  end
end
129
+
130
# HTTParty-based client for the PubChem PUG REST API.
#
# Every public method follows the same pattern: validate the arguments
# against the whitelists below (raising ArgumentError on a mismatch), issue a
# GET request relative to the base URI with +options+ as the query string,
# and hand the response to #parse_response.
#
# Unless noted otherwise each method
# * returns whatever #parse_response returns for the chosen +output+, and
# * raises APIError when the HTTP response is not successful.
class Client
  include HTTParty
  base_uri 'https://pubchem.ncbi.nlm.nih.gov/rest/pug'

  # Constants for allowed values (frozen so the whitelists cannot be mutated)
  COMPOUND_OPERATIONS = %w[record property synonyms sids aids classification description conformers].freeze
  SUBSTANCE_OPERATIONS = %w[record synonyms cids aids classification description].freeze
  ASSAY_OPERATIONS = %w[record concise description summary doseresponse targets].freeze
  OUTPUT_FORMATS = %w[XML JSON JSONP ASNT ASNB SDF CSV TXT PNG].freeze
  OUTPUT_FORMATS_SIMPLE = %w[XML JSON].freeze
  SEARCH_TYPES = %w[fastsubstructure fastsuperstructure fastsimilarity_2d fastidentity fastformula].freeze
  NAMESPACES = %w[smiles inchi sdf cid].freeze
  ID_TYPES = %w[cid sid aid patent geneid protein taxonomyid pathwayid cellid].freeze
  TARGET_TYPES = %w[ProteinGI ProteinName GeneID GeneSymbol].freeze
  DEFAULT_OUTPUT = 'JSON'.freeze

  # Top-level JSON key that each record class in this module reads first.
  # A JSON payload is only wrapped in a record class when this key is present;
  # otherwise the class's constructor would fail on a nil lookup.
  SCHEMA_ROOT_KEYS = {
    'CompoundRecord' => 'PC_Compounds',
    'SubstanceRecord' => 'PC_Substances',
    'AssayRecord' => 'PC_AssayContainer',
    'GeneSummary' => 'InformationList',
    'TaxonomySummary' => 'TaxaInfo',
    'PathwaySummary' => 'Pathway',
    'ProteinSummary' => 'InformationList'
  }.freeze
  private_constant :SCHEMA_ROOT_KEYS

  # Retrieve compound data by CID.
  #
  # @param cid [Integer, String] compound id, interpolated into the path as-is
  # @param operation [String] one of COMPOUND_OPERATIONS
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +operation+ or +output+ is not allowed
  def get_compound_by_cid(cid, operation, output = DEFAULT_OUTPUT, options = {})
    validate_operation(operation, COMPOUND_OPERATIONS)
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/compound/cid/#{cid}/#{operation}/#{output}", output, options, 'CompoundRecord')
  end

  # Retrieve compound data by name. The name is percent-encoded before it is
  # placed in the path.
  #
  # @param name [String] compound name
  # @param operation [String] one of COMPOUND_OPERATIONS
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +operation+ or +output+ is not allowed
  def get_compound_by_name(name, operation, output = DEFAULT_OUTPUT, options = {})
    validate_operation(operation, COMPOUND_OPERATIONS)
    validate_output_format(output, OUTPUT_FORMATS)
    path = "/compound/name/#{escape_path_segment(name)}/#{operation}/#{output}"
    fetch_and_parse(path, output, options, 'CompoundRecord')
  end

  # Retrieve substance data by SID.
  #
  # @param sid [Integer, String] substance id, interpolated into the path as-is
  # @param operation [String] one of SUBSTANCE_OPERATIONS
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +operation+ or +output+ is not allowed
  def get_substance_by_sid(sid, operation, output = DEFAULT_OUTPUT, options = {})
    validate_operation(operation, SUBSTANCE_OPERATIONS)
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/substance/sid/#{sid}/#{operation}/#{output}", output, options, 'SubstanceRecord')
  end

  # Retrieve assay data by AID.
  #
  # @param aid [Integer, String] assay id, interpolated into the path as-is
  # @param operation [String] one of ASSAY_OPERATIONS
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +operation+ or +output+ is not allowed
  def get_assay_by_aid(aid, operation, output = DEFAULT_OUTPUT, options = {})
    validate_operation(operation, ASSAY_OPERATIONS)
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/assay/aid/#{aid}/#{operation}/#{output}", output, options, 'AssayRecord')
  end

  # Retrieve gene summary by GeneID.
  #
  # @param geneid [Integer, String] gene id, interpolated into the path as-is
  # @param output [String] one of OUTPUT_FORMATS_SIMPLE
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +output+ is not allowed
  def get_gene_summary_by_geneid(geneid, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS_SIMPLE)
    fetch_and_parse("/gene/geneid/#{geneid}/summary/#{output}", output, options, 'GeneSummary')
  end

  # Retrieve taxonomy summary by TaxonomyID.
  #
  # @param taxid [Integer, String] taxonomy id, interpolated into the path as-is
  # @param output [String] one of OUTPUT_FORMATS_SIMPLE
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +output+ is not allowed
  def get_taxonomy_summary_by_taxid(taxid, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS_SIMPLE)
    fetch_and_parse("/taxonomy/taxid/#{taxid}/summary/#{output}", output, options, 'TaxonomySummary')
  end

  # Retrieve pathway summary by pathway accession. The accession is
  # percent-encoded before it is placed in the path.
  #
  # @param pwacc [String] pathway accession
  # @param output [String] one of OUTPUT_FORMATS_SIMPLE
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +output+ is not allowed
  def get_pathway_summary_by_pwacc(pwacc, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS_SIMPLE)
    path = "/pathway/pwacc/#{escape_path_segment(pwacc)}/summary/#{output}"
    fetch_and_parse(path, output, options, 'PathwaySummary')
  end

  # Retrieve assay dose-response data. The response is not mapped to a
  # record class.
  #
  # @param aid [Integer, String] assay id, interpolated into the path as-is
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +output+ is not allowed
  def get_assay_doseresponse(aid, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/assay/aid/#{aid}/doseresponse/#{output}", output, options)
  end

  # Retrieve assay targets by target type. The response is not mapped to a
  # record class.
  #
  # @param aid [Integer, String] assay id, interpolated into the path as-is
  # @param target_type [String] one of TARGET_TYPES
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +target_type+ or +output+ is not allowed
  def get_assay_targets(aid, target_type, output = DEFAULT_OUTPUT, options = {})
    validate_value(target_type, TARGET_TYPES, 'target type')
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/assay/aid/#{aid}/targets/#{target_type}/#{output}", output, options)
  end

  # Retrieve gene summary by synonym. The synonym is percent-encoded before
  # it is placed in the path.
  #
  # @param synonym [String] gene synonym
  # @param output [String] one of OUTPUT_FORMATS_SIMPLE
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +output+ is not allowed
  def get_gene_summary_by_synonym(synonym, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS_SIMPLE)
    path = "/gene/synonym/#{escape_path_segment(synonym)}/summary/#{output}"
    fetch_and_parse(path, output, options, 'GeneSummary')
  end

  # Retrieve protein summary by synonym. The synonym is percent-encoded
  # before it is placed in the path.
  #
  # @param synonym [String] protein synonym
  # @param output [String] one of OUTPUT_FORMATS_SIMPLE
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +output+ is not allowed
  def get_protein_summary_by_synonym(synonym, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS_SIMPLE)
    path = "/protein/synonym/#{escape_path_segment(synonym)}/summary/#{output}"
    fetch_and_parse(path, output, options, 'ProteinSummary')
  end

  # Retrieve compound conformers by CID. The response is not mapped to a
  # record class.
  #
  # @param cid [Integer, String] compound id, interpolated into the path as-is
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +output+ is not allowed
  def get_compound_conformers(cid, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/compound/cid/#{cid}/conformers/#{output}", output, options)
  end

  # Search within a previous result using cache key. The SMILES string is
  # percent-encoded into the path and +cachekey+ is added to the query
  # string under 'cachekey' (the caller's +options+ hash is not mutated).
  #
  # @param smiles [String] SMILES query structure
  # @param cachekey [String] cache key of the earlier result
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +output+ is not allowed
  def compound_fastsubstructure_search(smiles, cachekey, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS)
    path = "/compound/fastsubstructure/smiles/#{escape_path_segment(smiles)}/cids/#{output}"
    fetch_and_parse(path, output, options.merge('cachekey' => cachekey))
  end

  # Retrieve classification nodes as cache key. +list_return+ is added to
  # the query string under 'list_return' (the caller's +options+ hash is not
  # mutated).
  #
  # @param hnid [Integer, String] hierarchy node id, interpolated into the path as-is
  # @param idtype [String] one of ID_TYPES
  # @param list_return [String] value for the 'list_return' query parameter
  # @param output [String] one of OUTPUT_FORMATS_SIMPLE
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +idtype+ or +output+ is not allowed
  def get_classification_nodes(hnid, idtype, list_return, output = DEFAULT_OUTPUT, options = {})
    validate_value(idtype, ID_TYPES, 'ID type')
    validate_output_format(output, OUTPUT_FORMATS_SIMPLE)
    path = "/classification/hnid/#{hnid}/#{idtype}/#{output}"
    fetch_and_parse(path, output, options.merge('list_return' => list_return))
  end

  # Retrieve compounds by listkey with pagination (pagination parameters, if
  # any, are supplied by the caller through +options+).
  #
  # @param listkey [String] list key, interpolated into the path as-is
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +output+ is not allowed
  def get_compounds_by_listkey(listkey, output = DEFAULT_OUTPUT, options = {})
    validate_output_format(output, OUTPUT_FORMATS)
    fetch_and_parse("/compound/listkey/#{listkey}/cids/#{output}", output, options)
  end

  # Compound structure search operations. The identifier is percent-encoded
  # before it is placed in the path.
  #
  # @param search_type [String] one of SEARCH_TYPES
  # @param namespace [String] one of NAMESPACES
  # @param identifier [String] structure identifier in the given namespace
  # @param output [String] one of OUTPUT_FORMATS
  # @param options [Hash] extra query parameters
  # @raise [ArgumentError] if +search_type+, +namespace+ or +output+ is not allowed
  def compound_structure_search(search_type, namespace, identifier, output = DEFAULT_OUTPUT, options = {})
    validate_value(search_type, SEARCH_TYPES, 'search type')
    validate_value(namespace, NAMESPACES, 'namespace')
    validate_output_format(output, OUTPUT_FORMATS)
    path = "/compound/#{search_type}/#{namespace}/#{escape_path_segment(identifier)}/cids/#{output}"
    fetch_and_parse(path, output, options)
  end

  private

  # Issue the GET request and convert the response.
  def fetch_and_parse(path, output, options, schema_class = nil)
    response = self.class.get(path, query: options)
    parse_response(response, output, schema_class)
  end

  # Percent-encode a value for use as a single URL path segment.
  #
  # Form encoding turns a space into '+', which is a literal plus sign inside
  # a path; a real '+' in the input has already become '%2B' at that point,
  # so every remaining '+' is a space and is rewritten to '%20'.
  def escape_path_segment(value)
    URI.encode_www_form_component(value).gsub('+', '%20')
  end

  # Validate operation
  def validate_operation(operation, allowed_operations)
    return if allowed_operations.include?(operation)

    raise ArgumentError, "Invalid operation: #{operation}. Allowed operations: #{allowed_operations.join(', ')}"
  end

  # Validate output format
  def validate_output_format(output, allowed_formats)
    return if allowed_formats.include?(output)

    raise ArgumentError, "Invalid output format: #{output}. Allowed formats: #{allowed_formats.join(', ')}"
  end

  # Validate value against allowed values
  def validate_value(value, allowed_values, name)
    return if allowed_values.include?(value)

    raise ArgumentError, "Invalid #{name}: #{value}. Allowed values: #{allowed_values.join(', ')}"
  end

  # Parse API response and map to Ruby objects.
  #
  # * 'JSON': the parsed body, wrapped in +schema_class+ when one is given
  #   and the payload has the root key that class reads; otherwise the
  #   parsed body itself.
  # * 'XML': a Nokogiri document with namespaces removed.
  # * anything else: the raw response body.
  #
  # @raise [APIError] when the response is not successful
  def parse_response(response, output_format, schema_class = nil)
    return handle_error_response(response) unless response.success?

    case output_format
    when 'JSON'
      data = response.parsed_response
      return data unless schema_applicable?(data, schema_class)

      PubChemAPI.const_get(schema_class).new(data)
    when 'XML'
      doc = Nokogiri::XML(response.body)
      doc.remove_namespaces!
      schema_class ? parse_xml_to_object(doc, schema_class) : doc
    else
      response.body
    end
  end

  # Whether +data+ can be handed to +schema_class+ without the constructor
  # failing on its first lookup. Schema names missing from SCHEMA_ROOT_KEYS
  # are attempted unconditionally.
  def schema_applicable?(data, schema_class)
    return false unless schema_class && data.is_a?(Hash)

    root_key = SCHEMA_ROOT_KEYS[schema_class]
    root_key.nil? || data.key?(root_key)
  end

  # Handle error responses: always raises APIError carrying the HTTP code.
  # JSON error bodies contribute their Fault/Message text when present.
  def handle_error_response(response)
    content_type = response.headers['Content-Type']
    unless content_type && content_type.include?('application/json')
      raise APIError.new("HTTP Error #{response.code}: #{response.message}", response.code)
    end

    raise APIError.new(fault_message(response.parsed_response), response.code)
  end

  # Extract body['Fault']['Message'], falling back to 'Unknown error' when
  # the body does not have that shape or the message is missing.
  def fault_message(body)
    fault = body['Fault'] if body.is_a?(Hash)
    message = fault['Message'] if fault.is_a?(Hash)
    message || 'Unknown error'
  end

  # Parse XML to Ruby object
  def parse_xml_to_object(doc, _schema_class)
    # TODO: For now, just return the raw Nokogiri document
    doc
  end
end
357
+ end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pubchem_api
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - coderobe
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-11-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: httparty
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.22.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.22.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 1.16.7
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 1.16.7
41
+ description: PubChem PUG REST api wrapper module
42
+ email:
43
+ - git@coderobe.net
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - lib/pubchem_api.rb
49
+ - lib/pubchem_api/version.rb
50
+ homepage: https://github.com/coderobe/ruby-pubchem_api
51
+ licenses:
52
+ - MIT
53
+ metadata: {}
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ requirements: []
69
+ rubygems_version: 3.5.4
70
+ signing_key:
71
+ specification_version: 4
72
+ summary: PubChem PUG REST api wrapper
73
+ test_files: []