@toxplanet/pegasus-sdk 1.2.8 → 1.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/lib/chemicals.js +98 -13
  2. package/package.json +1 -1
package/lib/chemicals.js CHANGED
@@ -8,6 +8,16 @@ const SEARCH_BOOST_PREFIX_SECONDARY = 10;
8
8
 
9
9
  const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'CID', 'DTXSID', 'EINECS', 'EC']);
10
10
 
11
// Equivalence groups for CDI identifier_key normalization. When any member of
// a group is present on a row, the others are backfilled with the same values
// so consumers can look up by their preferred spelling.
//
// Each group has:
//   keys  - the lowercase identifier_key spellings treated as equivalent, in
//           lookup priority order (the first key present on a row is the one
//           whose values are copied to the missing ones).
//   names - the identifier_name to use for each key when it is backfilled.
//
// The table is deep-frozen: it is shared module-level state and is only ever
// read, so accidental mutation by a caller should not be able to change the
// aliasing rules for every subsequent lookup.
const IDENTIFIER_ALIAS_GROUPS = Object.freeze([
  { keys: ['cid', 'pubchem_cid', 'pubchem'], names: { cid: 'CID', pubchem_cid: 'PubChem CID', pubchem: 'PubChem' } },
  { keys: ['inchikey', 'inchi_key'], names: { inchikey: 'InChIKey', inchi_key: 'InChIKey' } },
  { keys: ['ec', 'ec_number'], names: { ec: 'EC', ec_number: 'EC Number' } },
  { keys: ['cas', 'cas_number', 'cas_rn'], names: { cas: 'CAS', cas_number: 'CAS Number', cas_rn: 'CAS RN' } }
].map(({ keys, names }) => Object.freeze({
  keys: Object.freeze(keys),
  names: Object.freeze(names)
})));
20
+
11
21
  function escapeLikePattern(value) {
12
22
  return value.replace(/[%_\\]/g, '\\$&');
13
23
  }
@@ -159,6 +169,85 @@ class ChemicalsService {
159
169
  });
160
170
  }
161
171
 
172
+ _expandIdentifierAliases(identifiers) {
173
+ const byKey = new Map(identifiers.map((i) => [i.identifier_key, i]));
174
+ const additions = [];
175
+ for (const group of IDENTIFIER_ALIAS_GROUPS) {
176
+ const present = group.keys.find((k) => byKey.has(k));
177
+ if (!present) continue;
178
+ const source = byKey.get(present);
179
+ for (const aliasKey of group.keys) {
180
+ if (byKey.has(aliasKey)) continue;
181
+ const alias = {
182
+ identifier_key: aliasKey,
183
+ identifier_name: group.names[aliasKey] || aliasKey,
184
+ identifier_value: [...source.identifier_value]
185
+ };
186
+ additions.push(alias);
187
+ byKey.set(aliasKey, alias);
188
+ }
189
+ }
190
+ return identifiers.concat(additions);
191
+ }
192
+
193
+ _chemicalRowToCDISource(chemical) {
194
+ if (!chemical) return null;
195
+
196
+ const identifierGroups = new Map();
197
+ const identifierList = Array.isArray(chemical.chemicalIdentifiers) ? chemical.chemicalIdentifiers : [];
198
+ for (const item of identifierList) {
199
+ if (!item || typeof item !== 'object') continue;
200
+ const type = item.type || item.identifier_key || '';
201
+ if (!type) continue;
202
+ const rawValue = item.value !== undefined ? item.value : item.identifier_value;
203
+ const values = Array.isArray(rawValue) ? rawValue : (rawValue != null ? [rawValue] : []);
204
+ if (!identifierGroups.has(type)) {
205
+ identifierGroups.set(type, {
206
+ identifier_key: String(type).toLowerCase(),
207
+ identifier_name: String(type),
208
+ identifier_value: []
209
+ });
210
+ }
211
+ const group = identifierGroups.get(type);
212
+ for (const v of values) {
213
+ if (v == null || v === '') continue;
214
+ group.identifier_value.push(String(v));
215
+ }
216
+ }
217
+ const identifiers = this._expandIdentifierAliases(Array.from(identifierGroups.values()));
218
+
219
+ const metaList = Array.isArray(chemical.chemicalMeta) ? chemical.chemicalMeta : [];
220
+ const meta = metaList.map((item) => {
221
+ const key = (item && (item.key || item.meta_key)) || '';
222
+ const rawValue = item && (item.value !== undefined ? item.value : item.meta_value_text);
223
+ const valueArr = Array.isArray(rawValue)
224
+ ? rawValue.map((v) => String(v))
225
+ : (rawValue != null ? [String(rawValue)] : []);
226
+ const out = {
227
+ meta_key: String(key).toLowerCase(),
228
+ meta_value_text: valueArr
229
+ };
230
+ const unit = item && (item.unit || item.meta_value_unit);
231
+ if (unit) out.meta_value_unit = unit;
232
+ return out;
233
+ });
234
+
235
+ const synonyms = Array.isArray(chemical.chemicalSynonyms) ? chemical.chemicalSynonyms : [];
236
+ const names = [chemical.chemicalName, ...synonyms].filter(Boolean);
237
+
238
+ return {
239
+ chemical_set_identifier: chemical.sourceId || '',
240
+ chemical_primary_name: chemical.chemicalName || '',
241
+ chemical_names: names,
242
+ chemical_synonyms: synonyms,
243
+ chemical_categories: chemical.chemicalCategories || [],
244
+ chemical_identifiers: identifiers,
245
+ chemical_meta: meta,
246
+ chemical_created_at: chemical.createdAt,
247
+ chemical_updated_at: chemical.updatedAt
248
+ };
249
+ }
250
+
162
251
  _mapChemicalRow(row) {
163
252
  if (!row) return null;
164
253
  return {
@@ -1102,24 +1191,16 @@ class ChemicalsService {
1102
1191
 
1103
1192
  get: async (params) => {
1104
1193
  const id = params.id;
1105
- let source = null;
1194
+ let chemical = await this.getChemicalBySourceId(id);
1106
1195
 
1107
- const chemical = await this.getChemicalBySourceId(id);
1108
- if (chemical) {
1109
- source = this._chemicalRowToLegacySource(chemical);
1110
- } else {
1196
+ if (!chemical) {
1111
1197
  const hit = await this.findChemicalByIdentifier(id);
1112
- if (hit) {
1113
- source = this._toLegacyChemicalSource({
1114
- name: hit.name,
1115
- cas: hit.cas,
1116
- identifiers: hit.identifiers,
1117
- id: hit.id
1118
- });
1198
+ if (hit && hit.id) {
1199
+ chemical = await this.getChemicalById(hit.id);
1119
1200
  }
1120
1201
  }
1121
1202
 
1122
- if (!source) {
1203
+ if (!chemical) {
1123
1204
  return {
1124
1205
  body: {
1125
1206
  _index: params.index,
@@ -1130,6 +1211,10 @@ class ChemicalsService {
1130
1211
  };
1131
1212
  }
1132
1213
 
1214
+ const source = (params.index === 'chemical_data_index')
1215
+ ? this._chemicalRowToCDISource(chemical)
1216
+ : this._chemicalRowToLegacySource(chemical);
1217
+
1133
1218
  return {
1134
1219
  body: {
1135
1220
  _index: params.index,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.2.8",
3
+ "version": "1.2.9",
4
4
  "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
5
  "main": "index.js",
6
6
  "type": "commonjs",