@toxplanet/pegasus-sdk 1.2.7 → 1.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/chemicals.js +144 -27
- package/package.json +1 -1
package/lib/chemicals.js
CHANGED
|
@@ -8,9 +8,15 @@ const SEARCH_BOOST_PREFIX_SECONDARY = 10;
|
|
|
8
8
|
|
|
9
9
|
const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'CID', 'DTXSID', 'EINECS', 'EC']);

// Equivalence groups for CDI identifier_key normalization. When any member of
// a group is present on a row, the others are backfilled with the same values
// so consumers can look up by their preferred spelling.
//
// Each group lists its member keys (`keys`) and the display name used for a
// backfilled entry of that key (`names`). The table is deep-frozen: it is a
// shared module-level lookup, so an accidental mutation should fail loudly
// instead of silently changing alias expansion for every later call.
const IDENTIFIER_ALIAS_GROUPS = Object.freeze([
  { keys: ['cid', 'pubchem_cid', 'pubchem'], names: { cid: 'CID', pubchem_cid: 'PubChem CID', pubchem: 'PubChem' } },
  { keys: ['inchikey', 'inchi_key'], names: { inchikey: 'InChIKey', inchi_key: 'InChIKey' } },
  { keys: ['ec', 'ec_number'], names: { ec: 'EC', ec_number: 'EC Number' } },
  { keys: ['cas', 'cas_number', 'cas_rn'], names: { cas: 'CAS', cas_number: 'CAS Number', cas_rn: 'CAS RN' } }
].map(({ keys, names }) => Object.freeze({
  keys: Object.freeze(keys),
  names: Object.freeze(names)
})));
|
|
14
20
|
|
|
15
21
|
function escapeLikePattern(value) {
|
|
16
22
|
return value.replace(/[%_\\]/g, '\\$&');
|
|
@@ -65,18 +71,29 @@ function transformChemicalMeta(meta) {
|
|
|
65
71
|
|
|
66
72
|
/**
 * Normalize a chemical's identifiers into a flat list of `{ type, value }` entries.
 *
 * Three input shapes are handled:
 *  - an array whose first element already has a `type` property: returned unchanged
 *    (same array reference);
 *  - an array of legacy rows `{ identifier_key, identifier_value }`: one entry per row;
 *  - a legacy keyed object such as `{ CAS: ["71-43-2"], CID: ["241"], InChIKey: "ABC..." }`:
 *    one entry per non-empty value, with the value converted to a string.
 *
 * @param {Array<object>|object|null|undefined} identifiers - Identifiers in any of the shapes above.
 * @returns {Array<{type: *, value: *}>} Normalized entries; an empty array when the
 *   input is missing, not an object, or contains nothing usable.
 */
function transformChemicalIdentifiers(identifiers) {
  if (!identifiers || typeof identifiers !== 'object') return [];

  if (Array.isArray(identifiers)) {
    if (identifiers.length === 0) return [];

    // The format is detected from the first element only. Guard against a
    // null/undefined first element, which would otherwise throw a TypeError.
    const first = identifiers[0];
    if (first != null && first.type !== undefined) return identifiers;

    // Legacy { identifier_key, identifier_value } row form.
    // Null / primitive elements carry no identifier data, so they are skipped
    // rather than allowed to crash the whole transformation.
    // NOTE(review): a multi-valued `identifier_value` array keeps only its first
    // value here, whereas the keyed-object form below emits every value.
    // Confirm whether callers rely on one-entry-per-row before changing it.
    return identifiers
      .filter((item) => item !== null && typeof item === 'object')
      .map((item) => ({
        type: item.identifier_key || item.type,
        value: Array.isArray(item.identifier_value)
          ? item.identifier_value[0]
          : (item.value || item.identifier_value)
      }));
  }

  // Legacy ES-doc form: { CAS: ["71-43-2"], CID: ["241"], InChIKey: "ABC..." }
  // Flatten one entry per value, preserving multi-valued types.
  const result = [];
  for (const [type, value] of Object.entries(identifiers)) {
    if (value == null) continue;
    const values = Array.isArray(value) ? value : [value];
    for (const v of values) {
      if (v == null || v === '') continue;
      result.push({ type, value: String(v) });
    }
  }
  return result;
}
|
|
81
98
|
|
|
82
99
|
class ChemicalsService {
|
|
@@ -152,6 +169,85 @@ class ChemicalsService {
|
|
|
152
169
|
});
|
|
153
170
|
}
|
|
154
171
|
|
|
172
|
+
_expandIdentifierAliases(identifiers) {
|
|
173
|
+
const byKey = new Map(identifiers.map((i) => [i.identifier_key, i]));
|
|
174
|
+
const additions = [];
|
|
175
|
+
for (const group of IDENTIFIER_ALIAS_GROUPS) {
|
|
176
|
+
const present = group.keys.find((k) => byKey.has(k));
|
|
177
|
+
if (!present) continue;
|
|
178
|
+
const source = byKey.get(present);
|
|
179
|
+
for (const aliasKey of group.keys) {
|
|
180
|
+
if (byKey.has(aliasKey)) continue;
|
|
181
|
+
const alias = {
|
|
182
|
+
identifier_key: aliasKey,
|
|
183
|
+
identifier_name: group.names[aliasKey] || aliasKey,
|
|
184
|
+
identifier_value: [...source.identifier_value]
|
|
185
|
+
};
|
|
186
|
+
additions.push(alias);
|
|
187
|
+
byKey.set(aliasKey, alias);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
return identifiers.concat(additions);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
_chemicalRowToCDISource(chemical) {
|
|
194
|
+
if (!chemical) return null;
|
|
195
|
+
|
|
196
|
+
const identifierGroups = new Map();
|
|
197
|
+
const identifierList = Array.isArray(chemical.chemicalIdentifiers) ? chemical.chemicalIdentifiers : [];
|
|
198
|
+
for (const item of identifierList) {
|
|
199
|
+
if (!item || typeof item !== 'object') continue;
|
|
200
|
+
const type = item.type || item.identifier_key || '';
|
|
201
|
+
if (!type) continue;
|
|
202
|
+
const rawValue = item.value !== undefined ? item.value : item.identifier_value;
|
|
203
|
+
const values = Array.isArray(rawValue) ? rawValue : (rawValue != null ? [rawValue] : []);
|
|
204
|
+
if (!identifierGroups.has(type)) {
|
|
205
|
+
identifierGroups.set(type, {
|
|
206
|
+
identifier_key: String(type).toLowerCase(),
|
|
207
|
+
identifier_name: String(type),
|
|
208
|
+
identifier_value: []
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
const group = identifierGroups.get(type);
|
|
212
|
+
for (const v of values) {
|
|
213
|
+
if (v == null || v === '') continue;
|
|
214
|
+
group.identifier_value.push(String(v));
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
const identifiers = this._expandIdentifierAliases(Array.from(identifierGroups.values()));
|
|
218
|
+
|
|
219
|
+
const metaList = Array.isArray(chemical.chemicalMeta) ? chemical.chemicalMeta : [];
|
|
220
|
+
const meta = metaList.map((item) => {
|
|
221
|
+
const key = (item && (item.key || item.meta_key)) || '';
|
|
222
|
+
const rawValue = item && (item.value !== undefined ? item.value : item.meta_value_text);
|
|
223
|
+
const valueArr = Array.isArray(rawValue)
|
|
224
|
+
? rawValue.map((v) => String(v))
|
|
225
|
+
: (rawValue != null ? [String(rawValue)] : []);
|
|
226
|
+
const out = {
|
|
227
|
+
meta_key: String(key).toLowerCase(),
|
|
228
|
+
meta_value_text: valueArr
|
|
229
|
+
};
|
|
230
|
+
const unit = item && (item.unit || item.meta_value_unit);
|
|
231
|
+
if (unit) out.meta_value_unit = unit;
|
|
232
|
+
return out;
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
const synonyms = Array.isArray(chemical.chemicalSynonyms) ? chemical.chemicalSynonyms : [];
|
|
236
|
+
const names = [chemical.chemicalName, ...synonyms].filter(Boolean);
|
|
237
|
+
|
|
238
|
+
return {
|
|
239
|
+
chemical_set_identifier: chemical.sourceId || '',
|
|
240
|
+
chemical_primary_name: chemical.chemicalName || '',
|
|
241
|
+
chemical_names: names,
|
|
242
|
+
chemical_synonyms: synonyms,
|
|
243
|
+
chemical_categories: chemical.chemicalCategories || [],
|
|
244
|
+
chemical_identifiers: identifiers,
|
|
245
|
+
chemical_meta: meta,
|
|
246
|
+
chemical_created_at: chemical.createdAt,
|
|
247
|
+
chemical_updated_at: chemical.updatedAt
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
|
|
155
251
|
_mapChemicalRow(row) {
|
|
156
252
|
if (!row) return null;
|
|
157
253
|
return {
|
|
@@ -658,7 +754,7 @@ class ChemicalsService {
|
|
|
658
754
|
}
|
|
659
755
|
|
|
660
756
|
await this.connection.ensureConnected();
|
|
661
|
-
|
|
757
|
+
|
|
662
758
|
const sql = `SELECT * FROM chemicals WHERE chemical_identifiers->>'${identifierType}' = :value OR chemical_identifiers->'${identifierType}' ? :value`;
|
|
663
759
|
const params = [{ name: 'value', value: { stringValue: identifierValue } }];
|
|
664
760
|
const result = await this.connection.query(sql, params);
|
|
@@ -669,6 +765,36 @@ class ChemicalsService {
|
|
|
669
765
|
}
|
|
670
766
|
}
|
|
671
767
|
|
|
768
|
+
async findChemicalByIdentifier(identifierValue) {
|
|
769
|
+
if (!identifierValue) return null;
|
|
770
|
+
const result = await this.connection.invokeOpenSearch({
|
|
771
|
+
operation: 'search',
|
|
772
|
+
body: {
|
|
773
|
+
size: 1,
|
|
774
|
+
query: {
|
|
775
|
+
bool: {
|
|
776
|
+
should: [
|
|
777
|
+
{ term: { 'cas_numbers.keyword': identifierValue } },
|
|
778
|
+
{ term: { 'identifier_values.keyword': identifierValue } }
|
|
779
|
+
],
|
|
780
|
+
minimum_should_match: 1
|
|
781
|
+
}
|
|
782
|
+
},
|
|
783
|
+
_source: ['postgres_id', 'chemical_name', 'cas_numbers', 'identifier_values', 'synonyms']
|
|
784
|
+
}
|
|
785
|
+
});
|
|
786
|
+
|
|
787
|
+
const hit = result?.hits?.hits?.[0]?._source;
|
|
788
|
+
if (!hit) return null;
|
|
789
|
+
return {
|
|
790
|
+
id: hit.postgres_id,
|
|
791
|
+
name: hit.chemical_name,
|
|
792
|
+
cas: hit.cas_numbers || [],
|
|
793
|
+
identifiers: hit.identifier_values || [],
|
|
794
|
+
synonyms: hit.synonyms || []
|
|
795
|
+
};
|
|
796
|
+
}
|
|
797
|
+
|
|
672
798
|
async countByCollection(collectionName) {
|
|
673
799
|
try {
|
|
674
800
|
await this.connection.ensureConnected();
|
|
@@ -1068,22 +1194,9 @@ class ChemicalsService {
|
|
|
1068
1194
|
let chemical = await this.getChemicalBySourceId(id);
|
|
1069
1195
|
|
|
1070
1196
|
if (!chemical) {
|
|
1071
|
-
const
|
|
1072
|
-
if (
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
if (!chemical) {
|
|
1076
|
-
let identifierType = null;
|
|
1077
|
-
if (CID_PATTERN.test(id)) {
|
|
1078
|
-
identifierType = 'CID';
|
|
1079
|
-
} else if (INCHIKEY_PATTERN.test(id)) {
|
|
1080
|
-
identifierType = 'InChIKey';
|
|
1081
|
-
} else if (EC_PATTERN.test(id)) {
|
|
1082
|
-
identifierType = 'EC';
|
|
1083
|
-
}
|
|
1084
|
-
if (identifierType) {
|
|
1085
|
-
const matches = await this.getChemicalsByIdentifier(identifierType, id);
|
|
1086
|
-
if (matches.length > 0) chemical = matches[0];
|
|
1197
|
+
const hit = await this.findChemicalByIdentifier(id);
|
|
1198
|
+
if (hit && hit.id) {
|
|
1199
|
+
chemical = await this.getChemicalById(hit.id);
|
|
1087
1200
|
}
|
|
1088
1201
|
}
|
|
1089
1202
|
|
|
@@ -1098,13 +1211,17 @@ class ChemicalsService {
|
|
|
1098
1211
|
};
|
|
1099
1212
|
}
|
|
1100
1213
|
|
|
1214
|
+
const source = (params.index === 'chemical_data_index')
|
|
1215
|
+
? this._chemicalRowToCDISource(chemical)
|
|
1216
|
+
: this._chemicalRowToLegacySource(chemical);
|
|
1217
|
+
|
|
1101
1218
|
return {
|
|
1102
1219
|
body: {
|
|
1103
1220
|
_index: params.index,
|
|
1104
1221
|
_id: params.id,
|
|
1105
1222
|
_version: 1,
|
|
1106
1223
|
found: true,
|
|
1107
|
-
_source:
|
|
1224
|
+
_source: source
|
|
1108
1225
|
},
|
|
1109
1226
|
statusCode: 200
|
|
1110
1227
|
};
|
package/package.json
CHANGED