@toxplanet/pegasus-sdk 1.2.5 → 1.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/chemicals.js +104 -30
- package/lib/search.js +6 -3
- package/package.json +1 -1
package/lib/chemicals.js
CHANGED
|
@@ -6,7 +6,11 @@ const SEARCH_BOOST_PREFIX_PRIMARY = 50;
|
|
|
6
6
|
const SEARCH_BOOST_EXACT_SECONDARY = 30;
|
|
7
7
|
const SEARCH_BOOST_PREFIX_SECONDARY = 10;
|
|
8
8
|
|
|
9
|
-
const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'DTXSID', 'EINECS', 'EC']);
|
|
9
|
+
const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'CID', 'DTXSID', 'EINECS', 'EC']);
|
|
10
|
+
|
|
11
|
+
const INCHIKEY_PATTERN = /^[A-Z0-9]{14}-[A-Z0-9]{8,10}-[A-Z0-9]$/;
|
|
12
|
+
const EC_PATTERN = /^\d{3}-\d{3}-\d$/;
|
|
13
|
+
const CID_PATTERN = /^CID/i;
|
|
10
14
|
|
|
11
15
|
function escapeLikePattern(value) {
|
|
12
16
|
return value.replace(/[%_\\]/g, '\\$&');
|
|
@@ -94,6 +98,60 @@ class ChemicalsService {
|
|
|
94
98
|
return d instanceof Date ? d.toISOString() : (d || new Date().toISOString());
|
|
95
99
|
}
|
|
96
100
|
|
|
101
|
+
_toLegacyChemicalSource({ name = '', cas = [], identifiers = [], id = '', formula = null } = {}) {
|
|
102
|
+
return {
|
|
103
|
+
chemical_name: name,
|
|
104
|
+
chemical_name_sensitive: name,
|
|
105
|
+
chemical_name_sort: (name || '').toLowerCase(),
|
|
106
|
+
chemical_identifier: [...cas, ...identifiers],
|
|
107
|
+
chemical_set_identifier: cas[0] || id || '',
|
|
108
|
+
chemical_formula: formula
|
|
109
|
+
};
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
_chemicalRowToLegacySource(chemical) {
|
|
113
|
+
if (!chemical) return null;
|
|
114
|
+
const identifierObj = chemical.chemicalIdentifiers;
|
|
115
|
+
const cas = [];
|
|
116
|
+
const other = [];
|
|
117
|
+
let formula = null;
|
|
118
|
+
|
|
119
|
+
const visit = (type, value) => {
|
|
120
|
+
if (!type) return;
|
|
121
|
+
const values = Array.isArray(value) ? value : (value != null ? [value] : []);
|
|
122
|
+
const stringValues = values.map((v) => String(v)).filter((v) => v.length > 0);
|
|
123
|
+
if (type === 'CAS') {
|
|
124
|
+
cas.push(...stringValues);
|
|
125
|
+
} else if (type === 'formula' || type === 'Formula' || type === 'chemical_formula') {
|
|
126
|
+
if (formula == null && stringValues.length > 0) formula = stringValues[0];
|
|
127
|
+
} else {
|
|
128
|
+
other.push(...stringValues);
|
|
129
|
+
}
|
|
130
|
+
};
|
|
131
|
+
|
|
132
|
+
if (Array.isArray(identifierObj)) {
|
|
133
|
+
for (const item of identifierObj) {
|
|
134
|
+
if (item && typeof item === 'object') {
|
|
135
|
+
const type = item.type || item.identifier_key;
|
|
136
|
+
const value = item.value !== undefined ? item.value : item.identifier_value;
|
|
137
|
+
visit(type, value);
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
} else if (identifierObj && typeof identifierObj === 'object') {
|
|
141
|
+
for (const [type, value] of Object.entries(identifierObj)) {
|
|
142
|
+
visit(type, value);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
return this._toLegacyChemicalSource({
|
|
147
|
+
name: chemical.chemicalName || '',
|
|
148
|
+
cas,
|
|
149
|
+
identifiers: other,
|
|
150
|
+
id: chemical.sourceId,
|
|
151
|
+
formula
|
|
152
|
+
});
|
|
153
|
+
}
|
|
154
|
+
|
|
97
155
|
_mapChemicalRow(row) {
|
|
98
156
|
if (!row) return null;
|
|
99
157
|
return {
|
|
@@ -734,7 +792,7 @@ class ChemicalsService {
|
|
|
734
792
|
*/
|
|
735
793
|
async searchByName(searchTerm, limit = 10) {
|
|
736
794
|
if (!searchTerm) {
|
|
737
|
-
return { results: [] };
|
|
795
|
+
return { results: [], total: { value: 0, relation: 'eq' } };
|
|
738
796
|
}
|
|
739
797
|
|
|
740
798
|
try {
|
|
@@ -771,7 +829,10 @@ class ChemicalsService {
|
|
|
771
829
|
score: hit._score
|
|
772
830
|
}));
|
|
773
831
|
|
|
774
|
-
return {
|
|
832
|
+
return {
|
|
833
|
+
results,
|
|
834
|
+
total: result?.hits?.total ?? { value: results.length, relation: 'eq' }
|
|
835
|
+
};
|
|
775
836
|
} catch (error) {
|
|
776
837
|
logError('pegasus-sdk', 'ChemicalsService', 'searchByName', error);
|
|
777
838
|
throw error;
|
|
@@ -786,7 +847,7 @@ class ChemicalsService {
|
|
|
786
847
|
*/
|
|
787
848
|
async searchBySynonym(synonymTerm, limit = 10) {
|
|
788
849
|
if (!synonymTerm) {
|
|
789
|
-
return { results: [] };
|
|
850
|
+
return { results: [], total: { value: 0, relation: 'eq' } };
|
|
790
851
|
}
|
|
791
852
|
|
|
792
853
|
try {
|
|
@@ -823,7 +884,10 @@ class ChemicalsService {
|
|
|
823
884
|
score: hit._score
|
|
824
885
|
}));
|
|
825
886
|
|
|
826
|
-
return {
|
|
887
|
+
return {
|
|
888
|
+
results,
|
|
889
|
+
total: result?.hits?.total ?? { value: results.length, relation: 'eq' }
|
|
890
|
+
};
|
|
827
891
|
} catch (error) {
|
|
828
892
|
logError('pegasus-sdk', 'ChemicalsService', 'searchBySynonym', error);
|
|
829
893
|
throw error;
|
|
@@ -1000,9 +1064,30 @@ class ChemicalsService {
|
|
|
1000
1064
|
},
|
|
1001
1065
|
|
|
1002
1066
|
get: async (params) => {
|
|
1003
|
-
const
|
|
1067
|
+
const id = params.id;
|
|
1068
|
+
let chemical = await this.getChemicalBySourceId(id);
|
|
1069
|
+
|
|
1070
|
+
if (!chemical) {
|
|
1071
|
+
const casMatches = await this.getChemicalsByCAS(id);
|
|
1072
|
+
if (casMatches.length > 0) chemical = casMatches[0];
|
|
1073
|
+
}
|
|
1004
1074
|
|
|
1005
|
-
if (!
|
|
1075
|
+
if (!chemical) {
|
|
1076
|
+
let identifierType = null;
|
|
1077
|
+
if (CID_PATTERN.test(id)) {
|
|
1078
|
+
identifierType = 'CID';
|
|
1079
|
+
} else if (INCHIKEY_PATTERN.test(id)) {
|
|
1080
|
+
identifierType = 'InChIKey';
|
|
1081
|
+
} else if (EC_PATTERN.test(id)) {
|
|
1082
|
+
identifierType = 'EC';
|
|
1083
|
+
}
|
|
1084
|
+
if (identifierType) {
|
|
1085
|
+
const matches = await this.getChemicalsByIdentifier(identifierType, id);
|
|
1086
|
+
if (matches.length > 0) chemical = matches[0];
|
|
1087
|
+
}
|
|
1088
|
+
}
|
|
1089
|
+
|
|
1090
|
+
if (!chemical) {
|
|
1006
1091
|
return {
|
|
1007
1092
|
body: {
|
|
1008
1093
|
_index: params.index,
|
|
@@ -1019,7 +1104,7 @@ class ChemicalsService {
|
|
|
1019
1104
|
_id: params.id,
|
|
1020
1105
|
_version: 1,
|
|
1021
1106
|
found: true,
|
|
1022
|
-
_source:
|
|
1107
|
+
_source: this._chemicalRowToLegacySource(chemical)
|
|
1023
1108
|
},
|
|
1024
1109
|
statusCode: 200
|
|
1025
1110
|
};
|
|
@@ -1082,6 +1167,13 @@ class ChemicalsService {
|
|
|
1082
1167
|
let searchTerm = '';
|
|
1083
1168
|
let limit = params.body?.size || 10;
|
|
1084
1169
|
|
|
1170
|
+
const toLegacySource = (r) => this._toLegacyChemicalSource({
|
|
1171
|
+
name: r.name,
|
|
1172
|
+
cas: r.cas || [],
|
|
1173
|
+
identifiers: r.identifiers || [],
|
|
1174
|
+
id: r.id
|
|
1175
|
+
});
|
|
1176
|
+
|
|
1085
1177
|
if (params.index === 'synonym_lookup_index') {
|
|
1086
1178
|
const query = params.body?.query;
|
|
1087
1179
|
searchTerm = query?.match?.chemical_name ||
|
|
@@ -1100,22 +1192,13 @@ class ChemicalsService {
|
|
|
1100
1192
|
failed: 0
|
|
1101
1193
|
},
|
|
1102
1194
|
hits: {
|
|
1103
|
-
total: {
|
|
1104
|
-
value: searchResults.results.length,
|
|
1105
|
-
relation: 'eq'
|
|
1106
|
-
},
|
|
1195
|
+
total: searchResults.total ?? { value: searchResults.results.length, relation: 'eq' },
|
|
1107
1196
|
max_score: searchResults.results[0]?.score || 0,
|
|
1108
1197
|
hits: searchResults.results.map(result => ({
|
|
1109
1198
|
_index: params.index,
|
|
1110
1199
|
_id: result.id,
|
|
1111
1200
|
_score: result.score,
|
|
1112
|
-
_source:
|
|
1113
|
-
postgres_id: result.id,
|
|
1114
|
-
chemical_name: result.name,
|
|
1115
|
-
cas_numbers: result.cas,
|
|
1116
|
-
identifier_values: result.identifiers,
|
|
1117
|
-
synonyms: result.synonyms
|
|
1118
|
-
}
|
|
1201
|
+
_source: toLegacySource(result)
|
|
1119
1202
|
}))
|
|
1120
1203
|
}
|
|
1121
1204
|
},
|
|
@@ -1139,22 +1222,13 @@ class ChemicalsService {
|
|
|
1139
1222
|
failed: 0
|
|
1140
1223
|
},
|
|
1141
1224
|
hits: {
|
|
1142
|
-
total: {
|
|
1143
|
-
value: searchResults.results.length,
|
|
1144
|
-
relation: 'eq'
|
|
1145
|
-
},
|
|
1225
|
+
total: searchResults.total ?? { value: searchResults.results.length, relation: 'eq' },
|
|
1146
1226
|
max_score: searchResults.results[0]?.score || 0,
|
|
1147
1227
|
hits: searchResults.results.map(result => ({
|
|
1148
1228
|
_index: params.index,
|
|
1149
1229
|
_id: result.id,
|
|
1150
1230
|
_score: result.score,
|
|
1151
|
-
_source:
|
|
1152
|
-
postgres_id: result.id,
|
|
1153
|
-
chemical_name: result.name,
|
|
1154
|
-
cas_numbers: result.cas,
|
|
1155
|
-
identifier_values: result.identifiers,
|
|
1156
|
-
synonyms: result.synonyms
|
|
1157
|
-
}
|
|
1231
|
+
_source: toLegacySource(result)
|
|
1158
1232
|
}))
|
|
1159
1233
|
}
|
|
1160
1234
|
},
|
package/lib/search.js
CHANGED
|
@@ -94,7 +94,7 @@ class SearchService {
|
|
|
94
94
|
*/
|
|
95
95
|
async searchChemicals(query, options = {}) {
|
|
96
96
|
if (!query) {
|
|
97
|
-
return { results: [] };
|
|
97
|
+
return { results: [], total: { value: 0, relation: 'eq' } };
|
|
98
98
|
}
|
|
99
99
|
|
|
100
100
|
// Extract options with defaults
|
|
@@ -165,7 +165,10 @@ class SearchService {
|
|
|
165
165
|
score: hit._score
|
|
166
166
|
}));
|
|
167
167
|
|
|
168
|
-
return {
|
|
168
|
+
return {
|
|
169
|
+
results,
|
|
170
|
+
total: result?.hits?.total ?? { value: results.length, relation: 'eq' }
|
|
171
|
+
};
|
|
169
172
|
} catch (error) {
|
|
170
173
|
logError('pegasus-sdk', 'SearchService', 'searchChemicals', error);
|
|
171
174
|
throw error;
|
|
@@ -356,7 +359,7 @@ class SearchService {
|
|
|
356
359
|
chemical_set_identifier: (r.cas && r.cas[0]) || r.id || '',
|
|
357
360
|
}
|
|
358
361
|
})),
|
|
359
|
-
total: { value: pegasusResults.results.length, relation: 'eq' }
|
|
362
|
+
total: pegasusResults.total ?? { value: pegasusResults.results.length, relation: 'eq' }
|
|
360
363
|
},
|
|
361
364
|
timed_out: false,
|
|
362
365
|
_shards: { total: 1, successful: 1, failed: 0 }
|
package/package.json
CHANGED