@toxplanet/pegasus-sdk 1.2.9 → 1.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/chemicals.js +62 -30
- package/lib/db/index.js +10 -0
- package/lib/search.js +1 -0
- package/package.json +1 -1
package/lib/chemicals.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
const { logError, logInfo } = require('@toxplanet/tphelper/logging');
|
|
2
2
|
const { SQSClient, SendMessageCommand } = require('@aws-sdk/client-sqs');
|
|
3
|
+
const { getCasNumberVariations } = require('./search');
|
|
3
4
|
|
|
4
5
|
const SEARCH_BOOST_EXACT_PRIMARY = 100;
|
|
5
6
|
const SEARCH_BOOST_PREFIX_PRIMARY = 50;
|
|
@@ -23,6 +24,7 @@ function escapeLikePattern(value) {
|
|
|
23
24
|
}
|
|
24
25
|
|
|
25
26
|
function parsePostgresArray(str) {
|
|
27
|
+
if (Array.isArray(str)) return str.map((v) => String(v));
|
|
26
28
|
if (!str || str === '{}') return [];
|
|
27
29
|
const trimmed = str.slice(1, -1);
|
|
28
30
|
if (!trimmed) return [];
|
|
@@ -214,7 +216,7 @@ class ChemicalsService {
|
|
|
214
216
|
group.identifier_value.push(String(v));
|
|
215
217
|
}
|
|
216
218
|
}
|
|
217
|
-
const identifiers =
|
|
219
|
+
const identifiers = Array.from(identifierGroups.values());
|
|
218
220
|
|
|
219
221
|
const metaList = Array.isArray(chemical.chemicalMeta) ? chemical.chemicalMeta : [];
|
|
220
222
|
const meta = metaList.map((item) => {
|
|
@@ -223,22 +225,22 @@ class ChemicalsService {
|
|
|
223
225
|
const valueArr = Array.isArray(rawValue)
|
|
224
226
|
? rawValue.map((v) => String(v))
|
|
225
227
|
: (rawValue != null ? [String(rawValue)] : []);
|
|
226
|
-
const out = {
|
|
228
|
+
const unit = item && (item.unit || item.meta_value_unit);
|
|
229
|
+
if (unit && valueArr.length > 0) {
|
|
230
|
+
valueArr[valueArr.length - 1] = `${valueArr[valueArr.length - 1]} ${unit}`;
|
|
231
|
+
}
|
|
232
|
+
return {
|
|
227
233
|
meta_key: String(key).toLowerCase(),
|
|
228
234
|
meta_value_text: valueArr
|
|
229
235
|
};
|
|
230
|
-
const unit = item && (item.unit || item.meta_value_unit);
|
|
231
|
-
if (unit) out.meta_value_unit = unit;
|
|
232
|
-
return out;
|
|
233
236
|
});
|
|
234
237
|
|
|
235
238
|
const synonyms = Array.isArray(chemical.chemicalSynonyms) ? chemical.chemicalSynonyms : [];
|
|
236
|
-
const names = [chemical.chemicalName, ...synonyms].filter(Boolean);
|
|
237
239
|
|
|
238
240
|
return {
|
|
239
241
|
chemical_set_identifier: chemical.sourceId || '',
|
|
240
242
|
chemical_primary_name: chemical.chemicalName || '',
|
|
241
|
-
chemical_names: names,
|
|
243
|
+
chemical_names: synonyms,
|
|
242
244
|
chemical_synonyms: synonyms,
|
|
243
245
|
chemical_categories: chemical.chemicalCategories || [],
|
|
244
246
|
chemical_identifiers: identifiers,
|
|
@@ -922,22 +924,32 @@ class ChemicalsService {
|
|
|
922
924
|
}
|
|
923
925
|
|
|
924
926
|
try {
|
|
927
|
+
const variations = getCasNumberVariations(searchTerm);
|
|
928
|
+
const shouldClauses = [];
|
|
929
|
+
for (const v of variations) {
|
|
930
|
+
shouldClauses.push(
|
|
931
|
+
{ term: { 'chemical_name.keyword': { value: v, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
|
|
932
|
+
{ prefix: { 'chemical_name.keyword': { value: v, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
|
|
933
|
+
{ term: { 'synonyms.keyword': { value: v, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
|
|
934
|
+
{ prefix: { 'synonyms.keyword': { value: v, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
|
|
935
|
+
{ term: { 'cas_numbers': { value: v, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
|
|
936
|
+
{ prefix: { 'cas_numbers': { value: v, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
|
|
937
|
+
{ term: { 'identifier_values': { value: v, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
|
|
938
|
+
{ prefix: { 'identifier_values': { value: v, boost: SEARCH_BOOST_PREFIX_SECONDARY } } },
|
|
939
|
+
{ match: { 'chemical_name': { query: v, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
|
|
940
|
+
{ match: { 'synonyms': { query: v, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
|
|
941
|
+
{ match_phrase_prefix: { 'chemical_name': { query: v, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
|
|
942
|
+
{ match_phrase_prefix: { 'synonyms': { query: v, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
|
|
943
|
+
);
|
|
944
|
+
}
|
|
945
|
+
|
|
925
946
|
const result = await this.connection.invokeOpenSearch({
|
|
926
947
|
operation: 'search',
|
|
927
948
|
body: {
|
|
928
949
|
size: limit,
|
|
929
950
|
query: {
|
|
930
951
|
bool: {
|
|
931
|
-
should: [
|
|
932
|
-
{ term: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
|
|
933
|
-
{ prefix: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
|
|
934
|
-
{ term: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
|
|
935
|
-
{ prefix: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
|
|
936
|
-
{ match: { 'chemical_name': { query: searchTerm, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
|
|
937
|
-
{ match: { 'synonyms': { query: searchTerm, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
|
|
938
|
-
{ match_phrase_prefix: { 'chemical_name': { query: searchTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
|
|
939
|
-
{ match_phrase_prefix: { 'synonyms': { query: searchTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
|
|
940
|
-
],
|
|
952
|
+
should: shouldClauses,
|
|
941
953
|
minimum_should_match: 1
|
|
942
954
|
}
|
|
943
955
|
},
|
|
@@ -977,22 +989,32 @@ class ChemicalsService {
|
|
|
977
989
|
}
|
|
978
990
|
|
|
979
991
|
try {
|
|
992
|
+
const variations = getCasNumberVariations(synonymTerm);
|
|
993
|
+
const shouldClauses = [];
|
|
994
|
+
for (const v of variations) {
|
|
995
|
+
shouldClauses.push(
|
|
996
|
+
{ term: { 'synonyms.keyword': { value: v, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
|
|
997
|
+
{ prefix: { 'synonyms.keyword': { value: v, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
|
|
998
|
+
{ term: { 'chemical_name.keyword': { value: v, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
|
|
999
|
+
{ prefix: { 'chemical_name.keyword': { value: v, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
|
|
1000
|
+
{ term: { 'cas_numbers': { value: v, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
|
|
1001
|
+
{ prefix: { 'cas_numbers': { value: v, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
|
|
1002
|
+
{ term: { 'identifier_values': { value: v, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
|
|
1003
|
+
{ prefix: { 'identifier_values': { value: v, boost: SEARCH_BOOST_PREFIX_SECONDARY } } },
|
|
1004
|
+
{ match: { 'synonyms': { query: v, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
|
|
1005
|
+
{ match: { 'chemical_name': { query: v, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
|
|
1006
|
+
{ match_phrase_prefix: { 'synonyms': { query: v, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
|
|
1007
|
+
{ match_phrase_prefix: { 'chemical_name': { query: v, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
|
|
1008
|
+
);
|
|
1009
|
+
}
|
|
1010
|
+
|
|
980
1011
|
const result = await this.connection.invokeOpenSearch({
|
|
981
1012
|
operation: 'search',
|
|
982
1013
|
body: {
|
|
983
1014
|
size: limit,
|
|
984
1015
|
query: {
|
|
985
1016
|
bool: {
|
|
986
|
-
should: [
|
|
987
|
-
{ term: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
|
|
988
|
-
{ prefix: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
|
|
989
|
-
{ term: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
|
|
990
|
-
{ prefix: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
|
|
991
|
-
{ match: { 'synonyms': { query: synonymTerm, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
|
|
992
|
-
{ match: { 'chemical_name': { query: synonymTerm, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
|
|
993
|
-
{ match_phrase_prefix: { 'synonyms': { query: synonymTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
|
|
994
|
-
{ match_phrase_prefix: { 'chemical_name': { query: synonymTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
|
|
995
|
-
],
|
|
1017
|
+
should: shouldClauses,
|
|
996
1018
|
minimum_should_match: 1
|
|
997
1019
|
}
|
|
998
1020
|
},
|
|
@@ -1283,7 +1305,7 @@ class ChemicalsService {
|
|
|
1283
1305
|
search: async (params) => {
|
|
1284
1306
|
let searchTerm = '';
|
|
1285
1307
|
let limit = params.body?.size || 10;
|
|
1286
|
-
|
|
1308
|
+
|
|
1287
1309
|
const toLegacySource = (r) => this._toLegacyChemicalSource({
|
|
1288
1310
|
name: r.name,
|
|
1289
1311
|
cas: r.cas || [],
|
|
@@ -1291,11 +1313,21 @@ class ChemicalsService {
|
|
|
1291
1313
|
id: r.id
|
|
1292
1314
|
});
|
|
1293
1315
|
|
|
1316
|
+
// Callers may pass an Elasticsearch query_string with reserved chars escaped
|
|
1317
|
+
// (e.g. "71\-43\-2"). Strip the escapes plus a trailing wildcard so keyword
|
|
1318
|
+
// term/prefix clauses against cas_numbers / identifier_values can match.
|
|
1319
|
+
const normalizeQueryString = (raw) => {
|
|
1320
|
+
if (!raw) return '';
|
|
1321
|
+
let s = String(raw);
|
|
1322
|
+
if (s.endsWith('*')) s = s.slice(0, -1);
|
|
1323
|
+
return s.replace(/\\(.)/g, '$1');
|
|
1324
|
+
};
|
|
1325
|
+
|
|
1294
1326
|
if (params.index === 'synonym_lookup_index') {
|
|
1295
1327
|
const query = params.body?.query;
|
|
1296
1328
|
searchTerm = query?.match?.chemical_name ||
|
|
1297
1329
|
query?.term?.chemical_name ||
|
|
1298
|
-
query?.query_string?.query || '';
|
|
1330
|
+
normalizeQueryString(query?.query_string?.query) || '';
|
|
1299
1331
|
const searchResults = await this.searchBySynonym(searchTerm, limit);
|
|
1300
1332
|
|
|
1301
1333
|
return {
|
|
@@ -1325,7 +1357,7 @@ class ChemicalsService {
|
|
|
1325
1357
|
const query = params.body?.query;
|
|
1326
1358
|
searchTerm = query?.match?.chemical_name ||
|
|
1327
1359
|
query?.term?.chemical_name ||
|
|
1328
|
-
query?.query_string?.query || '';
|
|
1360
|
+
normalizeQueryString(query?.query_string?.query) || '';
|
|
1329
1361
|
const searchResults = await this.searchByName(searchTerm, limit);
|
|
1330
1362
|
|
|
1331
1363
|
return {
|
package/lib/db/index.js
CHANGED
|
@@ -4,6 +4,16 @@ function getFieldValue(field) {
|
|
|
4
4
|
if ('longValue' in field) return field.longValue;
|
|
5
5
|
if ('doubleValue' in field) return field.doubleValue;
|
|
6
6
|
if ('booleanValue' in field) return field.booleanValue;
|
|
7
|
+
if ('blobValue' in field) return field.blobValue;
|
|
8
|
+
if ('arrayValue' in field && field.arrayValue) {
|
|
9
|
+
const av = field.arrayValue;
|
|
10
|
+
if (Array.isArray(av.stringValues)) return av.stringValues;
|
|
11
|
+
if (Array.isArray(av.longValues)) return av.longValues;
|
|
12
|
+
if (Array.isArray(av.doubleValues)) return av.doubleValues;
|
|
13
|
+
if (Array.isArray(av.booleanValues)) return av.booleanValues;
|
|
14
|
+
if (Array.isArray(av.arrayValues)) return av.arrayValues;
|
|
15
|
+
return [];
|
|
16
|
+
}
|
|
7
17
|
return null;
|
|
8
18
|
}
|
|
9
19
|
|
package/lib/search.js
CHANGED
package/package.json
CHANGED