@toxplanet/pegasus-sdk 1.2.9 → 1.2.12

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/lib/chemicals.js CHANGED
@@ -1,5 +1,6 @@
1
1
  const { logError, logInfo } = require('@toxplanet/tphelper/logging');
2
2
  const { SQSClient, SendMessageCommand } = require('@aws-sdk/client-sqs');
3
+ const { getCasNumberVariations } = require('./search');
3
4
 
4
5
  const SEARCH_BOOST_EXACT_PRIMARY = 100;
5
6
  const SEARCH_BOOST_PREFIX_PRIMARY = 50;
@@ -23,6 +24,7 @@ function escapeLikePattern(value) {
23
24
  }
24
25
 
25
26
  function parsePostgresArray(str) {
27
+ if (Array.isArray(str)) return str.map((v) => String(v));
26
28
  if (!str || str === '{}') return [];
27
29
  const trimmed = str.slice(1, -1);
28
30
  if (!trimmed) return [];
@@ -214,7 +216,7 @@ class ChemicalsService {
214
216
  group.identifier_value.push(String(v));
215
217
  }
216
218
  }
217
- const identifiers = this._expandIdentifierAliases(Array.from(identifierGroups.values()));
219
+ const identifiers = Array.from(identifierGroups.values());
218
220
 
219
221
  const metaList = Array.isArray(chemical.chemicalMeta) ? chemical.chemicalMeta : [];
220
222
  const meta = metaList.map((item) => {
@@ -223,22 +225,22 @@ class ChemicalsService {
223
225
  const valueArr = Array.isArray(rawValue)
224
226
  ? rawValue.map((v) => String(v))
225
227
  : (rawValue != null ? [String(rawValue)] : []);
226
- const out = {
228
+ const unit = item && (item.unit || item.meta_value_unit);
229
+ if (unit && valueArr.length > 0) {
230
+ valueArr[valueArr.length - 1] = `${valueArr[valueArr.length - 1]} ${unit}`;
231
+ }
232
+ return {
227
233
  meta_key: String(key).toLowerCase(),
228
234
  meta_value_text: valueArr
229
235
  };
230
- const unit = item && (item.unit || item.meta_value_unit);
231
- if (unit) out.meta_value_unit = unit;
232
- return out;
233
236
  });
234
237
 
235
238
  const synonyms = Array.isArray(chemical.chemicalSynonyms) ? chemical.chemicalSynonyms : [];
236
- const names = [chemical.chemicalName, ...synonyms].filter(Boolean);
237
239
 
238
240
  return {
239
241
  chemical_set_identifier: chemical.sourceId || '',
240
242
  chemical_primary_name: chemical.chemicalName || '',
241
- chemical_names: names,
243
+ chemical_names: synonyms,
242
244
  chemical_synonyms: synonyms,
243
245
  chemical_categories: chemical.chemicalCategories || [],
244
246
  chemical_identifiers: identifiers,
@@ -922,22 +924,32 @@ class ChemicalsService {
922
924
  }
923
925
 
924
926
  try {
927
+ const variations = getCasNumberVariations(searchTerm);
928
+ const shouldClauses = [];
929
+ for (const v of variations) {
930
+ shouldClauses.push(
931
+ { term: { 'chemical_name.keyword': { value: v, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
932
+ { prefix: { 'chemical_name.keyword': { value: v, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
933
+ { term: { 'synonyms.keyword': { value: v, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
934
+ { prefix: { 'synonyms.keyword': { value: v, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
935
+ { term: { 'cas_numbers': { value: v, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
936
+ { prefix: { 'cas_numbers': { value: v, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
937
+ { term: { 'identifier_values': { value: v, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
938
+ { prefix: { 'identifier_values': { value: v, boost: SEARCH_BOOST_PREFIX_SECONDARY } } },
939
+ { match: { 'chemical_name': { query: v, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
940
+ { match: { 'synonyms': { query: v, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
941
+ { match_phrase_prefix: { 'chemical_name': { query: v, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
942
+ { match_phrase_prefix: { 'synonyms': { query: v, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
943
+ );
944
+ }
945
+
925
946
  const result = await this.connection.invokeOpenSearch({
926
947
  operation: 'search',
927
948
  body: {
928
949
  size: limit,
929
950
  query: {
930
951
  bool: {
931
- should: [
932
- { term: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
933
- { prefix: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
934
- { term: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
935
- { prefix: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
936
- { match: { 'chemical_name': { query: searchTerm, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
937
- { match: { 'synonyms': { query: searchTerm, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
938
- { match_phrase_prefix: { 'chemical_name': { query: searchTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
939
- { match_phrase_prefix: { 'synonyms': { query: searchTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
940
- ],
952
+ should: shouldClauses,
941
953
  minimum_should_match: 1
942
954
  }
943
955
  },
@@ -977,22 +989,32 @@ class ChemicalsService {
977
989
  }
978
990
 
979
991
  try {
992
+ const variations = getCasNumberVariations(synonymTerm);
993
+ const shouldClauses = [];
994
+ for (const v of variations) {
995
+ shouldClauses.push(
996
+ { term: { 'synonyms.keyword': { value: v, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
997
+ { prefix: { 'synonyms.keyword': { value: v, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
998
+ { term: { 'chemical_name.keyword': { value: v, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
999
+ { prefix: { 'chemical_name.keyword': { value: v, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
1000
+ { term: { 'cas_numbers': { value: v, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
1001
+ { prefix: { 'cas_numbers': { value: v, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
1002
+ { term: { 'identifier_values': { value: v, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
1003
+ { prefix: { 'identifier_values': { value: v, boost: SEARCH_BOOST_PREFIX_SECONDARY } } },
1004
+ { match: { 'synonyms': { query: v, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
1005
+ { match: { 'chemical_name': { query: v, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
1006
+ { match_phrase_prefix: { 'synonyms': { query: v, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
1007
+ { match_phrase_prefix: { 'chemical_name': { query: v, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
1008
+ );
1009
+ }
1010
+
980
1011
  const result = await this.connection.invokeOpenSearch({
981
1012
  operation: 'search',
982
1013
  body: {
983
1014
  size: limit,
984
1015
  query: {
985
1016
  bool: {
986
- should: [
987
- { term: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
988
- { prefix: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
989
- { term: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
990
- { prefix: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
991
- { match: { 'synonyms': { query: synonymTerm, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
992
- { match: { 'chemical_name': { query: synonymTerm, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
993
- { match_phrase_prefix: { 'synonyms': { query: synonymTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
994
- { match_phrase_prefix: { 'chemical_name': { query: synonymTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
995
- ],
1017
+ should: shouldClauses,
996
1018
  minimum_should_match: 1
997
1019
  }
998
1020
  },
@@ -1283,7 +1305,7 @@ class ChemicalsService {
1283
1305
  search: async (params) => {
1284
1306
  let searchTerm = '';
1285
1307
  let limit = params.body?.size || 10;
1286
-
1308
+
1287
1309
  const toLegacySource = (r) => this._toLegacyChemicalSource({
1288
1310
  name: r.name,
1289
1311
  cas: r.cas || [],
@@ -1291,11 +1313,21 @@ class ChemicalsService {
1291
1313
  id: r.id
1292
1314
  });
1293
1315
 
1316
+ // Callers may pass an Elasticsearch query_string with reserved chars escaped
1317
+ // (e.g. "71\-43\-2"). Strip the escapes plus a trailing wildcard so keyword
1318
+ // term/prefix clauses against cas_numbers / identifier_values can match.
1319
+ const normalizeQueryString = (raw) => {
1320
+ if (!raw) return '';
1321
+ let s = String(raw);
1322
+ if (s.endsWith('*')) s = s.slice(0, -1);
1323
+ return s.replace(/\\(.)/g, '$1');
1324
+ };
1325
+
1294
1326
  if (params.index === 'synonym_lookup_index') {
1295
1327
  const query = params.body?.query;
1296
1328
  searchTerm = query?.match?.chemical_name ||
1297
1329
  query?.term?.chemical_name ||
1298
- query?.query_string?.query || '';
1330
+ normalizeQueryString(query?.query_string?.query) || '';
1299
1331
  const searchResults = await this.searchBySynonym(searchTerm, limit);
1300
1332
 
1301
1333
  return {
@@ -1325,7 +1357,7 @@ class ChemicalsService {
1325
1357
  const query = params.body?.query;
1326
1358
  searchTerm = query?.match?.chemical_name ||
1327
1359
  query?.term?.chemical_name ||
1328
- query?.query_string?.query || '';
1360
+ normalizeQueryString(query?.query_string?.query) || '';
1329
1361
  const searchResults = await this.searchByName(searchTerm, limit);
1330
1362
 
1331
1363
  return {
package/lib/db/index.js CHANGED
@@ -4,6 +4,16 @@ function getFieldValue(field) {
4
4
  if ('longValue' in field) return field.longValue;
5
5
  if ('doubleValue' in field) return field.doubleValue;
6
6
  if ('booleanValue' in field) return field.booleanValue;
7
+ if ('blobValue' in field) return field.blobValue;
8
+ if ('arrayValue' in field && field.arrayValue) {
9
+ const av = field.arrayValue;
10
+ if (Array.isArray(av.stringValues)) return av.stringValues;
11
+ if (Array.isArray(av.longValues)) return av.longValues;
12
+ if (Array.isArray(av.doubleValues)) return av.doubleValues;
13
+ if (Array.isArray(av.booleanValues)) return av.booleanValues;
14
+ if (Array.isArray(av.arrayValues)) return av.arrayValues;
15
+ return [];
16
+ }
7
17
  return null;
8
18
  }
9
19
 
package/lib/search.js CHANGED
@@ -383,3 +383,4 @@ class SearchService {
383
383
  }
384
384
 
385
385
  module.exports = SearchService;
386
+ module.exports.getCasNumberVariations = getCasNumberVariations;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.2.9",
3
+ "version": "1.2.12",
4
4
  "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
5
  "main": "index.js",
6
6
  "type": "commonjs",