@toxplanet/pegasus-sdk 1.2.9 → 1.2.11

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/lib/chemicals.js CHANGED
@@ -1,5 +1,6 @@
1
1
  const { logError, logInfo } = require('@toxplanet/tphelper/logging');
2
2
  const { SQSClient, SendMessageCommand } = require('@aws-sdk/client-sqs');
3
+ const { getCasNumberVariations } = require('./search');
3
4
 
4
5
  const SEARCH_BOOST_EXACT_PRIMARY = 100;
5
6
  const SEARCH_BOOST_PREFIX_PRIMARY = 50;
@@ -23,6 +24,7 @@ function escapeLikePattern(value) {
23
24
  }
24
25
 
25
26
  function parsePostgresArray(str) {
27
+ if (Array.isArray(str)) return str.map((v) => String(v));
26
28
  if (!str || str === '{}') return [];
27
29
  const trimmed = str.slice(1, -1);
28
30
  if (!trimmed) return [];
@@ -214,7 +216,7 @@ class ChemicalsService {
214
216
  group.identifier_value.push(String(v));
215
217
  }
216
218
  }
217
- const identifiers = this._expandIdentifierAliases(Array.from(identifierGroups.values()));
219
+ const identifiers = Array.from(identifierGroups.values());
218
220
 
219
221
  const metaList = Array.isArray(chemical.chemicalMeta) ? chemical.chemicalMeta : [];
220
222
  const meta = metaList.map((item) => {
@@ -223,22 +225,22 @@ class ChemicalsService {
223
225
  const valueArr = Array.isArray(rawValue)
224
226
  ? rawValue.map((v) => String(v))
225
227
  : (rawValue != null ? [String(rawValue)] : []);
226
- const out = {
228
+ const unit = item && (item.unit || item.meta_value_unit);
229
+ if (unit && valueArr.length > 0) {
230
+ valueArr[valueArr.length - 1] = `${valueArr[valueArr.length - 1]} ${unit}`;
231
+ }
232
+ return {
227
233
  meta_key: String(key).toLowerCase(),
228
234
  meta_value_text: valueArr
229
235
  };
230
- const unit = item && (item.unit || item.meta_value_unit);
231
- if (unit) out.meta_value_unit = unit;
232
- return out;
233
236
  });
234
237
 
235
238
  const synonyms = Array.isArray(chemical.chemicalSynonyms) ? chemical.chemicalSynonyms : [];
236
- const names = [chemical.chemicalName, ...synonyms].filter(Boolean);
237
239
 
238
240
  return {
239
241
  chemical_set_identifier: chemical.sourceId || '',
240
242
  chemical_primary_name: chemical.chemicalName || '',
241
- chemical_names: names,
243
+ chemical_names: synonyms,
242
244
  chemical_synonyms: synonyms,
243
245
  chemical_categories: chemical.chemicalCategories || [],
244
246
  chemical_identifiers: identifiers,
@@ -922,22 +924,32 @@ class ChemicalsService {
922
924
  }
923
925
 
924
926
  try {
927
+ const variations = getCasNumberVariations(searchTerm);
928
+ const shouldClauses = [];
929
+ for (const v of variations) {
930
+ shouldClauses.push(
931
+ { term: { 'chemical_name.keyword': { value: v, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
932
+ { prefix: { 'chemical_name.keyword': { value: v, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
933
+ { term: { 'synonyms.keyword': { value: v, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
934
+ { prefix: { 'synonyms.keyword': { value: v, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
935
+ { term: { 'cas_numbers': { value: v, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
936
+ { prefix: { 'cas_numbers': { value: v, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
937
+ { term: { 'identifier_values': { value: v, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
938
+ { prefix: { 'identifier_values': { value: v, boost: SEARCH_BOOST_PREFIX_SECONDARY } } },
939
+ { match: { 'chemical_name': { query: v, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
940
+ { match: { 'synonyms': { query: v, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
941
+ { match_phrase_prefix: { 'chemical_name': { query: v, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
942
+ { match_phrase_prefix: { 'synonyms': { query: v, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
943
+ );
944
+ }
945
+
925
946
  const result = await this.connection.invokeOpenSearch({
926
947
  operation: 'search',
927
948
  body: {
928
949
  size: limit,
929
950
  query: {
930
951
  bool: {
931
- should: [
932
- { term: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
933
- { prefix: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
934
- { term: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
935
- { prefix: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
936
- { match: { 'chemical_name': { query: searchTerm, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
937
- { match: { 'synonyms': { query: searchTerm, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
938
- { match_phrase_prefix: { 'chemical_name': { query: searchTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
939
- { match_phrase_prefix: { 'synonyms': { query: searchTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
940
- ],
952
+ should: shouldClauses,
941
953
  minimum_should_match: 1
942
954
  }
943
955
  },
@@ -977,22 +989,32 @@ class ChemicalsService {
977
989
  }
978
990
 
979
991
  try {
992
+ const variations = getCasNumberVariations(synonymTerm);
993
+ const shouldClauses = [];
994
+ for (const v of variations) {
995
+ shouldClauses.push(
996
+ { term: { 'synonyms.keyword': { value: v, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
997
+ { prefix: { 'synonyms.keyword': { value: v, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
998
+ { term: { 'chemical_name.keyword': { value: v, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
999
+ { prefix: { 'chemical_name.keyword': { value: v, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
1000
+ { term: { 'cas_numbers': { value: v, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
1001
+ { prefix: { 'cas_numbers': { value: v, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
1002
+ { term: { 'identifier_values': { value: v, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
1003
+ { prefix: { 'identifier_values': { value: v, boost: SEARCH_BOOST_PREFIX_SECONDARY } } },
1004
+ { match: { 'synonyms': { query: v, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
1005
+ { match: { 'chemical_name': { query: v, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
1006
+ { match_phrase_prefix: { 'synonyms': { query: v, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
1007
+ { match_phrase_prefix: { 'chemical_name': { query: v, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
1008
+ );
1009
+ }
1010
+
980
1011
  const result = await this.connection.invokeOpenSearch({
981
1012
  operation: 'search',
982
1013
  body: {
983
1014
  size: limit,
984
1015
  query: {
985
1016
  bool: {
986
- should: [
987
- { term: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
988
- { prefix: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
989
- { term: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
990
- { prefix: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
991
- { match: { 'synonyms': { query: synonymTerm, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
992
- { match: { 'chemical_name': { query: synonymTerm, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
993
- { match_phrase_prefix: { 'synonyms': { query: synonymTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
994
- { match_phrase_prefix: { 'chemical_name': { query: synonymTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
995
- ],
1017
+ should: shouldClauses,
996
1018
  minimum_should_match: 1
997
1019
  }
998
1020
  },
package/lib/db/index.js CHANGED
@@ -4,6 +4,16 @@ function getFieldValue(field) {
4
4
  if ('longValue' in field) return field.longValue;
5
5
  if ('doubleValue' in field) return field.doubleValue;
6
6
  if ('booleanValue' in field) return field.booleanValue;
7
+ if ('blobValue' in field) return field.blobValue;
8
+ if ('arrayValue' in field && field.arrayValue) {
9
+ const av = field.arrayValue;
10
+ if (Array.isArray(av.stringValues)) return av.stringValues;
11
+ if (Array.isArray(av.longValues)) return av.longValues;
12
+ if (Array.isArray(av.doubleValues)) return av.doubleValues;
13
+ if (Array.isArray(av.booleanValues)) return av.booleanValues;
14
+ if (Array.isArray(av.arrayValues)) return av.arrayValues;
15
+ return [];
16
+ }
7
17
  return null;
8
18
  }
9
19
 
package/lib/search.js CHANGED
@@ -383,3 +383,4 @@ class SearchService {
383
383
  }
384
384
 
385
385
  module.exports = SearchService;
386
+ module.exports.getCasNumberVariations = getCasNumberVariations;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.2.9",
3
+ "version": "1.2.11",
4
4
  "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
5
  "main": "index.js",
6
6
  "type": "commonjs",