@toxplanet/pegasus-sdk 1.2.7 → 1.2.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/lib/chemicals.js +63 -31
  2. package/package.json +1 -1
package/lib/chemicals.js CHANGED
@@ -8,10 +8,6 @@ const SEARCH_BOOST_PREFIX_SECONDARY = 10;
8
8
 
9
9
  const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'CID', 'DTXSID', 'EINECS', 'EC']);
10
10
 
11
- const INCHIKEY_PATTERN = /^[A-Z0-9]{14}-[A-Z0-9]{8,10}-[A-Z0-9]$/;
12
- const EC_PATTERN = /^\d{3}-\d{3}-\d$/;
13
- const CID_PATTERN = /^CID/i;
14
-
15
11
  function escapeLikePattern(value) {
16
12
  return value.replace(/[%_\\]/g, '\\$&');
17
13
  }
@@ -65,18 +61,29 @@ function transformChemicalMeta(meta) {
65
61
 
66
62
  function transformChemicalIdentifiers(identifiers) {
67
63
  if (!identifiers || typeof identifiers !== 'object') return [];
64
+
68
65
  if (Array.isArray(identifiers)) {
69
- // If it's already in new format, return as-is
70
- if (identifiers.length > 0 && identifiers[0].type !== undefined) {
71
- return identifiers;
72
- }
73
- // Transform from old format { identifier_key, identifier_value, ... } to new format { type, value }
66
+ if (identifiers.length === 0) return [];
67
+ if (identifiers[0].type !== undefined) return identifiers;
68
+ // Legacy { identifier_key, identifier_value } row form
74
69
  return identifiers.map(item => ({
75
70
  type: item.identifier_key || item.type,
76
71
  value: Array.isArray(item.identifier_value) ? item.identifier_value[0] : (item.value || item.identifier_value)
77
72
  }));
78
73
  }
79
- return [];
74
+
75
+ // Legacy ES-doc form: { CAS: ["71-43-2"], CID: ["241"], InChIKey: "ABC..." }
76
+ // Flatten one entry per value, preserving multi-valued types.
77
+ const result = [];
78
+ for (const [type, value] of Object.entries(identifiers)) {
79
+ if (value == null) continue;
80
+ const values = Array.isArray(value) ? value : [value];
81
+ for (const v of values) {
82
+ if (v == null || v === '') continue;
83
+ result.push({ type, value: String(v) });
84
+ }
85
+ }
86
+ return result;
80
87
  }
81
88
 
82
89
  class ChemicalsService {
@@ -658,7 +665,7 @@ class ChemicalsService {
658
665
  }
659
666
 
660
667
  await this.connection.ensureConnected();
661
-
668
+
662
669
  const sql = `SELECT * FROM chemicals WHERE chemical_identifiers->>'${identifierType}' = :value OR chemical_identifiers->'${identifierType}' ? :value`;
663
670
  const params = [{ name: 'value', value: { stringValue: identifierValue } }];
664
671
  const result = await this.connection.query(sql, params);
@@ -669,6 +676,36 @@ class ChemicalsService {
669
676
  }
670
677
  }
671
678
 
679
+ async findChemicalByIdentifier(identifierValue) {
680
+ if (!identifierValue) return null;
681
+ const result = await this.connection.invokeOpenSearch({
682
+ operation: 'search',
683
+ body: {
684
+ size: 1,
685
+ query: {
686
+ bool: {
687
+ should: [
688
+ { term: { 'cas_numbers.keyword': identifierValue } },
689
+ { term: { 'identifier_values.keyword': identifierValue } }
690
+ ],
691
+ minimum_should_match: 1
692
+ }
693
+ },
694
+ _source: ['postgres_id', 'chemical_name', 'cas_numbers', 'identifier_values', 'synonyms']
695
+ }
696
+ });
697
+
698
+ const hit = result?.hits?.hits?.[0]?._source;
699
+ if (!hit) return null;
700
+ return {
701
+ id: hit.postgres_id,
702
+ name: hit.chemical_name,
703
+ cas: hit.cas_numbers || [],
704
+ identifiers: hit.identifier_values || [],
705
+ synonyms: hit.synonyms || []
706
+ };
707
+ }
708
+
672
709
  async countByCollection(collectionName) {
673
710
  try {
674
711
  await this.connection.ensureConnected();
@@ -1065,29 +1102,24 @@ class ChemicalsService {
1065
1102
 
1066
1103
  get: async (params) => {
1067
1104
  const id = params.id;
1068
- let chemical = await this.getChemicalBySourceId(id);
1069
-
1070
- if (!chemical) {
1071
- const casMatches = await this.getChemicalsByCAS(id);
1072
- if (casMatches.length > 0) chemical = casMatches[0];
1073
- }
1105
+ let source = null;
1074
1106
 
1075
- if (!chemical) {
1076
- let identifierType = null;
1077
- if (CID_PATTERN.test(id)) {
1078
- identifierType = 'CID';
1079
- } else if (INCHIKEY_PATTERN.test(id)) {
1080
- identifierType = 'InChIKey';
1081
- } else if (EC_PATTERN.test(id)) {
1082
- identifierType = 'EC';
1083
- }
1084
- if (identifierType) {
1085
- const matches = await this.getChemicalsByIdentifier(identifierType, id);
1086
- if (matches.length > 0) chemical = matches[0];
1107
+ const chemical = await this.getChemicalBySourceId(id);
1108
+ if (chemical) {
1109
+ source = this._chemicalRowToLegacySource(chemical);
1110
+ } else {
1111
+ const hit = await this.findChemicalByIdentifier(id);
1112
+ if (hit) {
1113
+ source = this._toLegacyChemicalSource({
1114
+ name: hit.name,
1115
+ cas: hit.cas,
1116
+ identifiers: hit.identifiers,
1117
+ id: hit.id
1118
+ });
1087
1119
  }
1088
1120
  }
1089
1121
 
1090
- if (!chemical) {
1122
+ if (!source) {
1091
1123
  return {
1092
1124
  body: {
1093
1125
  _index: params.index,
@@ -1104,7 +1136,7 @@ class ChemicalsService {
1104
1136
  _id: params.id,
1105
1137
  _version: 1,
1106
1138
  found: true,
1107
- _source: this._chemicalRowToLegacySource(chemical)
1139
+ _source: source
1108
1140
  },
1109
1141
  statusCode: 200
1110
1142
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.2.7",
3
+ "version": "1.2.8",
4
4
  "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
5
  "main": "index.js",
6
6
  "type": "commonjs",