@toxplanet/pegasus-sdk 1.2.5 → 1.2.7

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in the public registry.
package/lib/chemicals.js CHANGED
@@ -6,7 +6,11 @@ const SEARCH_BOOST_PREFIX_PRIMARY = 50;
6
6
  const SEARCH_BOOST_EXACT_SECONDARY = 30;
7
7
  const SEARCH_BOOST_PREFIX_SECONDARY = 10;
8
8
 
9
- const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'DTXSID', 'EINECS', 'EC']);
9
+ const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'CID', 'DTXSID', 'EINECS', 'EC']);
10
+
11
+ const INCHIKEY_PATTERN = /^[A-Z0-9]{14}-[A-Z0-9]{8,10}-[A-Z0-9]$/;
12
+ const EC_PATTERN = /^\d{3}-\d{3}-\d$/;
13
+ const CID_PATTERN = /^CID/i;
10
14
 
11
15
  function escapeLikePattern(value) {
12
16
  return value.replace(/[%_\\]/g, '\\$&');
@@ -94,6 +98,60 @@ class ChemicalsService {
94
98
  return d instanceof Date ? d.toISOString() : (d || new Date().toISOString());
95
99
  }
96
100
 
101
+ _toLegacyChemicalSource({ name = '', cas = [], identifiers = [], id = '', formula = null } = {}) {
102
+ return {
103
+ chemical_name: name,
104
+ chemical_name_sensitive: name,
105
+ chemical_name_sort: (name || '').toLowerCase(),
106
+ chemical_identifier: [...cas, ...identifiers],
107
+ chemical_set_identifier: cas[0] || id || '',
108
+ chemical_formula: formula
109
+ };
110
+ }
111
+
112
+ _chemicalRowToLegacySource(chemical) {
113
+ if (!chemical) return null;
114
+ const identifierObj = chemical.chemicalIdentifiers;
115
+ const cas = [];
116
+ const other = [];
117
+ let formula = null;
118
+
119
+ const visit = (type, value) => {
120
+ if (!type) return;
121
+ const values = Array.isArray(value) ? value : (value != null ? [value] : []);
122
+ const stringValues = values.map((v) => String(v)).filter((v) => v.length > 0);
123
+ if (type === 'CAS') {
124
+ cas.push(...stringValues);
125
+ } else if (type === 'formula' || type === 'Formula' || type === 'chemical_formula') {
126
+ if (formula == null && stringValues.length > 0) formula = stringValues[0];
127
+ } else {
128
+ other.push(...stringValues);
129
+ }
130
+ };
131
+
132
+ if (Array.isArray(identifierObj)) {
133
+ for (const item of identifierObj) {
134
+ if (item && typeof item === 'object') {
135
+ const type = item.type || item.identifier_key;
136
+ const value = item.value !== undefined ? item.value : item.identifier_value;
137
+ visit(type, value);
138
+ }
139
+ }
140
+ } else if (identifierObj && typeof identifierObj === 'object') {
141
+ for (const [type, value] of Object.entries(identifierObj)) {
142
+ visit(type, value);
143
+ }
144
+ }
145
+
146
+ return this._toLegacyChemicalSource({
147
+ name: chemical.chemicalName || '',
148
+ cas,
149
+ identifiers: other,
150
+ id: chemical.sourceId,
151
+ formula
152
+ });
153
+ }
154
+
97
155
  _mapChemicalRow(row) {
98
156
  if (!row) return null;
99
157
  return {
@@ -734,7 +792,7 @@ class ChemicalsService {
734
792
  */
735
793
  async searchByName(searchTerm, limit = 10) {
736
794
  if (!searchTerm) {
737
- return { results: [] };
795
+ return { results: [], total: { value: 0, relation: 'eq' } };
738
796
  }
739
797
 
740
798
  try {
@@ -771,7 +829,10 @@ class ChemicalsService {
771
829
  score: hit._score
772
830
  }));
773
831
 
774
- return { results };
832
+ return {
833
+ results,
834
+ total: result?.hits?.total ?? { value: results.length, relation: 'eq' }
835
+ };
775
836
  } catch (error) {
776
837
  logError('pegasus-sdk', 'ChemicalsService', 'searchByName', error);
777
838
  throw error;
@@ -786,7 +847,7 @@ class ChemicalsService {
786
847
  */
787
848
  async searchBySynonym(synonymTerm, limit = 10) {
788
849
  if (!synonymTerm) {
789
- return { results: [] };
850
+ return { results: [], total: { value: 0, relation: 'eq' } };
790
851
  }
791
852
 
792
853
  try {
@@ -823,7 +884,10 @@ class ChemicalsService {
823
884
  score: hit._score
824
885
  }));
825
886
 
826
- return { results };
887
+ return {
888
+ results,
889
+ total: result?.hits?.total ?? { value: results.length, relation: 'eq' }
890
+ };
827
891
  } catch (error) {
828
892
  logError('pegasus-sdk', 'ChemicalsService', 'searchBySynonym', error);
829
893
  throw error;
@@ -1000,9 +1064,30 @@ class ChemicalsService {
1000
1064
  },
1001
1065
 
1002
1066
  get: async (params) => {
1003
- const result = await this.getChemicalBySourceId(params.id);
1067
+ const id = params.id;
1068
+ let chemical = await this.getChemicalBySourceId(id);
1069
+
1070
+ if (!chemical) {
1071
+ const casMatches = await this.getChemicalsByCAS(id);
1072
+ if (casMatches.length > 0) chemical = casMatches[0];
1073
+ }
1004
1074
 
1005
- if (!result) {
1075
+ if (!chemical) {
1076
+ let identifierType = null;
1077
+ if (CID_PATTERN.test(id)) {
1078
+ identifierType = 'CID';
1079
+ } else if (INCHIKEY_PATTERN.test(id)) {
1080
+ identifierType = 'InChIKey';
1081
+ } else if (EC_PATTERN.test(id)) {
1082
+ identifierType = 'EC';
1083
+ }
1084
+ if (identifierType) {
1085
+ const matches = await this.getChemicalsByIdentifier(identifierType, id);
1086
+ if (matches.length > 0) chemical = matches[0];
1087
+ }
1088
+ }
1089
+
1090
+ if (!chemical) {
1006
1091
  return {
1007
1092
  body: {
1008
1093
  _index: params.index,
@@ -1019,7 +1104,7 @@ class ChemicalsService {
1019
1104
  _id: params.id,
1020
1105
  _version: 1,
1021
1106
  found: true,
1022
- _source: result
1107
+ _source: this._chemicalRowToLegacySource(chemical)
1023
1108
  },
1024
1109
  statusCode: 200
1025
1110
  };
@@ -1082,6 +1167,13 @@ class ChemicalsService {
1082
1167
  let searchTerm = '';
1083
1168
  let limit = params.body?.size || 10;
1084
1169
 
1170
+ const toLegacySource = (r) => this._toLegacyChemicalSource({
1171
+ name: r.name,
1172
+ cas: r.cas || [],
1173
+ identifiers: r.identifiers || [],
1174
+ id: r.id
1175
+ });
1176
+
1085
1177
  if (params.index === 'synonym_lookup_index') {
1086
1178
  const query = params.body?.query;
1087
1179
  searchTerm = query?.match?.chemical_name ||
@@ -1100,22 +1192,13 @@ class ChemicalsService {
1100
1192
  failed: 0
1101
1193
  },
1102
1194
  hits: {
1103
- total: {
1104
- value: searchResults.results.length,
1105
- relation: 'eq'
1106
- },
1195
+ total: searchResults.total ?? { value: searchResults.results.length, relation: 'eq' },
1107
1196
  max_score: searchResults.results[0]?.score || 0,
1108
1197
  hits: searchResults.results.map(result => ({
1109
1198
  _index: params.index,
1110
1199
  _id: result.id,
1111
1200
  _score: result.score,
1112
- _source: {
1113
- postgres_id: result.id,
1114
- chemical_name: result.name,
1115
- cas_numbers: result.cas,
1116
- identifier_values: result.identifiers,
1117
- synonyms: result.synonyms
1118
- }
1201
+ _source: toLegacySource(result)
1119
1202
  }))
1120
1203
  }
1121
1204
  },
@@ -1139,22 +1222,13 @@ class ChemicalsService {
1139
1222
  failed: 0
1140
1223
  },
1141
1224
  hits: {
1142
- total: {
1143
- value: searchResults.results.length,
1144
- relation: 'eq'
1145
- },
1225
+ total: searchResults.total ?? { value: searchResults.results.length, relation: 'eq' },
1146
1226
  max_score: searchResults.results[0]?.score || 0,
1147
1227
  hits: searchResults.results.map(result => ({
1148
1228
  _index: params.index,
1149
1229
  _id: result.id,
1150
1230
  _score: result.score,
1151
- _source: {
1152
- postgres_id: result.id,
1153
- chemical_name: result.name,
1154
- cas_numbers: result.cas,
1155
- identifier_values: result.identifiers,
1156
- synonyms: result.synonyms
1157
- }
1231
+ _source: toLegacySource(result)
1158
1232
  }))
1159
1233
  }
1160
1234
  },
package/lib/search.js CHANGED
@@ -94,7 +94,7 @@ class SearchService {
94
94
  */
95
95
  async searchChemicals(query, options = {}) {
96
96
  if (!query) {
97
- return { results: [] };
97
+ return { results: [], total: { value: 0, relation: 'eq' } };
98
98
  }
99
99
 
100
100
  // Extract options with defaults
@@ -165,7 +165,10 @@ class SearchService {
165
165
  score: hit._score
166
166
  }));
167
167
 
168
- return { results };
168
+ return {
169
+ results,
170
+ total: result?.hits?.total ?? { value: results.length, relation: 'eq' }
171
+ };
169
172
  } catch (error) {
170
173
  logError('pegasus-sdk', 'SearchService', 'searchChemicals', error);
171
174
  throw error;
@@ -356,7 +359,7 @@ class SearchService {
356
359
  chemical_set_identifier: (r.cas && r.cas[0]) || r.id || '',
357
360
  }
358
361
  })),
359
- total: { value: pegasusResults.results.length, relation: 'eq' }
362
+ total: pegasusResults.total ?? { value: pegasusResults.results.length, relation: 'eq' }
360
363
  },
361
364
  timed_out: false,
362
365
  _shards: { total: 1, successful: 1, failed: 0 }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.2.5",
3
+ "version": "1.2.7",
4
4
  "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
5
  "main": "index.js",
6
6
  "type": "commonjs",