@toxplanet/pegasus-sdk 1.2.6 → 1.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/lib/chemicals.js +88 -10
  2. package/package.json +1 -1
package/lib/chemicals.js CHANGED
@@ -6,7 +6,11 @@ const SEARCH_BOOST_PREFIX_PRIMARY = 50;
6
6
  const SEARCH_BOOST_EXACT_SECONDARY = 30;
7
7
  const SEARCH_BOOST_PREFIX_SECONDARY = 10;
8
8
 
9
- const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'DTXSID', 'EINECS', 'EC']);
9
+ const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'CID', 'DTXSID', 'EINECS', 'EC']);
10
+
11
+ const INCHIKEY_PATTERN = /^[A-Z0-9]{14}-[A-Z0-9]{8,10}-[A-Z0-9]$/;
12
+ const EC_PATTERN = /^\d{3}-\d{3}-\d$/;
13
+ const CID_PATTERN = /^CID/i;
10
14
 
11
15
  function escapeLikePattern(value) {
12
16
  return value.replace(/[%_\\]/g, '\\$&');
@@ -94,6 +98,60 @@ class ChemicalsService {
94
98
  return d instanceof Date ? d.toISOString() : (d || new Date().toISOString());
95
99
  }
96
100
 
101
+ _toLegacyChemicalSource({ name = '', cas = [], identifiers = [], id = '', formula = null } = {}) {
102
+ return {
103
+ chemical_name: name,
104
+ chemical_name_sensitive: name,
105
+ chemical_name_sort: (name || '').toLowerCase(),
106
+ chemical_identifier: [...cas, ...identifiers],
107
+ chemical_set_identifier: cas[0] || id || '',
108
+ chemical_formula: formula
109
+ };
110
+ }
111
+
112
+ _chemicalRowToLegacySource(chemical) {
113
+ if (!chemical) return null;
114
+ const identifierObj = chemical.chemicalIdentifiers;
115
+ const cas = [];
116
+ const other = [];
117
+ let formula = null;
118
+
119
+ const visit = (type, value) => {
120
+ if (!type) return;
121
+ const values = Array.isArray(value) ? value : (value != null ? [value] : []);
122
+ const stringValues = values.map((v) => String(v)).filter((v) => v.length > 0);
123
+ if (type === 'CAS') {
124
+ cas.push(...stringValues);
125
+ } else if (type === 'formula' || type === 'Formula' || type === 'chemical_formula') {
126
+ if (formula == null && stringValues.length > 0) formula = stringValues[0];
127
+ } else {
128
+ other.push(...stringValues);
129
+ }
130
+ };
131
+
132
+ if (Array.isArray(identifierObj)) {
133
+ for (const item of identifierObj) {
134
+ if (item && typeof item === 'object') {
135
+ const type = item.type || item.identifier_key;
136
+ const value = item.value !== undefined ? item.value : item.identifier_value;
137
+ visit(type, value);
138
+ }
139
+ }
140
+ } else if (identifierObj && typeof identifierObj === 'object') {
141
+ for (const [type, value] of Object.entries(identifierObj)) {
142
+ visit(type, value);
143
+ }
144
+ }
145
+
146
+ return this._toLegacyChemicalSource({
147
+ name: chemical.chemicalName || '',
148
+ cas,
149
+ identifiers: other,
150
+ id: chemical.sourceId,
151
+ formula
152
+ });
153
+ }
154
+
97
155
  _mapChemicalRow(row) {
98
156
  if (!row) return null;
99
157
  return {
@@ -1006,9 +1064,30 @@ class ChemicalsService {
1006
1064
  },
1007
1065
 
1008
1066
  get: async (params) => {
1009
- const result = await this.getChemicalBySourceId(params.id);
1067
+ const id = params.id;
1068
+ let chemical = await this.getChemicalBySourceId(id);
1069
+
1070
+ if (!chemical) {
1071
+ const casMatches = await this.getChemicalsByCAS(id);
1072
+ if (casMatches.length > 0) chemical = casMatches[0];
1073
+ }
1074
+
1075
+ if (!chemical) {
1076
+ let identifierType = null;
1077
+ if (CID_PATTERN.test(id)) {
1078
+ identifierType = 'CID';
1079
+ } else if (INCHIKEY_PATTERN.test(id)) {
1080
+ identifierType = 'InChIKey';
1081
+ } else if (EC_PATTERN.test(id)) {
1082
+ identifierType = 'EC';
1083
+ }
1084
+ if (identifierType) {
1085
+ const matches = await this.getChemicalsByIdentifier(identifierType, id);
1086
+ if (matches.length > 0) chemical = matches[0];
1087
+ }
1088
+ }
1010
1089
 
1011
- if (!result) {
1090
+ if (!chemical) {
1012
1091
  return {
1013
1092
  body: {
1014
1093
  _index: params.index,
@@ -1025,7 +1104,7 @@ class ChemicalsService {
1025
1104
  _id: params.id,
1026
1105
  _version: 1,
1027
1106
  found: true,
1028
- _source: result
1107
+ _source: this._chemicalRowToLegacySource(chemical)
1029
1108
  },
1030
1109
  statusCode: 200
1031
1110
  };
@@ -1088,12 +1167,11 @@ class ChemicalsService {
1088
1167
  let searchTerm = '';
1089
1168
  let limit = params.body?.size || 10;
1090
1169
 
1091
- const toLegacySource = (r) => ({
1092
- chemical_name: r.name,
1093
- chemical_name_sensitive: r.name,
1094
- chemical_name_sort: (r.name || '').toLowerCase(),
1095
- chemical_identifier: [...(r.cas || []), ...(r.identifiers || [])],
1096
- chemical_set_identifier: (r.cas && r.cas[0]) || r.id || ''
1170
+ const toLegacySource = (r) => this._toLegacyChemicalSource({
1171
+ name: r.name,
1172
+ cas: r.cas || [],
1173
+ identifiers: r.identifiers || [],
1174
+ id: r.id
1097
1175
  });
1098
1176
 
1099
1177
  if (params.index === 'synonym_lookup_index') {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.2.6",
3
+ "version": "1.2.7",
4
4
  "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
5
  "main": "index.js",
6
6
  "type": "commonjs",