@toxplanet/pegasus-sdk 1.2.6 → 1.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/lib/chemicals.js +127 -17
  2. package/package.json +1 -1
package/lib/chemicals.js CHANGED
@@ -6,7 +6,7 @@ const SEARCH_BOOST_PREFIX_PRIMARY = 50;
6
6
  const SEARCH_BOOST_EXACT_SECONDARY = 30;
7
7
  const SEARCH_BOOST_PREFIX_SECONDARY = 10;
8
8
 
9
- const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'DTXSID', 'EINECS', 'EC']);
9
+ const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'CID', 'DTXSID', 'EINECS', 'EC']);
10
10
 
11
11
  function escapeLikePattern(value) {
12
12
  return value.replace(/[%_\\]/g, '\\$&');
@@ -61,18 +61,29 @@ function transformChemicalMeta(meta) {
61
61
 
62
62
  function transformChemicalIdentifiers(identifiers) {
63
63
  if (!identifiers || typeof identifiers !== 'object') return [];
64
+
64
65
  if (Array.isArray(identifiers)) {
65
- // If it's already in new format, return as-is
66
- if (identifiers.length > 0 && identifiers[0].type !== undefined) {
67
- return identifiers;
68
- }
69
- // Transform from old format { identifier_key, identifier_value, ... } to new format { type, value }
66
+ if (identifiers.length === 0) return [];
67
+ if (identifiers[0].type !== undefined) return identifiers;
68
+ // Legacy { identifier_key, identifier_value } row form
70
69
  return identifiers.map(item => ({
71
70
  type: item.identifier_key || item.type,
72
71
  value: Array.isArray(item.identifier_value) ? item.identifier_value[0] : (item.value || item.identifier_value)
73
72
  }));
74
73
  }
75
- return [];
74
+
75
+ // Legacy ES-doc form: { CAS: ["71-43-2"], CID: ["241"], InChIKey: "ABC..." }
76
+ // Flatten one entry per value, preserving multi-valued types.
77
+ const result = [];
78
+ for (const [type, value] of Object.entries(identifiers)) {
79
+ if (value == null) continue;
80
+ const values = Array.isArray(value) ? value : [value];
81
+ for (const v of values) {
82
+ if (v == null || v === '') continue;
83
+ result.push({ type, value: String(v) });
84
+ }
85
+ }
86
+ return result;
76
87
  }
77
88
 
78
89
  class ChemicalsService {
@@ -94,6 +105,60 @@ class ChemicalsService {
94
105
  return d instanceof Date ? d.toISOString() : (d || new Date().toISOString());
95
106
  }
96
107
 
108
+ _toLegacyChemicalSource({ name = '', cas = [], identifiers = [], id = '', formula = null } = {}) {
109
+ return {
110
+ chemical_name: name,
111
+ chemical_name_sensitive: name,
112
+ chemical_name_sort: (name || '').toLowerCase(),
113
+ chemical_identifier: [...cas, ...identifiers],
114
+ chemical_set_identifier: cas[0] || id || '',
115
+ chemical_formula: formula
116
+ };
117
+ }
118
+
119
+ _chemicalRowToLegacySource(chemical) {
120
+ if (!chemical) return null;
121
+ const identifierObj = chemical.chemicalIdentifiers;
122
+ const cas = [];
123
+ const other = [];
124
+ let formula = null;
125
+
126
+ const visit = (type, value) => {
127
+ if (!type) return;
128
+ const values = Array.isArray(value) ? value : (value != null ? [value] : []);
129
+ const stringValues = values.map((v) => String(v)).filter((v) => v.length > 0);
130
+ if (type === 'CAS') {
131
+ cas.push(...stringValues);
132
+ } else if (type === 'formula' || type === 'Formula' || type === 'chemical_formula') {
133
+ if (formula == null && stringValues.length > 0) formula = stringValues[0];
134
+ } else {
135
+ other.push(...stringValues);
136
+ }
137
+ };
138
+
139
+ if (Array.isArray(identifierObj)) {
140
+ for (const item of identifierObj) {
141
+ if (item && typeof item === 'object') {
142
+ const type = item.type || item.identifier_key;
143
+ const value = item.value !== undefined ? item.value : item.identifier_value;
144
+ visit(type, value);
145
+ }
146
+ }
147
+ } else if (identifierObj && typeof identifierObj === 'object') {
148
+ for (const [type, value] of Object.entries(identifierObj)) {
149
+ visit(type, value);
150
+ }
151
+ }
152
+
153
+ return this._toLegacyChemicalSource({
154
+ name: chemical.chemicalName || '',
155
+ cas,
156
+ identifiers: other,
157
+ id: chemical.sourceId,
158
+ formula
159
+ });
160
+ }
161
+
97
162
  _mapChemicalRow(row) {
98
163
  if (!row) return null;
99
164
  return {
@@ -600,7 +665,7 @@ class ChemicalsService {
600
665
  }
601
666
 
602
667
  await this.connection.ensureConnected();
603
-
668
+
604
669
  const sql = `SELECT * FROM chemicals WHERE chemical_identifiers->>'${identifierType}' = :value OR chemical_identifiers->'${identifierType}' ? :value`;
605
670
  const params = [{ name: 'value', value: { stringValue: identifierValue } }];
606
671
  const result = await this.connection.query(sql, params);
@@ -611,6 +676,36 @@ class ChemicalsService {
611
676
  }
612
677
  }
613
678
 
679
+ async findChemicalByIdentifier(identifierValue) {
680
+ if (!identifierValue) return null;
681
+ const result = await this.connection.invokeOpenSearch({
682
+ operation: 'search',
683
+ body: {
684
+ size: 1,
685
+ query: {
686
+ bool: {
687
+ should: [
688
+ { term: { 'cas_numbers.keyword': identifierValue } },
689
+ { term: { 'identifier_values.keyword': identifierValue } }
690
+ ],
691
+ minimum_should_match: 1
692
+ }
693
+ },
694
+ _source: ['postgres_id', 'chemical_name', 'cas_numbers', 'identifier_values', 'synonyms']
695
+ }
696
+ });
697
+
698
+ const hit = result?.hits?.hits?.[0]?._source;
699
+ if (!hit) return null;
700
+ return {
701
+ id: hit.postgres_id,
702
+ name: hit.chemical_name,
703
+ cas: hit.cas_numbers || [],
704
+ identifiers: hit.identifier_values || [],
705
+ synonyms: hit.synonyms || []
706
+ };
707
+ }
708
+
614
709
  async countByCollection(collectionName) {
615
710
  try {
616
711
  await this.connection.ensureConnected();
@@ -1006,9 +1101,25 @@ class ChemicalsService {
1006
1101
  },
1007
1102
 
1008
1103
  get: async (params) => {
1009
- const result = await this.getChemicalBySourceId(params.id);
1104
+ const id = params.id;
1105
+ let source = null;
1010
1106
 
1011
- if (!result) {
1107
+ const chemical = await this.getChemicalBySourceId(id);
1108
+ if (chemical) {
1109
+ source = this._chemicalRowToLegacySource(chemical);
1110
+ } else {
1111
+ const hit = await this.findChemicalByIdentifier(id);
1112
+ if (hit) {
1113
+ source = this._toLegacyChemicalSource({
1114
+ name: hit.name,
1115
+ cas: hit.cas,
1116
+ identifiers: hit.identifiers,
1117
+ id: hit.id
1118
+ });
1119
+ }
1120
+ }
1121
+
1122
+ if (!source) {
1012
1123
  return {
1013
1124
  body: {
1014
1125
  _index: params.index,
@@ -1025,7 +1136,7 @@ class ChemicalsService {
1025
1136
  _id: params.id,
1026
1137
  _version: 1,
1027
1138
  found: true,
1028
- _source: result
1139
+ _source: source
1029
1140
  },
1030
1141
  statusCode: 200
1031
1142
  };
@@ -1088,12 +1199,11 @@ class ChemicalsService {
1088
1199
  let searchTerm = '';
1089
1200
  let limit = params.body?.size || 10;
1090
1201
 
1091
- const toLegacySource = (r) => ({
1092
- chemical_name: r.name,
1093
- chemical_name_sensitive: r.name,
1094
- chemical_name_sort: (r.name || '').toLowerCase(),
1095
- chemical_identifier: [...(r.cas || []), ...(r.identifiers || [])],
1096
- chemical_set_identifier: (r.cas && r.cas[0]) || r.id || ''
1202
+ const toLegacySource = (r) => this._toLegacyChemicalSource({
1203
+ name: r.name,
1204
+ cas: r.cas || [],
1205
+ identifiers: r.identifiers || [],
1206
+ id: r.id
1097
1207
  });
1098
1208
 
1099
1209
  if (params.index === 'synonym_lookup_index') {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.2.6",
3
+ "version": "1.2.8",
4
4
  "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
5
  "main": "index.js",
6
6
  "type": "commonjs",