@toxplanet/pegasus-sdk 1.2.6 → 1.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/chemicals.js +127 -17
- package/package.json +1 -1
package/lib/chemicals.js
CHANGED
|
@@ -6,7 +6,7 @@ const SEARCH_BOOST_PREFIX_PRIMARY = 50;
|
|
|
6
6
|
const SEARCH_BOOST_EXACT_SECONDARY = 30;
|
|
7
7
|
const SEARCH_BOOST_PREFIX_SECONDARY = 10;
|
|
8
8
|
|
|
9
|
-
const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'DTXSID', 'EINECS', 'EC']);
|
|
9
|
+
const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'CID', 'DTXSID', 'EINECS', 'EC']);
|
|
10
10
|
|
|
11
11
|
function escapeLikePattern(value) {
|
|
12
12
|
return value.replace(/[%_\\]/g, '\\$&');
|
|
@@ -61,18 +61,29 @@ function transformChemicalMeta(meta) {
|
|
|
61
61
|
|
|
62
62
|
/**
 * Normalize stored chemical identifiers into a flat list of `{ type, value }` entries.
 *
 * Accepted input shapes:
 *  - an array already in `{ type, value }` form (returned as-is),
 *  - an array of legacy rows `{ identifier_key, identifier_value }`,
 *  - a legacy ES-doc object such as `{ CAS: ["71-43-2"], CID: ["241"], InChIKey: "ABC..." }`.
 *
 * @param {Array<object>|object|null|undefined} identifiers - Raw identifiers in any of the shapes above.
 * @returns {Array<{type: string, value: *}>} Normalized entries; `[]` for missing or non-object input.
 */
function transformChemicalIdentifiers(identifiers) {
  if (!identifiers || typeof identifiers !== 'object') return [];

  if (Array.isArray(identifiers)) {
    // Ignore null / non-object rows instead of throwing on `row.type` below.
    const rows = identifiers.filter((item) => item !== null && typeof item === 'object');
    if (rows.length === 0) return [];

    // The first usable row decides the format. Already-modern input is handed back
    // untouched (same array reference) unless junk rows had to be dropped.
    if (rows[0].type !== undefined) {
      return rows.length === identifiers.length ? identifiers : rows;
    }

    // Legacy { identifier_key, identifier_value } row form.
    // A multi-valued identifier_value contributes only its first element here.
    return rows.map((item) => ({
      type: item.identifier_key || item.type,
      value: Array.isArray(item.identifier_value)
        ? item.identifier_value[0]
        : (item.value || item.identifier_value)
    }));
  }

  // Legacy ES-doc form: { CAS: ["71-43-2"], CID: ["241"], InChIKey: "ABC..." }
  // Flatten one entry per value, preserving multi-valued types.
  const result = [];
  for (const [type, value] of Object.entries(identifiers)) {
    if (value == null) continue;
    const values = Array.isArray(value) ? value : [value];
    for (const v of values) {
      // Skip holes and empty strings so no blank identifiers are emitted.
      if (v == null || v === '') continue;
      result.push({ type, value: String(v) });
    }
  }
  return result;
}
|
|
77
88
|
|
|
78
89
|
class ChemicalsService {
|
|
@@ -94,6 +105,60 @@ class ChemicalsService {
|
|
|
94
105
|
return d instanceof Date ? d.toISOString() : (d || new Date().toISOString());
|
|
95
106
|
}
|
|
96
107
|
|
|
108
|
+
_toLegacyChemicalSource({ name = '', cas = [], identifiers = [], id = '', formula = null } = {}) {
|
|
109
|
+
return {
|
|
110
|
+
chemical_name: name,
|
|
111
|
+
chemical_name_sensitive: name,
|
|
112
|
+
chemical_name_sort: (name || '').toLowerCase(),
|
|
113
|
+
chemical_identifier: [...cas, ...identifiers],
|
|
114
|
+
chemical_set_identifier: cas[0] || id || '',
|
|
115
|
+
chemical_formula: formula
|
|
116
|
+
};
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
_chemicalRowToLegacySource(chemical) {
|
|
120
|
+
if (!chemical) return null;
|
|
121
|
+
const identifierObj = chemical.chemicalIdentifiers;
|
|
122
|
+
const cas = [];
|
|
123
|
+
const other = [];
|
|
124
|
+
let formula = null;
|
|
125
|
+
|
|
126
|
+
const visit = (type, value) => {
|
|
127
|
+
if (!type) return;
|
|
128
|
+
const values = Array.isArray(value) ? value : (value != null ? [value] : []);
|
|
129
|
+
const stringValues = values.map((v) => String(v)).filter((v) => v.length > 0);
|
|
130
|
+
if (type === 'CAS') {
|
|
131
|
+
cas.push(...stringValues);
|
|
132
|
+
} else if (type === 'formula' || type === 'Formula' || type === 'chemical_formula') {
|
|
133
|
+
if (formula == null && stringValues.length > 0) formula = stringValues[0];
|
|
134
|
+
} else {
|
|
135
|
+
other.push(...stringValues);
|
|
136
|
+
}
|
|
137
|
+
};
|
|
138
|
+
|
|
139
|
+
if (Array.isArray(identifierObj)) {
|
|
140
|
+
for (const item of identifierObj) {
|
|
141
|
+
if (item && typeof item === 'object') {
|
|
142
|
+
const type = item.type || item.identifier_key;
|
|
143
|
+
const value = item.value !== undefined ? item.value : item.identifier_value;
|
|
144
|
+
visit(type, value);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
} else if (identifierObj && typeof identifierObj === 'object') {
|
|
148
|
+
for (const [type, value] of Object.entries(identifierObj)) {
|
|
149
|
+
visit(type, value);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
return this._toLegacyChemicalSource({
|
|
154
|
+
name: chemical.chemicalName || '',
|
|
155
|
+
cas,
|
|
156
|
+
identifiers: other,
|
|
157
|
+
id: chemical.sourceId,
|
|
158
|
+
formula
|
|
159
|
+
});
|
|
160
|
+
}
|
|
161
|
+
|
|
97
162
|
_mapChemicalRow(row) {
|
|
98
163
|
if (!row) return null;
|
|
99
164
|
return {
|
|
@@ -600,7 +665,7 @@ class ChemicalsService {
|
|
|
600
665
|
}
|
|
601
666
|
|
|
602
667
|
await this.connection.ensureConnected();
|
|
603
|
-
|
|
668
|
+
|
|
604
669
|
const sql = `SELECT * FROM chemicals WHERE chemical_identifiers->>'${identifierType}' = :value OR chemical_identifiers->'${identifierType}' ? :value`;
|
|
605
670
|
const params = [{ name: 'value', value: { stringValue: identifierValue } }];
|
|
606
671
|
const result = await this.connection.query(sql, params);
|
|
@@ -611,6 +676,36 @@ class ChemicalsService {
|
|
|
611
676
|
}
|
|
612
677
|
}
|
|
613
678
|
|
|
679
|
+
async findChemicalByIdentifier(identifierValue) {
|
|
680
|
+
if (!identifierValue) return null;
|
|
681
|
+
const result = await this.connection.invokeOpenSearch({
|
|
682
|
+
operation: 'search',
|
|
683
|
+
body: {
|
|
684
|
+
size: 1,
|
|
685
|
+
query: {
|
|
686
|
+
bool: {
|
|
687
|
+
should: [
|
|
688
|
+
{ term: { 'cas_numbers.keyword': identifierValue } },
|
|
689
|
+
{ term: { 'identifier_values.keyword': identifierValue } }
|
|
690
|
+
],
|
|
691
|
+
minimum_should_match: 1
|
|
692
|
+
}
|
|
693
|
+
},
|
|
694
|
+
_source: ['postgres_id', 'chemical_name', 'cas_numbers', 'identifier_values', 'synonyms']
|
|
695
|
+
}
|
|
696
|
+
});
|
|
697
|
+
|
|
698
|
+
const hit = result?.hits?.hits?.[0]?._source;
|
|
699
|
+
if (!hit) return null;
|
|
700
|
+
return {
|
|
701
|
+
id: hit.postgres_id,
|
|
702
|
+
name: hit.chemical_name,
|
|
703
|
+
cas: hit.cas_numbers || [],
|
|
704
|
+
identifiers: hit.identifier_values || [],
|
|
705
|
+
synonyms: hit.synonyms || []
|
|
706
|
+
};
|
|
707
|
+
}
|
|
708
|
+
|
|
614
709
|
async countByCollection(collectionName) {
|
|
615
710
|
try {
|
|
616
711
|
await this.connection.ensureConnected();
|
|
@@ -1006,9 +1101,25 @@ class ChemicalsService {
|
|
|
1006
1101
|
},
|
|
1007
1102
|
|
|
1008
1103
|
get: async (params) => {
|
|
1009
|
-
const
|
|
1104
|
+
const id = params.id;
|
|
1105
|
+
let source = null;
|
|
1010
1106
|
|
|
1011
|
-
|
|
1107
|
+
const chemical = await this.getChemicalBySourceId(id);
|
|
1108
|
+
if (chemical) {
|
|
1109
|
+
source = this._chemicalRowToLegacySource(chemical);
|
|
1110
|
+
} else {
|
|
1111
|
+
const hit = await this.findChemicalByIdentifier(id);
|
|
1112
|
+
if (hit) {
|
|
1113
|
+
source = this._toLegacyChemicalSource({
|
|
1114
|
+
name: hit.name,
|
|
1115
|
+
cas: hit.cas,
|
|
1116
|
+
identifiers: hit.identifiers,
|
|
1117
|
+
id: hit.id
|
|
1118
|
+
});
|
|
1119
|
+
}
|
|
1120
|
+
}
|
|
1121
|
+
|
|
1122
|
+
if (!source) {
|
|
1012
1123
|
return {
|
|
1013
1124
|
body: {
|
|
1014
1125
|
_index: params.index,
|
|
@@ -1025,7 +1136,7 @@ class ChemicalsService {
|
|
|
1025
1136
|
_id: params.id,
|
|
1026
1137
|
_version: 1,
|
|
1027
1138
|
found: true,
|
|
1028
|
-
_source:
|
|
1139
|
+
_source: source
|
|
1029
1140
|
},
|
|
1030
1141
|
statusCode: 200
|
|
1031
1142
|
};
|
|
@@ -1088,12 +1199,11 @@ class ChemicalsService {
|
|
|
1088
1199
|
let searchTerm = '';
|
|
1089
1200
|
let limit = params.body?.size || 10;
|
|
1090
1201
|
|
|
1091
|
-
const toLegacySource = (r) => ({
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
chemical_set_identifier: (r.cas && r.cas[0]) || r.id || ''
|
|
1202
|
+
const toLegacySource = (r) => this._toLegacyChemicalSource({
|
|
1203
|
+
name: r.name,
|
|
1204
|
+
cas: r.cas || [],
|
|
1205
|
+
identifiers: r.identifiers || [],
|
|
1206
|
+
id: r.id
|
|
1097
1207
|
});
|
|
1098
1208
|
|
|
1099
1209
|
if (params.index === 'synonym_lookup_index') {
|
package/package.json
CHANGED