@toxplanet/pegasus-sdk 1.2.7 → 1.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/chemicals.js +63 -31
- package/package.json +1 -1
package/lib/chemicals.js
CHANGED
|
@@ -8,10 +8,6 @@ const SEARCH_BOOST_PREFIX_SECONDARY = 10;
|
|
|
8
8
|
|
|
9
9
|
// Set of chemical identifier type names known to this module.
// NOTE(review): presumably the allow-list that an identifier type is checked
// against before it is used in a query — confirm against the callers.
const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'CID', 'DTXSID', 'EINECS', 'EC']);
|
|
10
10
|
|
|
11
|
-
const INCHIKEY_PATTERN = /^[A-Z0-9]{14}-[A-Z0-9]{8,10}-[A-Z0-9]$/;
|
|
12
|
-
const EC_PATTERN = /^\d{3}-\d{3}-\d$/;
|
|
13
|
-
const CID_PATTERN = /^CID/i;
|
|
14
|
-
|
|
15
11
|
/**
 * Escape the characters that act as wildcards/escapes in a LIKE pattern.
 *
 * Every `%`, `_` and backslash in the input is prefixed with a backslash so
 * that it can be matched literally.
 *
 * @param {string|number|boolean|bigint} value - Raw text to escape. Non-string
 *   primitives (e.g. a numeric identifier) are converted with `String()`.
 * @returns {string} The escaped text.
 * @throws {TypeError} If `value` is null or undefined.
 */
function escapeLikePattern(value) {
  if (value == null) {
    // Fail loudly: silently escaping "null"/"undefined" would build a pattern
    // that matches the wrong rows.
    throw new TypeError('escapeLikePattern: value must not be null or undefined');
  }
  // Coerce first so non-string input does not die on a missing .replace method.
  return String(value).replace(/[%_\\]/g, '\\$&');
}
|
|
@@ -65,18 +61,29 @@ function transformChemicalMeta(meta) {
|
|
|
65
61
|
|
|
66
62
|
/**
 * Normalize chemical identifiers into a list of `{ type, value }` entries.
 *
 * Three input shapes are handled:
 *  - an array whose first element already has a `type` property: returned
 *    unchanged (same array instance);
 *  - an array of legacy rows `{ identifier_key, identifier_value }`: each row
 *    becomes `{ type, value }`; when `identifier_value` is an array only its
 *    first element is kept;
 *  - a legacy keyed object such as `{ CAS: ["71-43-2"], InChIKey: "ABC..." }`:
 *    flattened to one entry per value, with values converted to strings.
 *
 * @param {Array<object>|object|null|undefined} identifiers - Raw identifiers.
 * @returns {Array<{type: *, value: *}>} Normalized entries; `[]` when the
 *   input is missing, not an object, or contains nothing usable.
 */
function transformChemicalIdentifiers(identifiers) {
  if (!identifiers || typeof identifiers !== 'object') return [];

  if (Array.isArray(identifiers)) {
    if (identifiers.length === 0) return [];

    // Null-safe probe: a null/undefined first element must not throw here.
    if (identifiers[0]?.type !== undefined) return identifiers;

    // Legacy { identifier_key, identifier_value } row form.
    // Rows that are null or not objects carry no identifier data, so drop
    // them instead of throwing or emitting { type: undefined, value: undefined }.
    return identifiers
      .filter(item => item !== null && typeof item === 'object')
      .map(item => ({
        type: item.identifier_key || item.type,
        value: Array.isArray(item.identifier_value)
          ? item.identifier_value[0]
          : (item.value || item.identifier_value)
      }));
  }

  // Legacy ES-doc form: { CAS: ["71-43-2"], CID: ["241"], InChIKey: "ABC..." }
  // Flatten one entry per value, preserving multi-valued types.
  const result = [];
  for (const [type, value] of Object.entries(identifiers)) {
    if (value == null) continue;
    const values = Array.isArray(value) ? value : [value];
    for (const v of values) {
      // Skip empty slots so they do not become "null"/"" identifier values.
      if (v == null || v === '') continue;
      result.push({ type, value: String(v) });
    }
  }
  return result;
}
|
|
81
88
|
|
|
82
89
|
class ChemicalsService {
|
|
@@ -658,7 +665,7 @@ class ChemicalsService {
|
|
|
658
665
|
}
|
|
659
666
|
|
|
660
667
|
await this.connection.ensureConnected();
|
|
661
|
-
|
|
668
|
+
|
|
662
669
|
const sql = `SELECT * FROM chemicals WHERE chemical_identifiers->>'${identifierType}' = :value OR chemical_identifiers->'${identifierType}' ? :value`;
|
|
663
670
|
const params = [{ name: 'value', value: { stringValue: identifierValue } }];
|
|
664
671
|
const result = await this.connection.query(sql, params);
|
|
@@ -669,6 +676,36 @@ class ChemicalsService {
|
|
|
669
676
|
}
|
|
670
677
|
}
|
|
671
678
|
|
|
679
|
+
async findChemicalByIdentifier(identifierValue) {
|
|
680
|
+
if (!identifierValue) return null;
|
|
681
|
+
const result = await this.connection.invokeOpenSearch({
|
|
682
|
+
operation: 'search',
|
|
683
|
+
body: {
|
|
684
|
+
size: 1,
|
|
685
|
+
query: {
|
|
686
|
+
bool: {
|
|
687
|
+
should: [
|
|
688
|
+
{ term: { 'cas_numbers.keyword': identifierValue } },
|
|
689
|
+
{ term: { 'identifier_values.keyword': identifierValue } }
|
|
690
|
+
],
|
|
691
|
+
minimum_should_match: 1
|
|
692
|
+
}
|
|
693
|
+
},
|
|
694
|
+
_source: ['postgres_id', 'chemical_name', 'cas_numbers', 'identifier_values', 'synonyms']
|
|
695
|
+
}
|
|
696
|
+
});
|
|
697
|
+
|
|
698
|
+
const hit = result?.hits?.hits?.[0]?._source;
|
|
699
|
+
if (!hit) return null;
|
|
700
|
+
return {
|
|
701
|
+
id: hit.postgres_id,
|
|
702
|
+
name: hit.chemical_name,
|
|
703
|
+
cas: hit.cas_numbers || [],
|
|
704
|
+
identifiers: hit.identifier_values || [],
|
|
705
|
+
synonyms: hit.synonyms || []
|
|
706
|
+
};
|
|
707
|
+
}
|
|
708
|
+
|
|
672
709
|
async countByCollection(collectionName) {
|
|
673
710
|
try {
|
|
674
711
|
await this.connection.ensureConnected();
|
|
@@ -1065,29 +1102,24 @@ class ChemicalsService {
|
|
|
1065
1102
|
|
|
1066
1103
|
get: async (params) => {
|
|
1067
1104
|
const id = params.id;
|
|
1068
|
-
let
|
|
1069
|
-
|
|
1070
|
-
if (!chemical) {
|
|
1071
|
-
const casMatches = await this.getChemicalsByCAS(id);
|
|
1072
|
-
if (casMatches.length > 0) chemical = casMatches[0];
|
|
1073
|
-
}
|
|
1105
|
+
let source = null;
|
|
1074
1106
|
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1107
|
+
const chemical = await this.getChemicalBySourceId(id);
|
|
1108
|
+
if (chemical) {
|
|
1109
|
+
source = this._chemicalRowToLegacySource(chemical);
|
|
1110
|
+
} else {
|
|
1111
|
+
const hit = await this.findChemicalByIdentifier(id);
|
|
1112
|
+
if (hit) {
|
|
1113
|
+
source = this._toLegacyChemicalSource({
|
|
1114
|
+
name: hit.name,
|
|
1115
|
+
cas: hit.cas,
|
|
1116
|
+
identifiers: hit.identifiers,
|
|
1117
|
+
id: hit.id
|
|
1118
|
+
});
|
|
1087
1119
|
}
|
|
1088
1120
|
}
|
|
1089
1121
|
|
|
1090
|
-
if (!
|
|
1122
|
+
if (!source) {
|
|
1091
1123
|
return {
|
|
1092
1124
|
body: {
|
|
1093
1125
|
_index: params.index,
|
|
@@ -1104,7 +1136,7 @@ class ChemicalsService {
|
|
|
1104
1136
|
_id: params.id,
|
|
1105
1137
|
_version: 1,
|
|
1106
1138
|
found: true,
|
|
1107
|
-
_source:
|
|
1139
|
+
_source: source
|
|
1108
1140
|
},
|
|
1109
1141
|
statusCode: 200
|
|
1110
1142
|
};
|
package/package.json
CHANGED