@toxplanet/pegasus-sdk 1.1.1 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/environment.dev.js +5 -5
- package/lib/chemicals.js +538 -44
- package/package.json +1 -1
|
@@ -19,9 +19,9 @@ module.exports = {
|
|
|
19
19
|
rejectUnauthorized: false
|
|
20
20
|
}
|
|
21
21
|
},
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
22
|
+
indexRoutes: {
|
|
23
|
+
chemicals: ['chemical_index*'],
|
|
24
|
+
documents: ['document_nones_index*'],
|
|
25
|
+
search: [/^(chemical_index|document_nones_index|search)/]
|
|
26
|
+
}
|
|
27
27
|
};
|
package/lib/chemicals.js
CHANGED
|
@@ -1,6 +1,17 @@
|
|
|
1
1
|
const { logError } = require('@toxplanet/tphelper/logging');
|
|
2
2
|
const { getDrizzle, schema } = require('./db');
|
|
3
|
-
const { eq } = require('drizzle-orm');
|
|
3
|
+
const { eq, sql, and, inArray, arrayContains } = require('drizzle-orm');
|
|
4
|
+
|
|
5
|
+
// Relevance boosts applied to the `should` clauses of the name/synonym search
// queries in this file. "PRIMARY" is the field the search targets, "SECONDARY"
// is the other field; "EXACT" is used on `term` clauses, "PREFIX" on `prefix`
// clauses.
const SEARCH_BOOST_EXACT_PRIMARY = 100;
const SEARCH_BOOST_PREFIX_PRIMARY = 50;
const SEARCH_BOOST_EXACT_SECONDARY = 30;
const SEARCH_BOOST_PREFIX_SECONDARY = 10;

// Identifier keys accepted by getChemicalsByIdentifier(); any other key is
// rejected there before a query is built.
const ALLOWED_IDENTIFIER_TYPES = new Set([
  'CAS',
  'SMILES',
  'InChI',
  'InChIKey',
  'PubChem',
  'DTXSID',
  'EINECS',
  'EC'
]);
|
|
11
|
+
|
|
12
|
+
/**
 * Escapes the SQL LIKE/ILIKE wildcard characters (`%`, `_`) and the escape
 * character itself (`\`) by prefixing each with a backslash, so the value is
 * matched literally when embedded in a pattern.
 *
 * Non-string values (for example a numeric identifier) are converted with
 * `String()` before escaping.
 *
 * @param {*} value - Value to embed in a LIKE pattern.
 * @returns {string} The escaped text.
 * @throws {TypeError} If `value` is null or undefined.
 */
function escapeLikePattern(value) {
  if (value == null) {
    throw new TypeError('escapeLikePattern requires a non-null value');
  }
  // '\\$&' re-emits the matched character preceded by a single backslash.
  return String(value).replace(/[%_\\]/g, '\\$&');
}
|
|
4
15
|
|
|
5
16
|
class ChemicalsService {
|
|
6
17
|
constructor(connection) {
|
|
@@ -15,11 +26,91 @@ class ChemicalsService {
|
|
|
15
26
|
return this.db;
|
|
16
27
|
}
|
|
17
28
|
|
|
18
|
-
async bulkIndexFielded(documents) {
|
|
29
|
+
async bulkIndexFielded(documents) {
|
|
30
|
+
try {
|
|
31
|
+
if (!documents || documents.length === 0) {
|
|
32
|
+
return { indexed: 0, errors: [], results: [] };
|
|
33
|
+
}
|
|
19
34
|
|
|
20
|
-
|
|
35
|
+
const db = this.getDb();
|
|
36
|
+
const results = [];
|
|
37
|
+
const errors = [];
|
|
38
|
+
|
|
39
|
+
for (let i = 0; i < documents.length; i++) {
|
|
40
|
+
const doc = documents[i];
|
|
41
|
+
try {
|
|
42
|
+
const chemical = {
|
|
43
|
+
sourceId: doc.source_id || doc._id,
|
|
44
|
+
chemicalName: doc.chemical_name || doc.name,
|
|
45
|
+
chemicalMeta: doc.chemical_meta || {},
|
|
46
|
+
chemicalIdentifiers: doc.chemical_identifiers || {},
|
|
47
|
+
chemicalSynonyms: doc.chemical_synonyms || [],
|
|
48
|
+
chemicalCategories: doc.chemical_categories || [],
|
|
49
|
+
createdAt: doc.created_at || new Date(),
|
|
50
|
+
updatedAt: doc.updated_at || new Date(),
|
|
51
|
+
...(doc.imported_at && { importedAt: doc.imported_at }),
|
|
52
|
+
...(doc.chemical_id && { chemicalId: doc.chemical_id })
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
const [result] = await db
|
|
56
|
+
.insert(schema.chemicals)
|
|
57
|
+
.values(chemical)
|
|
58
|
+
.onConflictDoUpdate({
|
|
59
|
+
target: schema.chemicals.sourceId,
|
|
60
|
+
set: {
|
|
61
|
+
chemicalName: chemical.chemicalName,
|
|
62
|
+
chemicalMeta: chemical.chemicalMeta,
|
|
63
|
+
chemicalIdentifiers: chemical.chemicalIdentifiers,
|
|
64
|
+
chemicalSynonyms: chemical.chemicalSynonyms,
|
|
65
|
+
chemicalCategories: chemical.chemicalCategories,
|
|
66
|
+
updatedAt: new Date()
|
|
67
|
+
}
|
|
68
|
+
})
|
|
69
|
+
.returning();
|
|
70
|
+
|
|
71
|
+
results.push({ index: i, success: true, result });
|
|
72
|
+
} catch (err) {
|
|
73
|
+
results.push({ index: i, success: false, error: err.message });
|
|
74
|
+
errors.push({ document: doc, error: err.message });
|
|
75
|
+
}
|
|
76
|
+
}
|
|
21
77
|
|
|
22
|
-
|
|
78
|
+
return { indexed: results.filter(r => r.success).length, errors, results };
|
|
79
|
+
} catch (error) {
|
|
80
|
+
logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexFielded', error);
|
|
81
|
+
throw error;
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
async bulkIndexFulltext(documents) {
|
|
86
|
+
try {
|
|
87
|
+
return { acknowledged: true, count: documents?.length || 0 };
|
|
88
|
+
} catch (error) {
|
|
89
|
+
logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexFulltext', error);
|
|
90
|
+
throw error;
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
async bulkIndexSubstances(substances) {
|
|
95
|
+
try {
|
|
96
|
+
const documents = substances.map(substance => ({
|
|
97
|
+
source_id: substance.substance_id || substance.id,
|
|
98
|
+
chemical_name: substance.name || substance.substance_name,
|
|
99
|
+
chemical_meta: substance.meta || {},
|
|
100
|
+
chemical_identifiers: substance.identifiers || {},
|
|
101
|
+
chemical_synonyms: substance.synonyms || [],
|
|
102
|
+
chemical_categories: substance.categories || substance.substance_types || [],
|
|
103
|
+
created_at: substance.created_at,
|
|
104
|
+
updated_at: substance.updated_at,
|
|
105
|
+
imported_at: substance.imported_at
|
|
106
|
+
}));
|
|
107
|
+
|
|
108
|
+
return await this.bulkIndexFielded(documents);
|
|
109
|
+
} catch (error) {
|
|
110
|
+
logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexSubstances', error);
|
|
111
|
+
throw error;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
23
114
|
|
|
24
115
|
async createChemical(chemical) {
|
|
25
116
|
try {
|
|
@@ -48,7 +139,30 @@ class ChemicalsService {
|
|
|
48
139
|
}
|
|
49
140
|
}
|
|
50
141
|
|
|
51
|
-
async updateChemical(chemicalId, updates) {
|
|
142
|
+
async updateChemical(chemicalId, updates) {
|
|
143
|
+
try {
|
|
144
|
+
const db = this.getDb();
|
|
145
|
+
|
|
146
|
+
const updateData = {};
|
|
147
|
+
if (updates.chemical_name) updateData.chemicalName = updates.chemical_name;
|
|
148
|
+
if (updates.chemical_meta) updateData.chemicalMeta = updates.chemical_meta;
|
|
149
|
+
if (updates.chemical_identifiers) updateData.chemicalIdentifiers = updates.chemical_identifiers;
|
|
150
|
+
if (updates.chemical_synonyms) updateData.chemicalSynonyms = updates.chemical_synonyms;
|
|
151
|
+
if (updates.chemical_categories) updateData.chemicalCategories = updates.chemical_categories;
|
|
152
|
+
updateData.updatedAt = new Date();
|
|
153
|
+
|
|
154
|
+
const [result] = await db
|
|
155
|
+
.update(schema.chemicals)
|
|
156
|
+
.set(updateData)
|
|
157
|
+
.where(eq(schema.chemicals.chemicalId, chemicalId))
|
|
158
|
+
.returning();
|
|
159
|
+
|
|
160
|
+
return result || null;
|
|
161
|
+
} catch (error) {
|
|
162
|
+
logError('pegasus-sdk', 'ChemicalsService', 'updateChemical', error);
|
|
163
|
+
throw error;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
52
166
|
|
|
53
167
|
async deleteChemical(chemicalId) {
|
|
54
168
|
try {
|
|
@@ -66,13 +180,92 @@ class ChemicalsService {
|
|
|
66
180
|
}
|
|
67
181
|
}
|
|
68
182
|
|
|
69
|
-
async deleteBySourceId(sourceId) {
|
|
183
|
+
async deleteBySourceId(sourceId) {
|
|
184
|
+
try {
|
|
185
|
+
const db = this.getDb();
|
|
186
|
+
|
|
187
|
+
const [deleted] = await db
|
|
188
|
+
.delete(schema.chemicals)
|
|
189
|
+
.where(eq(schema.chemicals.sourceId, sourceId))
|
|
190
|
+
.returning();
|
|
70
191
|
|
|
71
|
-
|
|
192
|
+
return deleted || null;
|
|
193
|
+
} catch (error) {
|
|
194
|
+
logError('pegasus-sdk', 'ChemicalsService', 'deleteBySourceId', error);
|
|
195
|
+
throw error;
|
|
196
|
+
}
|
|
197
|
+
}
|
|
72
198
|
|
|
73
|
-
async
|
|
199
|
+
async deleteCollection(collectionName) {
|
|
200
|
+
try {
|
|
201
|
+
const db = this.getDb();
|
|
202
|
+
|
|
203
|
+
const deleted = await db
|
|
204
|
+
.delete(schema.chemicals)
|
|
205
|
+
.where(arrayContains(schema.chemicals.chemicalCategories, [collectionName]))
|
|
206
|
+
.returning();
|
|
74
207
|
|
|
75
|
-
|
|
208
|
+
return { deletedCount: deleted.length, deleted };
|
|
209
|
+
} catch (error) {
|
|
210
|
+
logError('pegasus-sdk', 'ChemicalsService', 'deleteCollection', error);
|
|
211
|
+
throw error;
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
async updateCollectionProperty(collectionName, propertyPath, newValue) {
|
|
216
|
+
try {
|
|
217
|
+
const db = this.getDb();
|
|
218
|
+
const pathArray = propertyPath.split('.');
|
|
219
|
+
const valueJson = JSON.stringify(newValue);
|
|
220
|
+
|
|
221
|
+
const results = await db
|
|
222
|
+
.update(schema.chemicals)
|
|
223
|
+
.set({
|
|
224
|
+
chemicalMeta: sql`jsonb_set(${schema.chemicals.chemicalMeta}, ${pathArray}::text[], ${valueJson}::jsonb)`,
|
|
225
|
+
updatedAt: new Date()
|
|
226
|
+
})
|
|
227
|
+
.where(arrayContains(schema.chemicals.chemicalCategories, [collectionName]))
|
|
228
|
+
.returning();
|
|
229
|
+
|
|
230
|
+
return { updatedCount: results.length, updated: results };
|
|
231
|
+
} catch (error) {
|
|
232
|
+
logError('pegasus-sdk', 'ChemicalsService', 'updateCollectionProperty', error);
|
|
233
|
+
throw error;
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
async bulkUpdateProperty(filter, propertyPath, newValue) {
|
|
238
|
+
try {
|
|
239
|
+
const db = this.getDb();
|
|
240
|
+
|
|
241
|
+
let whereCondition = sql`1=1`;
|
|
242
|
+
|
|
243
|
+
if (filter.chemicalIds && filter.chemicalIds.length > 0) {
|
|
244
|
+
whereCondition = inArray(schema.chemicals.chemicalId, filter.chemicalIds);
|
|
245
|
+
} else if (filter.sourceIds && filter.sourceIds.length > 0) {
|
|
246
|
+
whereCondition = inArray(schema.chemicals.sourceId, filter.sourceIds);
|
|
247
|
+
} else if (filter.category) {
|
|
248
|
+
whereCondition = arrayContains(schema.chemicals.chemicalCategories, [filter.category]);
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
const pathArray = propertyPath.split('.');
|
|
252
|
+
const valueJson = JSON.stringify(newValue);
|
|
253
|
+
|
|
254
|
+
const results = await db
|
|
255
|
+
.update(schema.chemicals)
|
|
256
|
+
.set({
|
|
257
|
+
chemicalMeta: sql`jsonb_set(COALESCE(${schema.chemicals.chemicalMeta}, '{}'), ${pathArray}::text[], ${valueJson}::jsonb)`,
|
|
258
|
+
updatedAt: new Date()
|
|
259
|
+
})
|
|
260
|
+
.where(whereCondition)
|
|
261
|
+
.returning();
|
|
262
|
+
|
|
263
|
+
return { updatedCount: results.length, updated: results };
|
|
264
|
+
} catch (error) {
|
|
265
|
+
logError('pegasus-sdk', 'ChemicalsService', 'bulkUpdateProperty', error);
|
|
266
|
+
throw error;
|
|
267
|
+
}
|
|
268
|
+
}
|
|
76
269
|
|
|
77
270
|
async getChemicalById(chemicalId) {
|
|
78
271
|
try {
|
|
@@ -91,25 +284,184 @@ class ChemicalsService {
|
|
|
91
284
|
}
|
|
92
285
|
}
|
|
93
286
|
|
|
94
|
-
async getChemicalBySourceId(sourceId) {
|
|
287
|
+
async getChemicalBySourceId(sourceId) {
|
|
288
|
+
try {
|
|
289
|
+
const db = this.getDb();
|
|
290
|
+
|
|
291
|
+
const [result] = await db
|
|
292
|
+
.select()
|
|
293
|
+
.from(schema.chemicals)
|
|
294
|
+
.where(eq(schema.chemicals.sourceId, sourceId))
|
|
295
|
+
.limit(1);
|
|
95
296
|
|
|
96
|
-
|
|
297
|
+
return result || null;
|
|
298
|
+
} catch (error) {
|
|
299
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getChemicalBySourceId', error);
|
|
300
|
+
throw error;
|
|
301
|
+
}
|
|
302
|
+
}
|
|
97
303
|
|
|
98
|
-
async
|
|
304
|
+
async getChemicalsByCAS(casNumber) {
|
|
305
|
+
try {
|
|
306
|
+
const db = this.getDb();
|
|
307
|
+
|
|
308
|
+
const results = await db
|
|
309
|
+
.select()
|
|
310
|
+
.from(schema.chemicals)
|
|
311
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}->>'CAS' = ${casNumber} OR ${schema.chemicals.chemicalIdentifiers}->'CAS' ? ${casNumber}`);
|
|
99
312
|
|
|
100
|
-
|
|
313
|
+
return results;
|
|
314
|
+
} catch (error) {
|
|
315
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getChemicalsByCAS', error);
|
|
316
|
+
throw error;
|
|
317
|
+
}
|
|
318
|
+
}
|
|
101
319
|
|
|
102
|
-
async
|
|
320
|
+
async getChemicalsByIdentifier(identifierType, identifierValue) {
|
|
321
|
+
try {
|
|
322
|
+
if (!ALLOWED_IDENTIFIER_TYPES.has(identifierType)) {
|
|
323
|
+
throw new Error(`Invalid identifier type: ${identifierType}`);
|
|
324
|
+
}
|
|
103
325
|
|
|
104
|
-
|
|
326
|
+
const db = this.getDb();
|
|
105
327
|
|
|
106
|
-
|
|
328
|
+
const results = await db
|
|
329
|
+
.select()
|
|
330
|
+
.from(schema.chemicals)
|
|
331
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}->>${identifierType} = ${identifierValue} OR ${schema.chemicals.chemicalIdentifiers}->${identifierType} ? ${identifierValue}`);
|
|
107
332
|
|
|
108
|
-
|
|
333
|
+
return results;
|
|
334
|
+
} catch (error) {
|
|
335
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getChemicalsByIdentifier', error);
|
|
336
|
+
throw error;
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
async countByCollection(collectionName) {
|
|
341
|
+
try {
|
|
342
|
+
const db = this.getDb();
|
|
343
|
+
|
|
344
|
+
const result = await db
|
|
345
|
+
.select({ count: sql`count(*)::int` })
|
|
346
|
+
.from(schema.chemicals)
|
|
347
|
+
.where(arrayContains(schema.chemicals.chemicalCategories, [collectionName]));
|
|
348
|
+
|
|
349
|
+
return { count: result[0].count };
|
|
350
|
+
} catch (error) {
|
|
351
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countByCollection', error);
|
|
352
|
+
throw error;
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
async countByIdentifier(identifierValue) {
|
|
357
|
+
try {
|
|
358
|
+
const db = this.getDb();
|
|
359
|
+
|
|
360
|
+
const searchPattern = `%${escapeLikePattern(identifierValue)}%`;
|
|
361
|
+
const result = await db
|
|
362
|
+
.select({ count: sql`count(*)::int` })
|
|
363
|
+
.from(schema.chemicals)
|
|
364
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}::text LIKE ${searchPattern}`);
|
|
365
|
+
|
|
366
|
+
return { count: result[0].count };
|
|
367
|
+
} catch (error) {
|
|
368
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countByIdentifier', error);
|
|
369
|
+
throw error;
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
async countByCAS(casNumber) {
|
|
374
|
+
try {
|
|
375
|
+
const db = this.getDb();
|
|
376
|
+
|
|
377
|
+
const result = await db
|
|
378
|
+
.select({ count: sql`count(*)::int` })
|
|
379
|
+
.from(schema.chemicals)
|
|
380
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}->>'CAS' = ${casNumber} OR ${schema.chemicals.chemicalIdentifiers}->'CAS' ? ${casNumber}`);
|
|
381
|
+
|
|
382
|
+
return { count: result[0].count };
|
|
383
|
+
} catch (error) {
|
|
384
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countByCAS', error);
|
|
385
|
+
throw error;
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
async getTotalSynonymCount() {
|
|
390
|
+
try {
|
|
391
|
+
const db = this.getDb();
|
|
392
|
+
|
|
393
|
+
const result = await db
|
|
394
|
+
.select({ count: sql`sum(array_length(${schema.chemicals.chemicalSynonyms}, 1))::int` })
|
|
395
|
+
.from(schema.chemicals);
|
|
396
|
+
|
|
397
|
+
return { count: result[0].count || 0 };
|
|
398
|
+
} catch (error) {
|
|
399
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getTotalSynonymCount', error);
|
|
400
|
+
throw error;
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
async getSynonymCount(synonymTerm) {
|
|
405
|
+
try {
|
|
406
|
+
const db = this.getDb();
|
|
407
|
+
|
|
408
|
+
const result = await db
|
|
409
|
+
.select({ count: sql`count(*)::int` })
|
|
410
|
+
.from(schema.chemicals)
|
|
411
|
+
.where(arrayContains(schema.chemicals.chemicalSynonyms, [synonymTerm]));
|
|
412
|
+
|
|
413
|
+
return { count: result[0].count };
|
|
414
|
+
} catch (error) {
|
|
415
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getSynonymCount', error);
|
|
416
|
+
throw error;
|
|
417
|
+
}
|
|
418
|
+
}
|
|
109
419
|
|
|
110
|
-
async convertIdentifier(fromIdentifier, toIdentifierType) {
|
|
420
|
+
async convertIdentifier(fromIdentifier, toIdentifierType) {
|
|
421
|
+
try {
|
|
422
|
+
const db = this.getDb();
|
|
423
|
+
|
|
424
|
+
const searchPattern = `%${escapeLikePattern(fromIdentifier)}%`;
|
|
425
|
+
const chemicals = await db
|
|
426
|
+
.select()
|
|
427
|
+
.from(schema.chemicals)
|
|
428
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}::text LIKE ${searchPattern}`);
|
|
429
|
+
|
|
430
|
+
if (chemicals.length === 0) {
|
|
431
|
+
return null;
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
const chemical = chemicals[0];
|
|
435
|
+
const identifiers = chemical.chemicalIdentifiers || {};
|
|
436
|
+
const toIdentifier = identifiers[toIdentifierType];
|
|
437
|
+
|
|
438
|
+
return {
|
|
439
|
+
fromIdentifier,
|
|
440
|
+
toIdentifierType,
|
|
441
|
+
toIdentifier,
|
|
442
|
+
chemicalId: chemical.chemicalId,
|
|
443
|
+
chemicalName: chemical.chemicalName
|
|
444
|
+
};
|
|
445
|
+
} catch (error) {
|
|
446
|
+
logError('pegasus-sdk', 'ChemicalsService', 'convertIdentifier', error);
|
|
447
|
+
throw error;
|
|
448
|
+
}
|
|
449
|
+
}
|
|
111
450
|
|
|
112
|
-
async convertIdentifiersBatch(fromIdentifiers, toIdentifierType) {
|
|
451
|
+
async convertIdentifiersBatch(fromIdentifiers, toIdentifierType) {
|
|
452
|
+
try {
|
|
453
|
+
const conversions = await Promise.all(
|
|
454
|
+
fromIdentifiers.map(fromIdentifier =>
|
|
455
|
+
this.convertIdentifier(fromIdentifier, toIdentifierType)
|
|
456
|
+
)
|
|
457
|
+
);
|
|
458
|
+
|
|
459
|
+
return conversions.filter(conversion => conversion !== null);
|
|
460
|
+
} catch (error) {
|
|
461
|
+
logError('pegasus-sdk', 'ChemicalsService', 'convertIdentifiersBatch', error);
|
|
462
|
+
throw error;
|
|
463
|
+
}
|
|
464
|
+
}
|
|
113
465
|
|
|
114
466
|
/**
|
|
115
467
|
* Search for chemicals by name using OpenSearch
|
|
@@ -133,13 +485,10 @@ class ChemicalsService {
|
|
|
133
485
|
query: {
|
|
134
486
|
bool: {
|
|
135
487
|
should: [
|
|
136
|
-
|
|
137
|
-
{
|
|
138
|
-
|
|
139
|
-
{ prefix: { '
|
|
140
|
-
// Include synonym matches as secondary
|
|
141
|
-
{ term: { 'synonyms.keyword': { value: searchTerm, boost: 30, case_insensitive: true } } },
|
|
142
|
-
{ prefix: { 'synonyms.keyword': { value: searchTerm, boost: 10, case_insensitive: true } } }
|
|
488
|
+
{ term: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
|
|
489
|
+
{ prefix: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
|
|
490
|
+
{ term: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
|
|
491
|
+
{ prefix: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } }
|
|
143
492
|
],
|
|
144
493
|
minimum_should_match: 1
|
|
145
494
|
}
|
|
@@ -187,13 +536,10 @@ class ChemicalsService {
|
|
|
187
536
|
query: {
|
|
188
537
|
bool: {
|
|
189
538
|
should: [
|
|
190
|
-
|
|
191
|
-
{
|
|
192
|
-
|
|
193
|
-
{ prefix: { '
|
|
194
|
-
// Include name matches as secondary
|
|
195
|
-
{ term: { 'chemical_name.keyword': { value: synonymTerm, boost: 30, case_insensitive: true } } },
|
|
196
|
-
{ prefix: { 'chemical_name.keyword': { value: synonymTerm, boost: 10, case_insensitive: true } } }
|
|
539
|
+
{ term: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
|
|
540
|
+
{ prefix: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
|
|
541
|
+
{ term: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
|
|
542
|
+
{ prefix: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } }
|
|
197
543
|
],
|
|
198
544
|
minimum_should_match: 1
|
|
199
545
|
}
|
|
@@ -222,7 +568,9 @@ class ChemicalsService {
|
|
|
222
568
|
async countAll() {
|
|
223
569
|
try {
|
|
224
570
|
const db = this.getDb();
|
|
225
|
-
const result = await db
|
|
571
|
+
const result = await db
|
|
572
|
+
.select({ count: sql`count(*)::int` })
|
|
573
|
+
.from(schema.chemicals);
|
|
226
574
|
return { count: result[0].count };
|
|
227
575
|
} catch (error) {
|
|
228
576
|
logError('pegasus-sdk', 'ChemicalsService', 'countAll', error);
|
|
@@ -230,9 +578,55 @@ class ChemicalsService {
|
|
|
230
578
|
}
|
|
231
579
|
}
|
|
232
580
|
|
|
233
|
-
async findChemicalsWithoutDocuments(collectionName, searchTerm, pageSize) {
|
|
581
|
+
async findChemicalsWithoutDocuments(collectionName, searchTerm, pageSize = 100) {
|
|
582
|
+
try {
|
|
583
|
+
const db = this.getDb();
|
|
584
|
+
|
|
585
|
+
let whereConditions = [];
|
|
586
|
+
|
|
587
|
+
if (collectionName) {
|
|
588
|
+
whereConditions.push(arrayContains(schema.chemicals.chemicalCategories, [collectionName]));
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
if (searchTerm) {
|
|
592
|
+
const searchPattern = `%${escapeLikePattern(searchTerm)}%`;
|
|
593
|
+
whereConditions.push(sql`${schema.chemicals.chemicalName} ILIKE ${searchPattern}`);
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
const whereClause = whereConditions.length > 0 ? and(...whereConditions) : undefined;
|
|
597
|
+
|
|
598
|
+
const results = await db
|
|
599
|
+
.select()
|
|
600
|
+
.from(schema.chemicals)
|
|
601
|
+
.where(whereClause)
|
|
602
|
+
.limit(pageSize);
|
|
234
603
|
|
|
235
|
-
|
|
604
|
+
return results;
|
|
605
|
+
} catch (error) {
|
|
606
|
+
logError('pegasus-sdk', 'ChemicalsService', 'findChemicalsWithoutDocuments', error);
|
|
607
|
+
throw error;
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
async countChemicalsWithoutDocuments(collectionName) {
|
|
612
|
+
try {
|
|
613
|
+
const db = this.getDb();
|
|
614
|
+
|
|
615
|
+
const whereClause = collectionName
|
|
616
|
+
? arrayContains(schema.chemicals.chemicalCategories, [collectionName])
|
|
617
|
+
: undefined;
|
|
618
|
+
|
|
619
|
+
const result = await db
|
|
620
|
+
.select({ count: sql`count(*)::int` })
|
|
621
|
+
.from(schema.chemicals)
|
|
622
|
+
.where(whereClause);
|
|
623
|
+
|
|
624
|
+
return { count: result[0].count };
|
|
625
|
+
} catch (error) {
|
|
626
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countChemicalsWithoutDocuments', error);
|
|
627
|
+
throw error;
|
|
628
|
+
}
|
|
629
|
+
}
|
|
236
630
|
|
|
237
631
|
registerElasticsearchHandlers(elasticsearchService) {
|
|
238
632
|
const indexPatterns = this.connection.config.indexRoutes?.chemicals || ['chemicals*'];
|
|
@@ -240,8 +634,16 @@ class ChemicalsService {
|
|
|
240
634
|
indexPatterns.forEach(pattern => {
|
|
241
635
|
elasticsearchService.registerIndexRoute(pattern, {
|
|
242
636
|
index: async (params) => {
|
|
243
|
-
|
|
244
|
-
|
|
637
|
+
const chemical = params.body;
|
|
638
|
+
const result = await this.createChemical(chemical);
|
|
639
|
+
|
|
640
|
+
return {
|
|
641
|
+
_index: params.index,
|
|
642
|
+
_id: result.chemicalId,
|
|
643
|
+
_version: 1,
|
|
644
|
+
result: 'created',
|
|
645
|
+
_source: result
|
|
646
|
+
};
|
|
245
647
|
},
|
|
246
648
|
|
|
247
649
|
bulk: async (params) => {
|
|
@@ -257,19 +659,81 @@ class ChemicalsService {
|
|
|
257
659
|
}
|
|
258
660
|
}
|
|
259
661
|
|
|
260
|
-
|
|
662
|
+
const result = await this.bulkIndexFielded(documents);
|
|
663
|
+
|
|
664
|
+
const items = result.results.map((res, idx) => {
|
|
665
|
+
if (res.success) {
|
|
666
|
+
return {
|
|
667
|
+
index: {
|
|
668
|
+
_index: params.index || 'chemicals',
|
|
669
|
+
_id: documents[idx].source_id || documents[idx]._id,
|
|
670
|
+
status: 201,
|
|
671
|
+
result: 'created'
|
|
672
|
+
}
|
|
673
|
+
};
|
|
674
|
+
} else {
|
|
675
|
+
return {
|
|
676
|
+
index: {
|
|
677
|
+
_index: params.index || 'chemicals',
|
|
678
|
+
_id: documents[idx].source_id || documents[idx]._id,
|
|
679
|
+
status: 400,
|
|
680
|
+
error: {
|
|
681
|
+
type: 'mapper_parsing_exception',
|
|
682
|
+
reason: res.error
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
};
|
|
686
|
+
}
|
|
687
|
+
});
|
|
688
|
+
|
|
689
|
+
return {
|
|
690
|
+
took: 1,
|
|
691
|
+
errors: result.errors.length > 0,
|
|
692
|
+
items
|
|
693
|
+
};
|
|
261
694
|
},
|
|
262
695
|
|
|
263
696
|
get: async (params) => {
|
|
264
|
-
|
|
697
|
+
const result = await this.getChemicalById(params.id);
|
|
698
|
+
|
|
699
|
+
if (!result) {
|
|
700
|
+
return {
|
|
701
|
+
_index: params.index,
|
|
702
|
+
_id: params.id,
|
|
703
|
+
found: false
|
|
704
|
+
};
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
return {
|
|
708
|
+
_index: params.index,
|
|
709
|
+
_id: result.chemicalId,
|
|
710
|
+
_version: 1,
|
|
711
|
+
found: true,
|
|
712
|
+
_source: result
|
|
713
|
+
};
|
|
265
714
|
},
|
|
266
715
|
|
|
267
716
|
update: async (params) => {
|
|
268
|
-
|
|
717
|
+
const result = await this.updateChemical(params.id, params.body);
|
|
718
|
+
|
|
719
|
+
return {
|
|
720
|
+
_index: params.index,
|
|
721
|
+
_id: params.id,
|
|
722
|
+
_version: 2,
|
|
723
|
+
result: result ? 'updated' : 'noop',
|
|
724
|
+
_source: result
|
|
725
|
+
};
|
|
269
726
|
},
|
|
270
727
|
|
|
271
728
|
delete: async (params) => {
|
|
272
|
-
|
|
729
|
+
const result = await this.deleteChemical(params.id);
|
|
730
|
+
|
|
731
|
+
return {
|
|
732
|
+
_index: params.index,
|
|
733
|
+
_id: params.id,
|
|
734
|
+
_version: 1,
|
|
735
|
+
result: result ? 'deleted' : 'not_found'
|
|
736
|
+
};
|
|
273
737
|
},
|
|
274
738
|
|
|
275
739
|
search: async (params) => {
|
|
@@ -279,7 +743,37 @@ class ChemicalsService {
|
|
|
279
743
|
query?.query_string?.query || '';
|
|
280
744
|
const limit = params.body?.size || 10;
|
|
281
745
|
|
|
282
|
-
|
|
746
|
+
const searchResults = await this.searchByName(searchTerm, limit);
|
|
747
|
+
|
|
748
|
+
return {
|
|
749
|
+
took: 1,
|
|
750
|
+
timed_out: false,
|
|
751
|
+
_shards: {
|
|
752
|
+
total: 1,
|
|
753
|
+
successful: 1,
|
|
754
|
+
skipped: 0,
|
|
755
|
+
failed: 0
|
|
756
|
+
},
|
|
757
|
+
hits: {
|
|
758
|
+
total: {
|
|
759
|
+
value: searchResults.results.length,
|
|
760
|
+
relation: 'eq'
|
|
761
|
+
},
|
|
762
|
+
max_score: searchResults.results[0]?.score || 0,
|
|
763
|
+
hits: searchResults.results.map(result => ({
|
|
764
|
+
_index: params.index,
|
|
765
|
+
_id: result.id,
|
|
766
|
+
_score: result.score,
|
|
767
|
+
_source: {
|
|
768
|
+
postgres_id: result.id,
|
|
769
|
+
chemical_name: result.name,
|
|
770
|
+
cas_numbers: result.cas,
|
|
771
|
+
identifier_values: result.identifiers,
|
|
772
|
+
synonyms: result.synonyms
|
|
773
|
+
}
|
|
774
|
+
}))
|
|
775
|
+
}
|
|
776
|
+
};
|
|
283
777
|
},
|
|
284
778
|
|
|
285
779
|
count: async (params) => {
|
|
@@ -290,4 +784,4 @@ class ChemicalsService {
|
|
|
290
784
|
}
|
|
291
785
|
}
|
|
292
786
|
|
|
293
|
-
module.exports = ChemicalsService;
|
|
787
|
+
module.exports = ChemicalsService;
|
package/package.json
CHANGED