@toxplanet/pegasus-sdk 1.1.17 → 1.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/environment.dev.js +29 -29
- package/lib/chemicals.js +1082 -1061
- package/lib/connection.js +87 -1
- package/lib/db/schema.js +27 -27
- package/package.json +1 -1
package/lib/chemicals.js
CHANGED
|
@@ -1,1062 +1,1083 @@
|
|
|
1
|
-
const { logError, logInfo } = require('@toxplanet/tphelper/logging');
|
|
2
|
-
const { getDrizzle, schema } = require('./db');
|
|
3
|
-
const { eq, sql, and, inArray, arrayContains } = require('drizzle-orm');
|
|
4
|
-
const { SQSClient, SendMessageCommand } = require('@aws-sdk/client-sqs');
|
|
5
|
-
|
|
6
|
-
const SEARCH_BOOST_EXACT_PRIMARY = 100;
|
|
7
|
-
const SEARCH_BOOST_PREFIX_PRIMARY = 50;
|
|
8
|
-
const SEARCH_BOOST_EXACT_SECONDARY = 30;
|
|
9
|
-
const SEARCH_BOOST_PREFIX_SECONDARY = 10;
|
|
10
|
-
|
|
11
|
-
const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'DTXSID', 'EINECS', 'EC']);
|
|
12
|
-
|
|
13
|
-
/**
 * Backslash-escapes the characters `%`, `_` and `\` in a string.
 *
 * Judging by the function name, the result is meant to be embedded in a SQL
 * LIKE pattern so those characters match literally.
 *
 * @param {string} value - Raw text; must be a string (`.replace` is called on it).
 * @returns {string} The text with each `%`, `_` and `\` prefixed by a backslash.
 */
function escapeLikePattern(value) {
  const specialCharacters = /[%_\\]/g;
  return value.replace(specialCharacters, (matched) => `\\${matched}`);
}
|
|
16
|
-
|
|
17
|
-
class ChemicalsService {
|
|
18
|
-
constructor(connection) {
|
|
19
|
-
this.connection = connection;
|
|
20
|
-
this.db = null;
|
|
21
|
-
this.sqsClient = null;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
getDb() {
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
const
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
'
|
|
79
|
-
'
|
|
80
|
-
'
|
|
81
|
-
'
|
|
82
|
-
'
|
|
83
|
-
'
|
|
84
|
-
'
|
|
85
|
-
'
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
'@
|
|
93
|
-
'@
|
|
94
|
-
'@
|
|
95
|
-
'@
|
|
96
|
-
'@
|
|
97
|
-
'@
|
|
98
|
-
'@
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
const
|
|
107
|
-
const
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
`
|
|
116
|
-
`
|
|
117
|
-
`
|
|
118
|
-
` ${esc(chemical.
|
|
119
|
-
` ${
|
|
120
|
-
` ${escJson(chemical.
|
|
121
|
-
` ${
|
|
122
|
-
` ${escArr(chemical.
|
|
123
|
-
` ${
|
|
124
|
-
` ${escDate(chemical.
|
|
125
|
-
`)`,
|
|
126
|
-
`
|
|
127
|
-
`
|
|
128
|
-
`
|
|
129
|
-
`
|
|
130
|
-
`
|
|
131
|
-
`
|
|
132
|
-
`
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
const results = [];
|
|
147
|
-
const errors = [];
|
|
148
|
-
|
|
149
|
-
logInfo('pegasus-sdk', `[bulkIndexFielded] Database connection established`);
|
|
150
|
-
|
|
151
|
-
for (let i = 0; i < documents.length; i++) {
|
|
152
|
-
const doc = documents[i];
|
|
153
|
-
logInfo('pegasus-sdk', `[bulkIndexFielded] Processing document ${i}: source_id=${doc.source_id}, chemical_name=${doc.chemical_name}`);
|
|
154
|
-
|
|
155
|
-
const parseDate = (dateValue) => {
|
|
156
|
-
if (!dateValue) return new Date();
|
|
157
|
-
if (dateValue instanceof Date) return dateValue;
|
|
158
|
-
if (typeof dateValue === 'string') return new Date(dateValue);
|
|
159
|
-
return new Date();
|
|
160
|
-
};
|
|
161
|
-
|
|
162
|
-
const chemical = {
|
|
163
|
-
sourceId: doc.source_id || doc._id,
|
|
164
|
-
chemicalName: doc.chemical_name || doc.name,
|
|
165
|
-
chemicalMeta: doc.chemical_meta || {},
|
|
166
|
-
chemicalIdentifiers: doc.chemical_identifiers || {},
|
|
167
|
-
chemicalSynonyms: doc.chemical_synonyms || [],
|
|
168
|
-
chemicalCategories: doc.chemical_categories || [],
|
|
169
|
-
createdAt: parseDate(doc.created_at),
|
|
170
|
-
updatedAt: parseDate(doc.updated_at),
|
|
171
|
-
...(doc.imported_at && { importedAt: doc.imported_at }),
|
|
172
|
-
...(doc.chemical_id && { chemicalId: doc.chemical_id })
|
|
173
|
-
};
|
|
174
|
-
|
|
175
|
-
logInfo('pegasus-sdk', `[bulkIndexFielded] Prepared chemical object: sourceId=${chemical.sourceId}, chemicalName=${chemical.chemicalName}`);
|
|
176
|
-
logInfo('pegasus-sdk', `[bulkIndexFielded] DEBUG SQL for document ${i}:\n${this._buildDebugSql(chemical)}`);
|
|
177
|
-
|
|
178
|
-
const isConnectionError = (err) =>
|
|
179
|
-
err.message?.toLowerCase().includes('timeout') ||
|
|
180
|
-
err.message?.toLowerCase().includes('connection') ||
|
|
181
|
-
err.code === 'ECONNREFUSED' ||
|
|
182
|
-
err.code === 'ETIMEDOUT';
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
const
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
return
|
|
284
|
-
} catch (error) {
|
|
285
|
-
logError('pegasus-sdk', 'ChemicalsService', '
|
|
286
|
-
throw error;
|
|
287
|
-
}
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
async
|
|
291
|
-
try {
|
|
292
|
-
const
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
.
|
|
296
|
-
.
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
.
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
const
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
const
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
.
|
|
452
|
-
.
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
.
|
|
506
|
-
.
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
return
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
async
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
const
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
const
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
const
|
|
795
|
-
.select(
|
|
796
|
-
.from(schema.chemicals)
|
|
797
|
-
.where(whereClause)
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
}
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
}
|
|
1061
|
-
|
|
1
|
+
const { logError, logInfo } = require('@toxplanet/tphelper/logging');
|
|
2
|
+
const { getDrizzle, schema } = require('./db');
|
|
3
|
+
const { eq, sql, and, inArray, arrayContains } = require('drizzle-orm');
|
|
4
|
+
const { SQSClient, SendMessageCommand } = require('@aws-sdk/client-sqs');
|
|
5
|
+
|
|
6
|
+
const SEARCH_BOOST_EXACT_PRIMARY = 100;
|
|
7
|
+
const SEARCH_BOOST_PREFIX_PRIMARY = 50;
|
|
8
|
+
const SEARCH_BOOST_EXACT_SECONDARY = 30;
|
|
9
|
+
const SEARCH_BOOST_PREFIX_SECONDARY = 10;
|
|
10
|
+
|
|
11
|
+
const ALLOWED_IDENTIFIER_TYPES = new Set(['CAS', 'SMILES', 'InChI', 'InChIKey', 'PubChem', 'DTXSID', 'EINECS', 'EC']);
|
|
12
|
+
|
|
13
|
+
/**
 * Backslash-escapes the characters `%`, `_` and `\` in a string.
 *
 * Judging by the function name, the result is meant to be embedded in a SQL
 * LIKE pattern so those characters match literally.
 *
 * @param {string} value - Raw text; must be a string (`.replace` is called on it).
 * @returns {string} The text with each `%`, `_` and `\` prefixed by a backslash.
 */
function escapeLikePattern(value) {
  const specialCharacters = /[%_\\]/g;
  return value.replace(specialCharacters, (matched) => `\\${matched}`);
}
|
|
16
|
+
|
|
17
|
+
class ChemicalsService {
|
|
18
|
+
constructor(connection) {
|
|
19
|
+
this.connection = connection;
|
|
20
|
+
this.db = null;
|
|
21
|
+
this.sqsClient = null;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
async getDb() {
|
|
25
|
+
const reconnected = await this.connection.ensureConnected();
|
|
26
|
+
if (reconnected || !this.db) {
|
|
27
|
+
this.db = getDrizzle(this.connection.pgPool);
|
|
28
|
+
}
|
|
29
|
+
return this.db;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
async sendSqlWriteFailure({ sql, parameters, error, retryCount, failedAt }) {
|
|
33
|
+
try {
|
|
34
|
+
const region = process.env.AWS_REGION || this.connection.region;
|
|
35
|
+
const { awsAccountId, environment } = this.connection.config;
|
|
36
|
+
const defaultQueueUrl = awsAccountId
|
|
37
|
+
? `https://sqs.${region}.amazonaws.com/${awsAccountId}/cr-pegasus-failed-items-${environment}`
|
|
38
|
+
: null;
|
|
39
|
+
const queueUrl = process.env.SQS_FAILED_ITEMS_QUEUE || defaultQueueUrl;
|
|
40
|
+
|
|
41
|
+
if (!queueUrl) {
|
|
42
|
+
logError('pegasus-sdk', 'sendSqlWriteFailure', 'No SQS queue URL available: set SQS_FAILED_ITEMS_QUEUE or provide awsAccountId in config');
|
|
43
|
+
return false;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
logInfo('pegasus-sdk', `[sendSqlWriteFailure] Using queue: ${queueUrl}${process.env.SQS_FAILED_ITEMS_QUEUE ? ' (from env)' : ' (default)'}`);
|
|
47
|
+
|
|
48
|
+
if (!this.sqsClient) {
|
|
49
|
+
this.sqsClient = new SQSClient({ region });
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
const message = {
|
|
53
|
+
MessageType: 'SqlWriteFailure',
|
|
54
|
+
SourceService: this.connection.config.sourceService || 'pegasus-sdk',
|
|
55
|
+
Timestamp: (failedAt || new Date()).toISOString(),
|
|
56
|
+
Sql: sql,
|
|
57
|
+
Parameters: parameters,
|
|
58
|
+
OriginalError: error.message,
|
|
59
|
+
RetryCount: retryCount
|
|
60
|
+
};
|
|
61
|
+
|
|
62
|
+
const command = new SendMessageCommand({
|
|
63
|
+
QueueUrl: queueUrl,
|
|
64
|
+
MessageBody: JSON.stringify(message)
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
const response = await this.sqsClient.send(command);
|
|
68
|
+
logInfo('pegasus-sdk', `[sendSqlWriteFailure] SqlWriteFailure posted to SQS: MessageId=${response.MessageId}, RetryCount=${retryCount}`);
|
|
69
|
+
return true;
|
|
70
|
+
} catch (sqsError) {
|
|
71
|
+
logError('pegasus-sdk', 'sendSqlWriteFailure', 'Failed to post SqlWriteFailure to SQS', sqsError);
|
|
72
|
+
return false;
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
_buildChemicalUpsertSql(chemical) {
|
|
77
|
+
const sql = [
|
|
78
|
+
'INSERT INTO chemicals (source_id, chemical_name, chemical_meta, chemical_identifiers, chemical_synonyms, chemical_categories, created_at, updated_at)',
|
|
79
|
+
'VALUES (@source_id, @chemical_name, @chemical_meta::jsonb, @chemical_identifiers::jsonb, @chemical_synonyms, @chemical_categories, @created_at, @updated_at)',
|
|
80
|
+
'ON CONFLICT (source_id) DO UPDATE SET',
|
|
81
|
+
' chemical_name = @chemical_name,',
|
|
82
|
+
' chemical_meta = @chemical_meta::jsonb,',
|
|
83
|
+
' chemical_identifiers = @chemical_identifiers::jsonb,',
|
|
84
|
+
' chemical_synonyms = @chemical_synonyms,',
|
|
85
|
+
' chemical_categories = @chemical_categories,',
|
|
86
|
+
' updated_at = @updated_at'
|
|
87
|
+
].join('\n');
|
|
88
|
+
|
|
89
|
+
const serializeDate = (d) => d instanceof Date ? d.toISOString() : d;
|
|
90
|
+
|
|
91
|
+
const parameters = {
|
|
92
|
+
'@source_id': chemical.sourceId,
|
|
93
|
+
'@chemical_name': chemical.chemicalName,
|
|
94
|
+
'@chemical_meta': JSON.stringify(chemical.chemicalMeta ?? {}),
|
|
95
|
+
'@chemical_identifiers': JSON.stringify(chemical.chemicalIdentifiers ?? {}),
|
|
96
|
+
'@chemical_synonyms': JSON.stringify(chemical.chemicalSynonyms ?? []),
|
|
97
|
+
'@chemical_categories': JSON.stringify(chemical.chemicalCategories ?? []),
|
|
98
|
+
'@created_at': serializeDate(chemical.createdAt),
|
|
99
|
+
'@updated_at': serializeDate(chemical.updatedAt)
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
return { sql, parameters };
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
_buildDebugSql(chemical) {
|
|
106
|
+
const esc = (s) => `'${String(s ?? '').replace(/'/g, "''")}'`;
|
|
107
|
+
const escJson = (v) => `'${JSON.stringify(v ?? {}).replace(/'/g, "''")}'`;
|
|
108
|
+
const escArr = (arr) => {
|
|
109
|
+
if (!Array.isArray(arr) || arr.length === 0) return `ARRAY[]::text[]`;
|
|
110
|
+
return `ARRAY[${arr.map(s => esc(s)).join(', ')}]`;
|
|
111
|
+
};
|
|
112
|
+
const escDate = (d) => esc(d instanceof Date ? d.toISOString() : (d ?? new Date().toISOString()));
|
|
113
|
+
|
|
114
|
+
return [
|
|
115
|
+
`INSERT INTO chemicals`,
|
|
116
|
+
` (source_id, chemical_name, chemical_meta, chemical_identifiers, chemical_synonyms, chemical_categories, created_at, updated_at)`,
|
|
117
|
+
`VALUES (`,
|
|
118
|
+
` ${esc(chemical.sourceId)},`,
|
|
119
|
+
` ${esc(chemical.chemicalName)},`,
|
|
120
|
+
` ${escJson(chemical.chemicalMeta)}::jsonb,`,
|
|
121
|
+
` ${escJson(chemical.chemicalIdentifiers)}::jsonb,`,
|
|
122
|
+
` ${escArr(chemical.chemicalSynonyms)},`,
|
|
123
|
+
` ${escArr(chemical.chemicalCategories)},`,
|
|
124
|
+
` ${escDate(chemical.createdAt)},`,
|
|
125
|
+
` ${escDate(chemical.updatedAt)}`,
|
|
126
|
+
`)`,
|
|
127
|
+
`ON CONFLICT (source_id) DO UPDATE SET`,
|
|
128
|
+
` chemical_name = ${esc(chemical.chemicalName)},`,
|
|
129
|
+
` chemical_meta = ${escJson(chemical.chemicalMeta)}::jsonb,`,
|
|
130
|
+
` chemical_identifiers = ${escJson(chemical.chemicalIdentifiers)}::jsonb,`,
|
|
131
|
+
` chemical_synonyms = ${escArr(chemical.chemicalSynonyms)},`,
|
|
132
|
+
` chemical_categories = ${escArr(chemical.chemicalCategories)},`,
|
|
133
|
+
` updated_at = NOW();`
|
|
134
|
+
].join('\n');
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
async bulkIndexFielded(documents) {
|
|
138
|
+
try {
|
|
139
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Starting bulk index with ${documents?.length || 0} documents`);
|
|
140
|
+
|
|
141
|
+
if (!documents || documents.length === 0) {
|
|
142
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] No documents provided, returning empty result`);
|
|
143
|
+
return { indexed: 0, errors: [], results: [] };
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
const results = [];
|
|
147
|
+
const errors = [];
|
|
148
|
+
|
|
149
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Database connection established`);
|
|
150
|
+
|
|
151
|
+
for (let i = 0; i < documents.length; i++) {
|
|
152
|
+
const doc = documents[i];
|
|
153
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Processing document ${i}: source_id=${doc.source_id}, chemical_name=${doc.chemical_name}`);
|
|
154
|
+
|
|
155
|
+
const parseDate = (dateValue) => {
|
|
156
|
+
if (!dateValue) return new Date();
|
|
157
|
+
if (dateValue instanceof Date) return dateValue;
|
|
158
|
+
if (typeof dateValue === 'string') return new Date(dateValue);
|
|
159
|
+
return new Date();
|
|
160
|
+
};
|
|
161
|
+
|
|
162
|
+
const chemical = {
|
|
163
|
+
sourceId: doc.source_id || doc._id,
|
|
164
|
+
chemicalName: doc.chemical_name || doc.name,
|
|
165
|
+
chemicalMeta: doc.chemical_meta || {},
|
|
166
|
+
chemicalIdentifiers: doc.chemical_identifiers || {},
|
|
167
|
+
chemicalSynonyms: doc.chemical_synonyms || [],
|
|
168
|
+
chemicalCategories: doc.chemical_categories || [],
|
|
169
|
+
createdAt: parseDate(doc.created_at),
|
|
170
|
+
updatedAt: parseDate(doc.updated_at),
|
|
171
|
+
...(doc.imported_at && { importedAt: doc.imported_at }),
|
|
172
|
+
...(doc.chemical_id && { chemicalId: doc.chemical_id })
|
|
173
|
+
};
|
|
174
|
+
|
|
175
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Prepared chemical object: sourceId=${chemical.sourceId}, chemicalName=${chemical.chemicalName}`);
|
|
176
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] DEBUG SQL for document ${i}:\n${this._buildDebugSql(chemical)}`);
|
|
177
|
+
|
|
178
|
+
const isConnectionError = (err) =>
|
|
179
|
+
err.message?.toLowerCase().includes('timeout') ||
|
|
180
|
+
err.message?.toLowerCase().includes('connection') ||
|
|
181
|
+
err.code === 'ECONNREFUSED' ||
|
|
182
|
+
err.code === 'ETIMEDOUT';
|
|
183
|
+
|
|
184
|
+
// Use this.getDb() on each attempt so a reconnect mid-loop automatically
|
|
185
|
+
// gets a fresh Drizzle instance bound to the new pool.
|
|
186
|
+
const attemptUpsert = async () => {
|
|
187
|
+
const freshDb = await this.getDb();
|
|
188
|
+
return freshDb.insert(schema.chemicals)
|
|
189
|
+
.values(chemical)
|
|
190
|
+
.onConflictDoUpdate({
|
|
191
|
+
target: schema.chemicals.sourceId,
|
|
192
|
+
set: {
|
|
193
|
+
chemicalName: chemical.chemicalName,
|
|
194
|
+
chemicalMeta: chemical.chemicalMeta,
|
|
195
|
+
chemicalIdentifiers: chemical.chemicalIdentifiers,
|
|
196
|
+
chemicalSynonyms: chemical.chemicalSynonyms,
|
|
197
|
+
chemicalCategories: chemical.chemicalCategories,
|
|
198
|
+
updatedAt: new Date()
|
|
199
|
+
}
|
|
200
|
+
})
|
|
201
|
+
.returning({
|
|
202
|
+
chemicalId: schema.chemicals.chemicalId,
|
|
203
|
+
sourceId: schema.chemicals.sourceId
|
|
204
|
+
});
|
|
205
|
+
};
|
|
206
|
+
|
|
207
|
+
let lastError = null;
|
|
208
|
+
let retryCount = 0;
|
|
209
|
+
const failedAt = new Date();
|
|
210
|
+
|
|
211
|
+
try {
|
|
212
|
+
const [result] = await attemptUpsert();
|
|
213
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully: ${result?.chemicalId || 'no ID returned'}`);
|
|
214
|
+
this.connection.recordActivity();
|
|
215
|
+
results.push({ index: i, success: true, result });
|
|
216
|
+
continue;
|
|
217
|
+
} catch (firstErr) {
|
|
218
|
+
lastError = firstErr;
|
|
219
|
+
|
|
220
|
+
if (isConnectionError(firstErr)) {
|
|
221
|
+
// Stale pool — rebuild the connection and try once more before queuing
|
|
222
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} connection error (${firstErr.message}), reconnecting pool and retrying`);
|
|
223
|
+
try {
|
|
224
|
+
await this.connection.reconnect();
|
|
225
|
+
const [result] = await attemptUpsert();
|
|
226
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully after reconnect: ${result?.chemicalId || 'no ID returned'}`);
|
|
227
|
+
this.connection.recordActivity();
|
|
228
|
+
results.push({ index: i, success: true, result });
|
|
229
|
+
continue;
|
|
230
|
+
} catch (reconnectErr) {
|
|
231
|
+
lastError = reconnectErr;
|
|
232
|
+
retryCount = 1;
|
|
233
|
+
}
|
|
234
|
+
} else {
|
|
235
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} first attempt failed (${firstErr.message}), retrying once`);
|
|
236
|
+
try {
|
|
237
|
+
const [result] = await attemptUpsert();
|
|
238
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully on retry: ${result?.chemicalId || 'no ID returned'}`);
|
|
239
|
+
this.connection.recordActivity();
|
|
240
|
+
results.push({ index: i, success: true, result });
|
|
241
|
+
continue;
|
|
242
|
+
} catch (retryErr) {
|
|
243
|
+
lastError = retryErr;
|
|
244
|
+
retryCount = 1;
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
logError('pegasus-sdk', 'bulkIndexFielded', `Document ${i} failed after ${retryCount} local retries (source_id=${chemical.sourceId})`, lastError);
|
|
250
|
+
|
|
251
|
+
const { sql: failureSql, parameters: failureParams } = this._buildChemicalUpsertSql(chemical);
|
|
252
|
+
const queued = await this.sendSqlWriteFailure({
|
|
253
|
+
sql: failureSql,
|
|
254
|
+
parameters: failureParams,
|
|
255
|
+
error: lastError,
|
|
256
|
+
retryCount,
|
|
257
|
+
failedAt
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
if (queued) {
|
|
261
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} (source_id=${chemical.sourceId}) queued for repair via SQS`);
|
|
262
|
+
} else {
|
|
263
|
+
logError('pegasus-sdk', 'bulkIndexFielded', `Document ${i} (source_id=${chemical.sourceId}) failed and could not be queued — data loss risk`, lastError);
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
results.push({ index: i, success: false, error: lastError.message, queued });
|
|
267
|
+
errors.push({ document: doc, error: lastError.message, queued });
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
const successCount = results.filter(r => r.success).length;
|
|
271
|
+
const queuedCount = results.filter(r => !r.success && r.queued).length;
|
|
272
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Bulk index complete: ${successCount}/${documents.length} succeeded, ${queuedCount} queued for repair, ${errors.length - queuedCount} unhandled errors`);
|
|
273
|
+
|
|
274
|
+
return { indexed: successCount, errors, results };
|
|
275
|
+
} catch (error) {
|
|
276
|
+
logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexFielded', error);
|
|
277
|
+
throw error;
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
async bulkIndexFulltext(documents) {
|
|
282
|
+
try {
|
|
283
|
+
return { acknowledged: true, count: documents?.length || 0 };
|
|
284
|
+
} catch (error) {
|
|
285
|
+
logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexFulltext', error);
|
|
286
|
+
throw error;
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
async bulkIndexSubstances(substances) {
|
|
291
|
+
try {
|
|
292
|
+
const documents = substances.map(substance => ({
|
|
293
|
+
source_id: substance.substance_id || substance.id,
|
|
294
|
+
chemical_name: substance.name || substance.substance_name,
|
|
295
|
+
chemical_meta: substance.meta || {},
|
|
296
|
+
chemical_identifiers: substance.identifiers || {},
|
|
297
|
+
chemical_synonyms: substance.synonyms || [],
|
|
298
|
+
chemical_categories: substance.categories || substance.substance_types || [],
|
|
299
|
+
created_at: substance.created_at,
|
|
300
|
+
updated_at: substance.updated_at,
|
|
301
|
+
imported_at: substance.imported_at
|
|
302
|
+
}));
|
|
303
|
+
|
|
304
|
+
return await this.bulkIndexFielded(documents);
|
|
305
|
+
} catch (error) {
|
|
306
|
+
logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexSubstances', error);
|
|
307
|
+
throw error;
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
async createChemical(chemical) {
|
|
312
|
+
try {
|
|
313
|
+
const db = await this.getDb();
|
|
314
|
+
|
|
315
|
+
const [result] = await db
|
|
316
|
+
.insert(schema.chemicals)
|
|
317
|
+
.values({
|
|
318
|
+
sourceId: chemical.source_id,
|
|
319
|
+
chemicalName: chemical.chemical_name,
|
|
320
|
+
chemicalMeta: chemical.chemical_meta,
|
|
321
|
+
chemicalIdentifiers: chemical.chemical_identifiers,
|
|
322
|
+
chemicalSynonyms: chemical.chemical_synonyms,
|
|
323
|
+
chemicalCategories: chemical.chemical_categories,
|
|
324
|
+
createdAt: chemical.created_at || new Date(),
|
|
325
|
+
updatedAt: chemical.updated_at || new Date(),
|
|
326
|
+
...(chemical.imported_at && { importedAt: chemical.imported_at }),
|
|
327
|
+
...(chemical.chemical_id && { chemicalId: chemical.chemical_id })
|
|
328
|
+
})
|
|
329
|
+
.returning();
|
|
330
|
+
|
|
331
|
+
return result;
|
|
332
|
+
} catch (error) {
|
|
333
|
+
logError('pegasus-sdk', 'ChemicalsService', 'createChemical', error);
|
|
334
|
+
throw error;
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
async updateChemical(chemicalId, updates) {
|
|
339
|
+
try {
|
|
340
|
+
const db = await this.getDb();
|
|
341
|
+
|
|
342
|
+
const updateData = {};
|
|
343
|
+
if (updates.chemical_name) updateData.chemicalName = updates.chemical_name;
|
|
344
|
+
if (updates.chemical_meta) updateData.chemicalMeta = updates.chemical_meta;
|
|
345
|
+
if (updates.chemical_identifiers) updateData.chemicalIdentifiers = updates.chemical_identifiers;
|
|
346
|
+
if (updates.chemical_synonyms) updateData.chemicalSynonyms = updates.chemical_synonyms;
|
|
347
|
+
if (updates.chemical_categories) updateData.chemicalCategories = updates.chemical_categories;
|
|
348
|
+
updateData.updatedAt = new Date();
|
|
349
|
+
|
|
350
|
+
const [result] = await db
|
|
351
|
+
.update(schema.chemicals)
|
|
352
|
+
.set(updateData)
|
|
353
|
+
.where(eq(schema.chemicals.chemicalId, chemicalId))
|
|
354
|
+
.returning();
|
|
355
|
+
|
|
356
|
+
return result || null;
|
|
357
|
+
} catch (error) {
|
|
358
|
+
logError('pegasus-sdk', 'ChemicalsService', 'updateChemical', error);
|
|
359
|
+
throw error;
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
async deleteChemical(chemicalId) {
|
|
364
|
+
try {
|
|
365
|
+
const db = await this.getDb();
|
|
366
|
+
|
|
367
|
+
const [deleted] = await db
|
|
368
|
+
.delete(schema.chemicals)
|
|
369
|
+
.where(eq(schema.chemicals.chemicalId, chemicalId))
|
|
370
|
+
.returning();
|
|
371
|
+
|
|
372
|
+
return deleted || null;
|
|
373
|
+
} catch (error) {
|
|
374
|
+
logError('pegasus-sdk', 'ChemicalsService', 'deleteChemical', error);
|
|
375
|
+
throw error;
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
async deleteBySourceId(sourceId) {
|
|
380
|
+
try {
|
|
381
|
+
const db = await this.getDb();
|
|
382
|
+
|
|
383
|
+
const [deleted] = await db
|
|
384
|
+
.delete(schema.chemicals)
|
|
385
|
+
.where(eq(schema.chemicals.sourceId, sourceId))
|
|
386
|
+
.returning();
|
|
387
|
+
|
|
388
|
+
return deleted || null;
|
|
389
|
+
} catch (error) {
|
|
390
|
+
logError('pegasus-sdk', 'ChemicalsService', 'deleteBySourceId', error);
|
|
391
|
+
throw error;
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
async deleteCollection(collectionName) {
|
|
396
|
+
try {
|
|
397
|
+
const db = await this.getDb();
|
|
398
|
+
|
|
399
|
+
const deleted = await db
|
|
400
|
+
.delete(schema.chemicals)
|
|
401
|
+
.where(arrayContains(schema.chemicals.chemicalCategories, [collectionName]))
|
|
402
|
+
.returning();
|
|
403
|
+
|
|
404
|
+
return { deletedCount: deleted.length, deleted };
|
|
405
|
+
} catch (error) {
|
|
406
|
+
logError('pegasus-sdk', 'ChemicalsService', 'deleteCollection', error);
|
|
407
|
+
throw error;
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
async updateCollectionProperty(collectionName, propertyPath, newValue) {
|
|
412
|
+
try {
|
|
413
|
+
const db = await this.getDb();
|
|
414
|
+
const pathArray = propertyPath.split('.');
|
|
415
|
+
const valueJson = JSON.stringify(newValue);
|
|
416
|
+
|
|
417
|
+
const results = await db
|
|
418
|
+
.update(schema.chemicals)
|
|
419
|
+
.set({
|
|
420
|
+
chemicalMeta: sql`jsonb_set(${schema.chemicals.chemicalMeta}, ${pathArray}::text[], ${valueJson}::jsonb)`,
|
|
421
|
+
updatedAt: new Date()
|
|
422
|
+
})
|
|
423
|
+
.where(arrayContains(schema.chemicals.chemicalCategories, [collectionName]))
|
|
424
|
+
.returning();
|
|
425
|
+
|
|
426
|
+
return { updatedCount: results.length, updated: results };
|
|
427
|
+
} catch (error) {
|
|
428
|
+
logError('pegasus-sdk', 'ChemicalsService', 'updateCollectionProperty', error);
|
|
429
|
+
throw error;
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
async bulkUpdateProperty(filter, propertyPath, newValue) {
|
|
434
|
+
try {
|
|
435
|
+
const db = await this.getDb();
|
|
436
|
+
|
|
437
|
+
let whereCondition = sql`1=1`;
|
|
438
|
+
|
|
439
|
+
if (filter.chemicalIds && filter.chemicalIds.length > 0) {
|
|
440
|
+
whereCondition = inArray(schema.chemicals.chemicalId, filter.chemicalIds);
|
|
441
|
+
} else if (filter.sourceIds && filter.sourceIds.length > 0) {
|
|
442
|
+
whereCondition = inArray(schema.chemicals.sourceId, filter.sourceIds);
|
|
443
|
+
} else if (filter.category) {
|
|
444
|
+
whereCondition = arrayContains(schema.chemicals.chemicalCategories, [filter.category]);
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
const pathArray = propertyPath.split('.');
|
|
448
|
+
const valueJson = JSON.stringify(newValue);
|
|
449
|
+
|
|
450
|
+
const results = await db
|
|
451
|
+
.update(schema.chemicals)
|
|
452
|
+
.set({
|
|
453
|
+
chemicalMeta: sql`jsonb_set(COALESCE(${schema.chemicals.chemicalMeta}, '{}'), ${pathArray}::text[], ${valueJson}::jsonb)`,
|
|
454
|
+
updatedAt: new Date()
|
|
455
|
+
})
|
|
456
|
+
.where(whereCondition)
|
|
457
|
+
.returning();
|
|
458
|
+
|
|
459
|
+
return { updatedCount: results.length, updated: results };
|
|
460
|
+
} catch (error) {
|
|
461
|
+
logError('pegasus-sdk', 'ChemicalsService', 'bulkUpdateProperty', error);
|
|
462
|
+
throw error;
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
async getChemicalById(chemicalId) {
|
|
467
|
+
try {
|
|
468
|
+
const db = await this.getDb();
|
|
469
|
+
|
|
470
|
+
const [result] = await db
|
|
471
|
+
.select()
|
|
472
|
+
.from(schema.chemicals)
|
|
473
|
+
.where(eq(schema.chemicals.chemicalId, chemicalId))
|
|
474
|
+
.limit(1);
|
|
475
|
+
|
|
476
|
+
return result || null;
|
|
477
|
+
} catch (error) {
|
|
478
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getChemicalById', error);
|
|
479
|
+
throw error;
|
|
480
|
+
}
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
async getChemicalBySourceId(sourceId) {
|
|
484
|
+
try {
|
|
485
|
+
const db = await this.getDb();
|
|
486
|
+
|
|
487
|
+
const [result] = await db
|
|
488
|
+
.select()
|
|
489
|
+
.from(schema.chemicals)
|
|
490
|
+
.where(eq(schema.chemicals.sourceId, sourceId))
|
|
491
|
+
.limit(1);
|
|
492
|
+
|
|
493
|
+
return result || null;
|
|
494
|
+
} catch (error) {
|
|
495
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getChemicalBySourceId', error);
|
|
496
|
+
throw error;
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
async getChemicalsByCAS(casNumber) {
|
|
501
|
+
try {
|
|
502
|
+
const db = await this.getDb();
|
|
503
|
+
|
|
504
|
+
const results = await db
|
|
505
|
+
.select()
|
|
506
|
+
.from(schema.chemicals)
|
|
507
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}->>'CAS' = ${casNumber} OR ${schema.chemicals.chemicalIdentifiers}->'CAS' ? ${casNumber}`);
|
|
508
|
+
|
|
509
|
+
return results;
|
|
510
|
+
} catch (error) {
|
|
511
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getChemicalsByCAS', error);
|
|
512
|
+
throw error;
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
async getChemicalsByIdentifier(identifierType, identifierValue) {
|
|
517
|
+
try {
|
|
518
|
+
if (!ALLOWED_IDENTIFIER_TYPES.has(identifierType)) {
|
|
519
|
+
throw new Error(`Invalid identifier type: ${identifierType}`);
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
const db = await this.getDb();
|
|
523
|
+
|
|
524
|
+
const results = await db
|
|
525
|
+
.select()
|
|
526
|
+
.from(schema.chemicals)
|
|
527
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}->>${identifierType} = ${identifierValue} OR ${schema.chemicals.chemicalIdentifiers}->${identifierType} ? ${identifierValue}`);
|
|
528
|
+
|
|
529
|
+
return results;
|
|
530
|
+
} catch (error) {
|
|
531
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getChemicalsByIdentifier', error);
|
|
532
|
+
throw error;
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
async countByCollection(collectionName) {
|
|
537
|
+
try {
|
|
538
|
+
const db = await this.getDb();
|
|
539
|
+
|
|
540
|
+
const result = await db
|
|
541
|
+
.select({ count: sql`count(*)::int` })
|
|
542
|
+
.from(schema.chemicals)
|
|
543
|
+
.where(arrayContains(schema.chemicals.chemicalCategories, [collectionName]));
|
|
544
|
+
|
|
545
|
+
return { count: result[0].count };
|
|
546
|
+
} catch (error) {
|
|
547
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countByCollection', error);
|
|
548
|
+
throw error;
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
async countByIdentifier(identifierValue) {
|
|
553
|
+
try {
|
|
554
|
+
const db = await this.getDb();
|
|
555
|
+
|
|
556
|
+
const searchPattern = `%${escapeLikePattern(identifierValue)}%`;
|
|
557
|
+
const result = await db
|
|
558
|
+
.select({ count: sql`count(*)::int` })
|
|
559
|
+
.from(schema.chemicals)
|
|
560
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}::text LIKE ${searchPattern}`);
|
|
561
|
+
|
|
562
|
+
return { count: result[0].count };
|
|
563
|
+
} catch (error) {
|
|
564
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countByIdentifier', error);
|
|
565
|
+
throw error;
|
|
566
|
+
}
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
async countByCAS(casNumber) {
|
|
570
|
+
try {
|
|
571
|
+
const db = await this.getDb();
|
|
572
|
+
|
|
573
|
+
const result = await db
|
|
574
|
+
.select({ count: sql`count(*)::int` })
|
|
575
|
+
.from(schema.chemicals)
|
|
576
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}->>'CAS' = ${casNumber} OR ${schema.chemicals.chemicalIdentifiers}->'CAS' ? ${casNumber}`);
|
|
577
|
+
|
|
578
|
+
return { count: result[0].count };
|
|
579
|
+
} catch (error) {
|
|
580
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countByCAS', error);
|
|
581
|
+
throw error;
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
async getTotalSynonymCount() {
|
|
586
|
+
try {
|
|
587
|
+
const db = await this.getDb();
|
|
588
|
+
|
|
589
|
+
const result = await db
|
|
590
|
+
.select({ count: sql`sum(array_length(${schema.chemicals.chemicalSynonyms}, 1))::int` })
|
|
591
|
+
.from(schema.chemicals);
|
|
592
|
+
|
|
593
|
+
return { count: result[0].count || 0 };
|
|
594
|
+
} catch (error) {
|
|
595
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getTotalSynonymCount', error);
|
|
596
|
+
throw error;
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
async getSynonymCount(synonymTerm) {
|
|
601
|
+
try {
|
|
602
|
+
const db = await this.getDb();
|
|
603
|
+
|
|
604
|
+
const result = await db
|
|
605
|
+
.select({ count: sql`count(*)::int` })
|
|
606
|
+
.from(schema.chemicals)
|
|
607
|
+
.where(arrayContains(schema.chemicals.chemicalSynonyms, [synonymTerm]));
|
|
608
|
+
|
|
609
|
+
return { count: result[0].count };
|
|
610
|
+
} catch (error) {
|
|
611
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getSynonymCount', error);
|
|
612
|
+
throw error;
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
|
|
616
|
+
async convertIdentifier(fromIdentifier, toIdentifierType) {
|
|
617
|
+
try {
|
|
618
|
+
const db = await this.getDb();
|
|
619
|
+
|
|
620
|
+
const searchPattern = `%${escapeLikePattern(fromIdentifier)}%`;
|
|
621
|
+
const chemicals = await db
|
|
622
|
+
.select()
|
|
623
|
+
.from(schema.chemicals)
|
|
624
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}::text LIKE ${searchPattern}`);
|
|
625
|
+
|
|
626
|
+
if (chemicals.length === 0) {
|
|
627
|
+
return null;
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
const chemical = chemicals[0];
|
|
631
|
+
const identifiers = chemical.chemicalIdentifiers || {};
|
|
632
|
+
const toIdentifier = identifiers[toIdentifierType];
|
|
633
|
+
|
|
634
|
+
return {
|
|
635
|
+
fromIdentifier,
|
|
636
|
+
toIdentifierType,
|
|
637
|
+
toIdentifier,
|
|
638
|
+
chemicalId: chemical.chemicalId,
|
|
639
|
+
chemicalName: chemical.chemicalName
|
|
640
|
+
};
|
|
641
|
+
} catch (error) {
|
|
642
|
+
logError('pegasus-sdk', 'ChemicalsService', 'convertIdentifier', error);
|
|
643
|
+
throw error;
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
async convertIdentifiersBatch(fromIdentifiers, toIdentifierType) {
|
|
648
|
+
try {
|
|
649
|
+
const conversions = await Promise.all(
|
|
650
|
+
fromIdentifiers.map(fromIdentifier =>
|
|
651
|
+
this.convertIdentifier(fromIdentifier, toIdentifierType)
|
|
652
|
+
)
|
|
653
|
+
);
|
|
654
|
+
|
|
655
|
+
return conversions.filter(conversion => conversion !== null);
|
|
656
|
+
} catch (error) {
|
|
657
|
+
logError('pegasus-sdk', 'ChemicalsService', 'convertIdentifiersBatch', error);
|
|
658
|
+
throw error;
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
/**
|
|
663
|
+
* Search for chemicals by name using OpenSearch
|
|
664
|
+
* @param {string} searchTerm - Name to search for
|
|
665
|
+
* @param {number} limit - Maximum number of results (default: 10)
|
|
666
|
+
* @returns {Promise<Object>} Search results
|
|
667
|
+
*/
|
|
668
|
+
async searchByName(searchTerm, limit = 10) {
|
|
669
|
+
if (!searchTerm) {
|
|
670
|
+
return { results: [] };
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
try {
|
|
674
|
+
const opensearchClient = this.connection.getOpenSearchClient();
|
|
675
|
+
const indexName = this.connection.getOpenSearchIndex();
|
|
676
|
+
|
|
677
|
+
const response = await opensearchClient.search({
|
|
678
|
+
index: indexName,
|
|
679
|
+
body: {
|
|
680
|
+
size: limit,
|
|
681
|
+
query: {
|
|
682
|
+
bool: {
|
|
683
|
+
should: [
|
|
684
|
+
{ term: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
|
|
685
|
+
{ prefix: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
|
|
686
|
+
{ term: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
|
|
687
|
+
{ prefix: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } }
|
|
688
|
+
],
|
|
689
|
+
minimum_should_match: 1
|
|
690
|
+
}
|
|
691
|
+
},
|
|
692
|
+
_source: ['postgres_id', 'chemical_name', 'cas_numbers', 'identifier_values', 'synonyms']
|
|
693
|
+
}
|
|
694
|
+
});
|
|
695
|
+
|
|
696
|
+
const hits = response.body?.hits?.hits || [];
|
|
697
|
+
const results = hits.map((hit) => ({
|
|
698
|
+
id: hit._source.postgres_id,
|
|
699
|
+
name: hit._source.chemical_name,
|
|
700
|
+
cas: hit._source.cas_numbers || [],
|
|
701
|
+
identifiers: hit._source.identifier_values || [],
|
|
702
|
+
synonyms: hit._source.synonyms || [],
|
|
703
|
+
score: hit._score
|
|
704
|
+
}));
|
|
705
|
+
|
|
706
|
+
return { results };
|
|
707
|
+
} catch (error) {
|
|
708
|
+
logError('pegasus-sdk', 'ChemicalsService', 'searchByName', error);
|
|
709
|
+
throw error;
|
|
710
|
+
}
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
/**
|
|
714
|
+
* Search for chemicals by synonym using OpenSearch
|
|
715
|
+
* @param {string} synonymTerm - Synonym to search for
|
|
716
|
+
* @param {number} limit - Maximum number of results (default: 10)
|
|
717
|
+
* @returns {Promise<Object>} Search results
|
|
718
|
+
*/
|
|
719
|
+
async searchBySynonym(synonymTerm, limit = 10) {
|
|
720
|
+
if (!synonymTerm) {
|
|
721
|
+
return { results: [] };
|
|
722
|
+
}
|
|
723
|
+
|
|
724
|
+
try {
|
|
725
|
+
const opensearchClient = this.connection.getOpenSearchClient();
|
|
726
|
+
const indexName = this.connection.getOpenSearchIndex();
|
|
727
|
+
|
|
728
|
+
const response = await opensearchClient.search({
|
|
729
|
+
index: indexName,
|
|
730
|
+
body: {
|
|
731
|
+
size: limit,
|
|
732
|
+
query: {
|
|
733
|
+
bool: {
|
|
734
|
+
should: [
|
|
735
|
+
{ term: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
|
|
736
|
+
{ prefix: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
|
|
737
|
+
{ term: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
|
|
738
|
+
{ prefix: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } }
|
|
739
|
+
],
|
|
740
|
+
minimum_should_match: 1
|
|
741
|
+
}
|
|
742
|
+
},
|
|
743
|
+
_source: ['postgres_id', 'chemical_name', 'cas_numbers', 'identifier_values', 'synonyms']
|
|
744
|
+
}
|
|
745
|
+
});
|
|
746
|
+
|
|
747
|
+
const hits = response.body?.hits?.hits || [];
|
|
748
|
+
const results = hits.map((hit) => ({
|
|
749
|
+
id: hit._source.postgres_id,
|
|
750
|
+
name: hit._source.chemical_name,
|
|
751
|
+
cas: hit._source.cas_numbers || [],
|
|
752
|
+
identifiers: hit._source.identifier_values || [],
|
|
753
|
+
synonyms: hit._source.synonyms || [],
|
|
754
|
+
score: hit._score
|
|
755
|
+
}));
|
|
756
|
+
|
|
757
|
+
return { results };
|
|
758
|
+
} catch (error) {
|
|
759
|
+
logError('pegasus-sdk', 'ChemicalsService', 'searchBySynonym', error);
|
|
760
|
+
throw error;
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
async countAll() {
|
|
765
|
+
try {
|
|
766
|
+
const db = await this.getDb();
|
|
767
|
+
const result = await db
|
|
768
|
+
.select({ count: sql`count(*)::int` })
|
|
769
|
+
.from(schema.chemicals);
|
|
770
|
+
return { count: result[0].count };
|
|
771
|
+
} catch (error) {
|
|
772
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countAll', error);
|
|
773
|
+
throw error;
|
|
774
|
+
}
|
|
775
|
+
}
|
|
776
|
+
|
|
777
|
+
async findChemicalsWithoutDocuments(collectionName, searchTerm, pageSize = 100) {
|
|
778
|
+
try {
|
|
779
|
+
const db = await this.getDb();
|
|
780
|
+
|
|
781
|
+
let whereConditions = [];
|
|
782
|
+
|
|
783
|
+
if (collectionName) {
|
|
784
|
+
whereConditions.push(arrayContains(schema.chemicals.chemicalCategories, [collectionName]));
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
if (searchTerm) {
|
|
788
|
+
const searchPattern = `%${escapeLikePattern(searchTerm)}%`;
|
|
789
|
+
whereConditions.push(sql`${schema.chemicals.chemicalName} ILIKE ${searchPattern}`);
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
const whereClause = whereConditions.length > 0 ? and(...whereConditions) : undefined;
|
|
793
|
+
|
|
794
|
+
const results = await db
|
|
795
|
+
.select()
|
|
796
|
+
.from(schema.chemicals)
|
|
797
|
+
.where(whereClause)
|
|
798
|
+
.limit(pageSize);
|
|
799
|
+
|
|
800
|
+
return results;
|
|
801
|
+
} catch (error) {
|
|
802
|
+
logError('pegasus-sdk', 'ChemicalsService', 'findChemicalsWithoutDocuments', error);
|
|
803
|
+
throw error;
|
|
804
|
+
}
|
|
805
|
+
}
|
|
806
|
+
|
|
807
|
+
async countChemicalsWithoutDocuments(collectionName) {
|
|
808
|
+
try {
|
|
809
|
+
const db = await this.getDb();
|
|
810
|
+
|
|
811
|
+
const whereClause = collectionName
|
|
812
|
+
? arrayContains(schema.chemicals.chemicalCategories, [collectionName])
|
|
813
|
+
: undefined;
|
|
814
|
+
|
|
815
|
+
const result = await db
|
|
816
|
+
.select({ count: sql`count(*)::int` })
|
|
817
|
+
.from(schema.chemicals)
|
|
818
|
+
.where(whereClause);
|
|
819
|
+
|
|
820
|
+
return { count: result[0].count };
|
|
821
|
+
} catch (error) {
|
|
822
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countChemicalsWithoutDocuments', error);
|
|
823
|
+
throw error;
|
|
824
|
+
}
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
_buildEsHandlers() {
|
|
828
|
+
return {
|
|
829
|
+
index: async (params) => {
|
|
830
|
+
const chemical = params.body;
|
|
831
|
+
const result = await this.createChemical(chemical);
|
|
832
|
+
|
|
833
|
+
return {
|
|
834
|
+
_index: params.index,
|
|
835
|
+
_id: result.chemicalId,
|
|
836
|
+
_version: 1,
|
|
837
|
+
result: 'created',
|
|
838
|
+
_source: result
|
|
839
|
+
};
|
|
840
|
+
},
|
|
841
|
+
|
|
842
|
+
bulk: async (params) => {
|
|
843
|
+
const operations = params.body || params.operations;
|
|
844
|
+
|
|
845
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Starting bulk operation with ${operations?.length || 0} total operations`);
|
|
846
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Params index: ${params.index}`);
|
|
847
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Operations array type: ${Array.isArray(operations) ? 'array' : typeof operations}`);
|
|
848
|
+
|
|
849
|
+
const cdiDocuments = [];
|
|
850
|
+
let cdiOpCount = 0;
|
|
851
|
+
let otherOpCount = 0;
|
|
852
|
+
|
|
853
|
+
for (let i = 0; i < operations.length; i++) {
|
|
854
|
+
const op = operations[i];
|
|
855
|
+
const isIndexOp = !!(op.index || op.create);
|
|
856
|
+
const indexName = op.index?._index || op.create?._index || op.delete?._index || op.update?._index;
|
|
857
|
+
|
|
858
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Op[${i}]: action=${Object.keys(op)[0] || 'unknown'}, index=${indexName}`);
|
|
859
|
+
|
|
860
|
+
if ((op.index || op.create) &&
|
|
861
|
+
(op.index?._index === 'chemical_data_index' || op.create?._index === 'chemical_data_index')) {
|
|
862
|
+
const doc = operations[i + 1];
|
|
863
|
+
const sourceId = op.index?._id || op.create?._id;
|
|
864
|
+
|
|
865
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Found CDI entry: sourceId=${sourceId}, hasDoc=${!!doc}`);
|
|
866
|
+
|
|
867
|
+
if (doc && sourceId) {
|
|
868
|
+
const cdiDoc = {
|
|
869
|
+
source_id: sourceId,
|
|
870
|
+
chemical_name: doc.chemical_primary_name || (doc.chemical_names && doc.chemical_names[0]) || null,
|
|
871
|
+
chemical_meta: doc.chemical_meta || {},
|
|
872
|
+
chemical_identifiers: doc.chemical_identifiers || {},
|
|
873
|
+
chemical_synonyms: doc.chemical_synonyms || [],
|
|
874
|
+
chemical_categories: doc.chemical_categories || [],
|
|
875
|
+
created_at: doc.chemical_created_at ? (typeof doc.chemical_created_at === 'string' ? new Date(doc.chemical_created_at) : doc.chemical_created_at) : new Date(),
|
|
876
|
+
updated_at: doc.chemical_updated_at ? (typeof doc.chemical_updated_at === 'string' ? new Date(doc.chemical_updated_at) : doc.chemical_updated_at) : new Date()
|
|
877
|
+
};
|
|
878
|
+
cdiDocuments.push(cdiDoc);
|
|
879
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Extracted CDI doc: ${JSON.stringify({ source_id: cdiDoc.source_id, chemical_name: cdiDoc.chemical_name })}`);
|
|
880
|
+
i++;
|
|
881
|
+
cdiOpCount++;
|
|
882
|
+
} else {
|
|
883
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] CDI entry incomplete: sourceId=${sourceId}, doc=${!!doc}`);
|
|
884
|
+
}
|
|
885
|
+
} else {
|
|
886
|
+
otherOpCount++;
|
|
887
|
+
}
|
|
888
|
+
}
|
|
889
|
+
|
|
890
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Scan complete: ${cdiOpCount} CDI docs found, ${otherOpCount} other operations skipped`);
|
|
891
|
+
|
|
892
|
+
if (cdiDocuments.length === 0) {
|
|
893
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] No CDI documents to index, returning empty no-op response`);
|
|
894
|
+
return { took: 0, errors: false, items: [] };
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Calling bulkIndexFielded with ${cdiDocuments.length} CDI documents`);
|
|
898
|
+
|
|
899
|
+
try {
|
|
900
|
+
const result = await this.bulkIndexFielded(cdiDocuments);
|
|
901
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] bulkIndexFielded returned: indexed=${result.indexed}, errors=${result.errors.length}`);
|
|
902
|
+
|
|
903
|
+
if (result.errors.length > 0) {
|
|
904
|
+
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Errors during bulk indexing', result.errors);
|
|
905
|
+
}
|
|
906
|
+
|
|
907
|
+
return {
|
|
908
|
+
took: 1,
|
|
909
|
+
errors: result.errors.length > 0,
|
|
910
|
+
items: result.results.map((res, idx) => ({
|
|
911
|
+
index: {
|
|
912
|
+
_index: 'chemical_data_index',
|
|
913
|
+
_id: cdiDocuments[idx].source_id,
|
|
914
|
+
status: res.success ? 200 : 400,
|
|
915
|
+
result: res.success ? 'created' : 'error',
|
|
916
|
+
...(res.success ? {} : { error: { type: 'mapper_parsing_exception', reason: res.error } })
|
|
917
|
+
}
|
|
918
|
+
}))
|
|
919
|
+
};
|
|
920
|
+
} catch (error) {
|
|
921
|
+
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Fatal error during bulk indexing', error);
|
|
922
|
+
throw error;
|
|
923
|
+
}
|
|
924
|
+
},
|
|
925
|
+
|
|
926
|
+
get: async (params) => {
|
|
927
|
+
const result = await this.getChemicalBySourceId(params.id);
|
|
928
|
+
|
|
929
|
+
if (!result) {
|
|
930
|
+
return {
|
|
931
|
+
_index: params.index,
|
|
932
|
+
_id: params.id,
|
|
933
|
+
found: false
|
|
934
|
+
};
|
|
935
|
+
}
|
|
936
|
+
|
|
937
|
+
return {
|
|
938
|
+
_index: params.index,
|
|
939
|
+
_id: params.id,
|
|
940
|
+
_version: 1,
|
|
941
|
+
found: true,
|
|
942
|
+
_source: result
|
|
943
|
+
};
|
|
944
|
+
},
|
|
945
|
+
|
|
946
|
+
update: async (params) => {
|
|
947
|
+
const result = await this.updateChemical(params.id, params.body);
|
|
948
|
+
|
|
949
|
+
return {
|
|
950
|
+
_index: params.index,
|
|
951
|
+
_id: params.id,
|
|
952
|
+
_version: 2,
|
|
953
|
+
result: result ? 'updated' : 'noop',
|
|
954
|
+
_source: result
|
|
955
|
+
};
|
|
956
|
+
},
|
|
957
|
+
|
|
958
|
+
delete: async (params) => {
|
|
959
|
+
if (params.index === 'synonym_lookup_index') {
|
|
960
|
+
return { _index: params.index, _id: params.id, result: 'not_found' };
|
|
961
|
+
}
|
|
962
|
+
const result = await this.deleteBySourceId(params.id);
|
|
963
|
+
|
|
964
|
+
return {
|
|
965
|
+
_index: params.index,
|
|
966
|
+
_id: params.id,
|
|
967
|
+
result: result ? 'deleted' : 'not_found'
|
|
968
|
+
};
|
|
969
|
+
},
|
|
970
|
+
|
|
971
|
+
deleteByQuery: async (params) => {
|
|
972
|
+
const sourceId = params.body?.query?.term?.chemical_set_identifier
|
|
973
|
+
|| params.body?.query?.term?.source_id;
|
|
974
|
+
if (!sourceId) {
|
|
975
|
+
return { deleted: 0, failures: [] };
|
|
976
|
+
}
|
|
977
|
+
const result = await this.deleteBySourceId(sourceId);
|
|
978
|
+
return {
|
|
979
|
+
deleted: result ? 1 : 0,
|
|
980
|
+
failures: []
|
|
981
|
+
};
|
|
982
|
+
},
|
|
983
|
+
|
|
984
|
+
search: async (params) => {
|
|
985
|
+
let searchTerm = '';
|
|
986
|
+
let limit = params.body?.size || 10;
|
|
987
|
+
|
|
988
|
+
if (params.index === 'synonym_lookup_index') {
|
|
989
|
+
const query = params.body?.query;
|
|
990
|
+
searchTerm = query?.match?.chemical_name ||
|
|
991
|
+
query?.term?.chemical_name ||
|
|
992
|
+
query?.query_string?.query || '';
|
|
993
|
+
const searchResults = await this.searchBySynonym(searchTerm, limit);
|
|
994
|
+
|
|
995
|
+
return {
|
|
996
|
+
took: 1,
|
|
997
|
+
timed_out: false,
|
|
998
|
+
_shards: {
|
|
999
|
+
total: 1,
|
|
1000
|
+
successful: 1,
|
|
1001
|
+
skipped: 0,
|
|
1002
|
+
failed: 0
|
|
1003
|
+
},
|
|
1004
|
+
hits: {
|
|
1005
|
+
total: {
|
|
1006
|
+
value: searchResults.results.length,
|
|
1007
|
+
relation: 'eq'
|
|
1008
|
+
},
|
|
1009
|
+
max_score: searchResults.results[0]?.score || 0,
|
|
1010
|
+
hits: searchResults.results.map(result => ({
|
|
1011
|
+
_index: params.index,
|
|
1012
|
+
_id: result.id,
|
|
1013
|
+
_score: result.score,
|
|
1014
|
+
_source: {
|
|
1015
|
+
postgres_id: result.id,
|
|
1016
|
+
chemical_name: result.name,
|
|
1017
|
+
cas_numbers: result.cas,
|
|
1018
|
+
identifier_values: result.identifiers,
|
|
1019
|
+
synonyms: result.synonyms
|
|
1020
|
+
}
|
|
1021
|
+
}))
|
|
1022
|
+
}
|
|
1023
|
+
};
|
|
1024
|
+
} else {
|
|
1025
|
+
const query = params.body?.query;
|
|
1026
|
+
searchTerm = query?.match?.chemical_name ||
|
|
1027
|
+
query?.term?.chemical_name ||
|
|
1028
|
+
query?.query_string?.query || '';
|
|
1029
|
+
const searchResults = await this.searchByName(searchTerm, limit);
|
|
1030
|
+
|
|
1031
|
+
return {
|
|
1032
|
+
took: 1,
|
|
1033
|
+
timed_out: false,
|
|
1034
|
+
_shards: {
|
|
1035
|
+
total: 1,
|
|
1036
|
+
successful: 1,
|
|
1037
|
+
skipped: 0,
|
|
1038
|
+
failed: 0
|
|
1039
|
+
},
|
|
1040
|
+
hits: {
|
|
1041
|
+
total: {
|
|
1042
|
+
value: searchResults.results.length,
|
|
1043
|
+
relation: 'eq'
|
|
1044
|
+
},
|
|
1045
|
+
max_score: searchResults.results[0]?.score || 0,
|
|
1046
|
+
hits: searchResults.results.map(result => ({
|
|
1047
|
+
_index: params.index,
|
|
1048
|
+
_id: result.id,
|
|
1049
|
+
_score: result.score,
|
|
1050
|
+
_source: {
|
|
1051
|
+
postgres_id: result.id,
|
|
1052
|
+
chemical_name: result.name,
|
|
1053
|
+
cas_numbers: result.cas,
|
|
1054
|
+
identifier_values: result.identifiers,
|
|
1055
|
+
synonyms: result.synonyms
|
|
1056
|
+
}
|
|
1057
|
+
}))
|
|
1058
|
+
}
|
|
1059
|
+
};
|
|
1060
|
+
}
|
|
1061
|
+
},
|
|
1062
|
+
|
|
1063
|
+
count: async (params) => {
|
|
1064
|
+
if (params.index === 'synonym_lookup_index') {
|
|
1065
|
+
return await this.getTotalSynonymCount();
|
|
1066
|
+
}
|
|
1067
|
+
return await this.countAll();
|
|
1068
|
+
}
|
|
1069
|
+
};
|
|
1070
|
+
}
|
|
1071
|
+
|
|
1072
|
+
registerElasticsearchHandlers(elasticsearchService) {
|
|
1073
|
+
const configurablePatterns = this.connection.config.indexRoutes?.chemicals || ['chemicals*'];
|
|
1074
|
+
const legacyPatterns = ['synonym_lookup_index', 'chemical_data_index', 'chemical_converter_index'];
|
|
1075
|
+
const allPatterns = [...new Set([...configurablePatterns, ...legacyPatterns])];
|
|
1076
|
+
const handlers = this._buildEsHandlers();
|
|
1077
|
+
allPatterns.forEach(pattern => {
|
|
1078
|
+
elasticsearchService.registerIndexRoute(pattern, handlers);
|
|
1079
|
+
});
|
|
1080
|
+
}
|
|
1081
|
+
}
|
|
1082
|
+
|
|
1062
1083
|
module.exports = ChemicalsService;
|