@toxplanet/pegasus-sdk 1.1.16 → 1.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/environment.dev.js +3 -3
- package/lib/chemicals.js +1087 -1028
- package/lib/connection.js +87 -1
- package/lib/db/schema.js +27 -27
- package/package.json +1 -1
package/lib/chemicals.js
CHANGED
|
@@ -1,1029 +1,1088 @@
|
|
|
1
|
-
const { logError, logInfo } = require('@toxplanet/tphelper/logging');
|
|
2
|
-
const { getDrizzle, schema } = require('./db');
|
|
3
|
-
const { eq, sql, and, inArray, arrayContains } = require('drizzle-orm');
|
|
4
|
-
const { SQSClient, SendMessageCommand } = require('@aws-sdk/client-sqs');
|
|
5
|
-
|
|
6
|
-
// Relevance boosts for chemical search scoring. The names suggest exact vs.
// prefix matches on primary vs. secondary fields; the code that consumes
// them is outside this excerpt, so confirm the exact semantics there.
const SEARCH_BOOST_EXACT_PRIMARY = 100;
const SEARCH_BOOST_PREFIX_PRIMARY = 50;
const SEARCH_BOOST_EXACT_SECONDARY = 30;
const SEARCH_BOOST_PREFIX_SECONDARY = 10;

// Identifier type names accepted by this module (presumably the keys allowed
// in identifier lookups — verify against the code that reads this set).
const ALLOWED_IDENTIFIER_TYPES = new Set([
  'CAS',
  'SMILES',
  'InChI',
  'InChIKey',
  'PubChem',
  'DTXSID',
  'EINECS',
  'EC'
]);
|
|
12
|
-
|
|
13
|
-
/**
 * Escapes the characters that are special inside a SQL LIKE pattern
 * (`%`, `_` and the backslash escape character) by prefixing each one with
 * a backslash, so the value is matched literally.
 *
 * @param {*} value - Text to escape. Non-string values are converted with
 *   String(); null and undefined are treated as an empty string.
 * @returns {string} The escaped text.
 */
function escapeLikePattern(value) {
  // Coerce first so a numeric identifier or a missing value does not throw
  // on `.replace`.
  return String(value ?? '').replace(/[%_\\]/g, '\\$&');
}
|
|
16
|
-
|
|
17
|
-
class ChemicalsService {
|
|
18
|
-
constructor(connection) {
|
|
19
|
-
this.connection = connection;
|
|
20
|
-
this.db = null;
|
|
21
|
-
this.sqsClient = null;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
getDb() {
|
|
25
|
-
if (!this.db) {
|
|
26
|
-
this.db = getDrizzle(this.connection.pgPool);
|
|
27
|
-
}
|
|
28
|
-
return this.db;
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
/**
 * Posts a "SqlWriteFailure" message describing a failed SQL write to the
 * failed-items SQS queue.
 *
 * The queue URL comes from the SQS_FAILED_ITEMS_QUEUE environment variable
 * when set; otherwise it is derived from the region, `config.awsAccountId`
 * and `config.environment`. This method never throws: every failure is
 * logged and reported through the return value.
 *
 * @param {object} failure
 * @param {string} failure.sql - SQL text to record in the message.
 * @param {object} failure.parameters - Parameter values for that SQL.
 * @param {Error} failure.error - The error that caused the write to fail;
 *   only its message is forwarded.
 * @param {number} failure.retryCount - Retries already attempted locally.
 * @param {Date|string} [failure.failedAt] - Failure time; defaults to now.
 * @returns {Promise<boolean>} true when SQS accepted the message, false
 *   when no queue URL could be determined or sending failed.
 */
async sendSqlWriteFailure({ sql, parameters, error, retryCount, failedAt }) {
  try {
    const region = process.env.AWS_REGION || this.connection.region;
    const { awsAccountId, environment } = this.connection.config;
    // Only derive a default URL when every component is known; otherwise the
    // template would produce a host or queue name containing "undefined".
    const defaultQueueUrl = region && awsAccountId && environment
      ? `https://sqs.${region}.amazonaws.com/${awsAccountId}/cr-pegasus-failed-items-${environment}`
      : null;
    const queueUrl = process.env.SQS_FAILED_ITEMS_QUEUE || defaultQueueUrl;

    if (!queueUrl) {
      logError('pegasus-sdk', 'sendSqlWriteFailure', 'No SQS queue URL available: set SQS_FAILED_ITEMS_QUEUE or provide a region plus awsAccountId and environment in config');
      return false;
    }

    logInfo('pegasus-sdk', `[sendSqlWriteFailure] Using queue: ${queueUrl}${process.env.SQS_FAILED_ITEMS_QUEUE ? ' (from env)' : ' (default)'}`);

    // The client is created once and reused for later failures.
    if (!this.sqsClient) {
      this.sqsClient = new SQSClient({ region });
    }

    // Accept a Date or anything the Date constructor understands (e.g. an
    // ISO string) so a string timestamp does not abort the report.
    const occurredAt = failedAt instanceof Date ? failedAt : new Date(failedAt || Date.now());

    const message = {
      MessageType: 'SqlWriteFailure',
      SourceService: this.connection.config.sourceService || 'pegasus-sdk',
      Timestamp: occurredAt.toISOString(),
      Sql: sql,
      Parameters: parameters,
      // Thrown values are not always Error instances; still record something.
      OriginalError: error?.message ?? String(error),
      RetryCount: retryCount
    };

    const command = new SendMessageCommand({
      QueueUrl: queueUrl,
      MessageBody: JSON.stringify(message)
    });

    const response = await this.sqsClient.send(command);
    logInfo('pegasus-sdk', `[sendSqlWriteFailure] SqlWriteFailure posted to SQS: MessageId=${response.MessageId}, RetryCount=${retryCount}`);
    return true;
  } catch (sqsError) {
    // Best effort by design: the caller decides what to do with `false`.
    logError('pegasus-sdk', 'sendSqlWriteFailure', 'Failed to post SqlWriteFailure to SQS', sqsError);
    return false;
  }
}
|
|
74
|
-
|
|
75
|
-
_buildChemicalUpsertSql(chemical) {
|
|
76
|
-
const sql = [
|
|
77
|
-
'INSERT INTO chemicals (source_id, chemical_name, chemical_meta, chemical_identifiers, chemical_synonyms, chemical_categories, created_at, updated_at)',
|
|
78
|
-
'VALUES (@source_id, @chemical_name, @chemical_meta::jsonb, @chemical_identifiers::jsonb, @chemical_synonyms, @chemical_categories, @created_at, @updated_at)',
|
|
79
|
-
'ON CONFLICT (source_id) DO UPDATE SET',
|
|
80
|
-
' chemical_name = @chemical_name,',
|
|
81
|
-
' chemical_meta = @chemical_meta::jsonb,',
|
|
82
|
-
' chemical_identifiers = @chemical_identifiers::jsonb,',
|
|
83
|
-
' chemical_synonyms = @chemical_synonyms,',
|
|
84
|
-
' chemical_categories = @chemical_categories,',
|
|
85
|
-
' updated_at = @updated_at'
|
|
86
|
-
].join('\n');
|
|
87
|
-
|
|
88
|
-
const serializeDate = (d) => d instanceof Date ? d.toISOString() : d;
|
|
89
|
-
|
|
90
|
-
const parameters = {
|
|
91
|
-
'@source_id': chemical.sourceId,
|
|
92
|
-
'@chemical_name': chemical.chemicalName,
|
|
93
|
-
'@chemical_meta': JSON.stringify(chemical.chemicalMeta ?? {}),
|
|
94
|
-
'@chemical_identifiers': JSON.stringify(chemical.chemicalIdentifiers ?? {}),
|
|
95
|
-
'@chemical_synonyms': JSON.stringify(chemical.chemicalSynonyms ?? []),
|
|
96
|
-
'@chemical_categories': JSON.stringify(chemical.chemicalCategories ?? []),
|
|
97
|
-
'@created_at': serializeDate(chemical.createdAt),
|
|
98
|
-
'@updated_at': serializeDate(chemical.updatedAt)
|
|
99
|
-
};
|
|
100
|
-
|
|
101
|
-
return { sql, parameters };
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
if (!
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
.
|
|
299
|
-
.
|
|
300
|
-
.
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
const
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
const
|
|
577
|
-
|
|
578
|
-
const
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
}
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
}
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
const
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
:
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
if (
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
}
|
|
1028
|
-
|
|
1
|
+
const { logError, logInfo } = require('@toxplanet/tphelper/logging');
|
|
2
|
+
const { getDrizzle, schema } = require('./db');
|
|
3
|
+
const { eq, sql, and, inArray, arrayContains } = require('drizzle-orm');
|
|
4
|
+
const { SQSClient, SendMessageCommand } = require('@aws-sdk/client-sqs');
|
|
5
|
+
|
|
6
|
+
// Relevance boosts for chemical search scoring. The names suggest exact vs.
// prefix matches on primary vs. secondary fields; the code that consumes
// them is outside this excerpt, so confirm the exact semantics there.
const SEARCH_BOOST_EXACT_PRIMARY = 100;
const SEARCH_BOOST_PREFIX_PRIMARY = 50;
const SEARCH_BOOST_EXACT_SECONDARY = 30;
const SEARCH_BOOST_PREFIX_SECONDARY = 10;

// Identifier type names accepted by this module (presumably the keys allowed
// in identifier lookups — verify against the code that reads this set).
const ALLOWED_IDENTIFIER_TYPES = new Set([
  'CAS',
  'SMILES',
  'InChI',
  'InChIKey',
  'PubChem',
  'DTXSID',
  'EINECS',
  'EC'
]);
|
|
12
|
+
|
|
13
|
+
/**
 * Escapes the characters that are special inside a SQL LIKE pattern
 * (`%`, `_` and the backslash escape character) by prefixing each one with
 * a backslash, so the value is matched literally.
 *
 * @param {*} value - Text to escape. Non-string values are converted with
 *   String(); null and undefined are treated as an empty string.
 * @returns {string} The escaped text.
 */
function escapeLikePattern(value) {
  // Coerce first so a numeric identifier or a missing value does not throw
  // on `.replace`.
  return String(value ?? '').replace(/[%_\\]/g, '\\$&');
}
|
|
16
|
+
|
|
17
|
+
class ChemicalsService {
|
|
18
|
+
constructor(connection) {
|
|
19
|
+
this.connection = connection;
|
|
20
|
+
this.db = null;
|
|
21
|
+
this.sqsClient = null;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
getDb() {
|
|
25
|
+
if (!this.db) {
|
|
26
|
+
this.db = getDrizzle(this.connection.pgPool);
|
|
27
|
+
}
|
|
28
|
+
return this.db;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
 * Posts a "SqlWriteFailure" message describing a failed SQL write to the
 * failed-items SQS queue.
 *
 * The queue URL comes from the SQS_FAILED_ITEMS_QUEUE environment variable
 * when set; otherwise it is derived from the region, `config.awsAccountId`
 * and `config.environment`. This method never throws: every failure is
 * logged and reported through the return value.
 *
 * @param {object} failure
 * @param {string} failure.sql - SQL text to record in the message.
 * @param {object} failure.parameters - Parameter values for that SQL.
 * @param {Error} failure.error - The error that caused the write to fail;
 *   only its message is forwarded.
 * @param {number} failure.retryCount - Retries already attempted locally.
 * @param {Date|string} [failure.failedAt] - Failure time; defaults to now.
 * @returns {Promise<boolean>} true when SQS accepted the message, false
 *   when no queue URL could be determined or sending failed.
 */
async sendSqlWriteFailure({ sql, parameters, error, retryCount, failedAt }) {
  try {
    const region = process.env.AWS_REGION || this.connection.region;
    const { awsAccountId, environment } = this.connection.config;
    // Only derive a default URL when every component is known; otherwise the
    // template would produce a host or queue name containing "undefined".
    const defaultQueueUrl = region && awsAccountId && environment
      ? `https://sqs.${region}.amazonaws.com/${awsAccountId}/cr-pegasus-failed-items-${environment}`
      : null;
    const queueUrl = process.env.SQS_FAILED_ITEMS_QUEUE || defaultQueueUrl;

    if (!queueUrl) {
      logError('pegasus-sdk', 'sendSqlWriteFailure', 'No SQS queue URL available: set SQS_FAILED_ITEMS_QUEUE or provide a region plus awsAccountId and environment in config');
      return false;
    }

    logInfo('pegasus-sdk', `[sendSqlWriteFailure] Using queue: ${queueUrl}${process.env.SQS_FAILED_ITEMS_QUEUE ? ' (from env)' : ' (default)'}`);

    // The client is created once and reused for later failures.
    if (!this.sqsClient) {
      this.sqsClient = new SQSClient({ region });
    }

    // Accept a Date or anything the Date constructor understands (e.g. an
    // ISO string) so a string timestamp does not abort the report.
    const occurredAt = failedAt instanceof Date ? failedAt : new Date(failedAt || Date.now());

    const message = {
      MessageType: 'SqlWriteFailure',
      SourceService: this.connection.config.sourceService || 'pegasus-sdk',
      Timestamp: occurredAt.toISOString(),
      Sql: sql,
      Parameters: parameters,
      // Thrown values are not always Error instances; still record something.
      OriginalError: error?.message ?? String(error),
      RetryCount: retryCount
    };

    const command = new SendMessageCommand({
      QueueUrl: queueUrl,
      MessageBody: JSON.stringify(message)
    });

    const response = await this.sqsClient.send(command);
    logInfo('pegasus-sdk', `[sendSqlWriteFailure] SqlWriteFailure posted to SQS: MessageId=${response.MessageId}, RetryCount=${retryCount}`);
    return true;
  } catch (sqsError) {
    // Best effort by design: the caller decides what to do with `false`.
    logError('pegasus-sdk', 'sendSqlWriteFailure', 'Failed to post SqlWriteFailure to SQS', sqsError);
    return false;
  }
}
|
|
74
|
+
|
|
75
|
+
_buildChemicalUpsertSql(chemical) {
|
|
76
|
+
const sql = [
|
|
77
|
+
'INSERT INTO chemicals (source_id, chemical_name, chemical_meta, chemical_identifiers, chemical_synonyms, chemical_categories, created_at, updated_at)',
|
|
78
|
+
'VALUES (@source_id, @chemical_name, @chemical_meta::jsonb, @chemical_identifiers::jsonb, @chemical_synonyms, @chemical_categories, @created_at, @updated_at)',
|
|
79
|
+
'ON CONFLICT (source_id) DO UPDATE SET',
|
|
80
|
+
' chemical_name = @chemical_name,',
|
|
81
|
+
' chemical_meta = @chemical_meta::jsonb,',
|
|
82
|
+
' chemical_identifiers = @chemical_identifiers::jsonb,',
|
|
83
|
+
' chemical_synonyms = @chemical_synonyms,',
|
|
84
|
+
' chemical_categories = @chemical_categories,',
|
|
85
|
+
' updated_at = @updated_at'
|
|
86
|
+
].join('\n');
|
|
87
|
+
|
|
88
|
+
const serializeDate = (d) => d instanceof Date ? d.toISOString() : d;
|
|
89
|
+
|
|
90
|
+
const parameters = {
|
|
91
|
+
'@source_id': chemical.sourceId,
|
|
92
|
+
'@chemical_name': chemical.chemicalName,
|
|
93
|
+
'@chemical_meta': JSON.stringify(chemical.chemicalMeta ?? {}),
|
|
94
|
+
'@chemical_identifiers': JSON.stringify(chemical.chemicalIdentifiers ?? {}),
|
|
95
|
+
'@chemical_synonyms': JSON.stringify(chemical.chemicalSynonyms ?? []),
|
|
96
|
+
'@chemical_categories': JSON.stringify(chemical.chemicalCategories ?? []),
|
|
97
|
+
'@created_at': serializeDate(chemical.createdAt),
|
|
98
|
+
'@updated_at': serializeDate(chemical.updatedAt)
|
|
99
|
+
};
|
|
100
|
+
|
|
101
|
+
return { sql, parameters };
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
_buildDebugSql(chemical) {
|
|
105
|
+
const esc = (s) => `'${String(s ?? '').replace(/'/g, "''")}'`;
|
|
106
|
+
const escJson = (v) => `'${JSON.stringify(v ?? {}).replace(/'/g, "''")}'`;
|
|
107
|
+
const escArr = (arr) => {
|
|
108
|
+
if (!Array.isArray(arr) || arr.length === 0) return `ARRAY[]::text[]`;
|
|
109
|
+
return `ARRAY[${arr.map(s => esc(s)).join(', ')}]`;
|
|
110
|
+
};
|
|
111
|
+
const escDate = (d) => esc(d instanceof Date ? d.toISOString() : (d ?? new Date().toISOString()));
|
|
112
|
+
|
|
113
|
+
return [
|
|
114
|
+
`INSERT INTO chemicals`,
|
|
115
|
+
` (source_id, chemical_name, chemical_meta, chemical_identifiers, chemical_synonyms, chemical_categories, created_at, updated_at)`,
|
|
116
|
+
`VALUES (`,
|
|
117
|
+
` ${esc(chemical.sourceId)},`,
|
|
118
|
+
` ${esc(chemical.chemicalName)},`,
|
|
119
|
+
` ${escJson(chemical.chemicalMeta)}::jsonb,`,
|
|
120
|
+
` ${escJson(chemical.chemicalIdentifiers)}::jsonb,`,
|
|
121
|
+
` ${escArr(chemical.chemicalSynonyms)},`,
|
|
122
|
+
` ${escArr(chemical.chemicalCategories)},`,
|
|
123
|
+
` ${escDate(chemical.createdAt)},`,
|
|
124
|
+
` ${escDate(chemical.updatedAt)}`,
|
|
125
|
+
`)`,
|
|
126
|
+
`ON CONFLICT (source_id) DO UPDATE SET`,
|
|
127
|
+
` chemical_name = ${esc(chemical.chemicalName)},`,
|
|
128
|
+
` chemical_meta = ${escJson(chemical.chemicalMeta)}::jsonb,`,
|
|
129
|
+
` chemical_identifiers = ${escJson(chemical.chemicalIdentifiers)}::jsonb,`,
|
|
130
|
+
` chemical_synonyms = ${escArr(chemical.chemicalSynonyms)},`,
|
|
131
|
+
` chemical_categories = ${escArr(chemical.chemicalCategories)},`,
|
|
132
|
+
` updated_at = NOW();`
|
|
133
|
+
].join('\n');
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
async bulkIndexFielded(documents) {
|
|
137
|
+
try {
|
|
138
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Starting bulk index with ${documents?.length || 0} documents`);
|
|
139
|
+
|
|
140
|
+
if (!documents || documents.length === 0) {
|
|
141
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] No documents provided, returning empty result`);
|
|
142
|
+
return { indexed: 0, errors: [], results: [] };
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// Proactively validate the connection before any real query fires.
|
|
146
|
+
// If idle too long, this reconnects first so the real query never faces
|
|
147
|
+
// the full connectionTimeoutMillis wait on a stale pool.
|
|
148
|
+
const reconnected = await this.connection.ensureConnected();
|
|
149
|
+
if (reconnected) {
|
|
150
|
+
this.db = null; // force getDb() to bind to the fresh pool
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
const db = this.getDb();
|
|
154
|
+
const results = [];
|
|
155
|
+
const errors = [];
|
|
156
|
+
|
|
157
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Database connection established`);
|
|
158
|
+
|
|
159
|
+
for (let i = 0; i < documents.length; i++) {
|
|
160
|
+
const doc = documents[i];
|
|
161
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Processing document ${i}: source_id=${doc.source_id}, chemical_name=${doc.chemical_name}`);
|
|
162
|
+
|
|
163
|
+
const parseDate = (dateValue) => {
|
|
164
|
+
if (!dateValue) return new Date();
|
|
165
|
+
if (dateValue instanceof Date) return dateValue;
|
|
166
|
+
if (typeof dateValue === 'string') return new Date(dateValue);
|
|
167
|
+
return new Date();
|
|
168
|
+
};
|
|
169
|
+
|
|
170
|
+
const chemical = {
|
|
171
|
+
sourceId: doc.source_id || doc._id,
|
|
172
|
+
chemicalName: doc.chemical_name || doc.name,
|
|
173
|
+
chemicalMeta: doc.chemical_meta || {},
|
|
174
|
+
chemicalIdentifiers: doc.chemical_identifiers || {},
|
|
175
|
+
chemicalSynonyms: doc.chemical_synonyms || [],
|
|
176
|
+
chemicalCategories: doc.chemical_categories || [],
|
|
177
|
+
createdAt: parseDate(doc.created_at),
|
|
178
|
+
updatedAt: parseDate(doc.updated_at),
|
|
179
|
+
...(doc.imported_at && { importedAt: doc.imported_at }),
|
|
180
|
+
...(doc.chemical_id && { chemicalId: doc.chemical_id })
|
|
181
|
+
};
|
|
182
|
+
|
|
183
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Prepared chemical object: sourceId=${chemical.sourceId}, chemicalName=${chemical.chemicalName}`);
|
|
184
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] DEBUG SQL for document ${i}:\n${this._buildDebugSql(chemical)}`);
|
|
185
|
+
|
|
186
|
+
const isConnectionError = (err) =>
|
|
187
|
+
err.message?.toLowerCase().includes('timeout') ||
|
|
188
|
+
err.message?.toLowerCase().includes('connection') ||
|
|
189
|
+
err.code === 'ECONNREFUSED' ||
|
|
190
|
+
err.code === 'ETIMEDOUT';
|
|
191
|
+
|
|
192
|
+
const attemptUpsert = () =>
|
|
193
|
+
db.insert(schema.chemicals)
|
|
194
|
+
.values(chemical)
|
|
195
|
+
.onConflictDoUpdate({
|
|
196
|
+
target: schema.chemicals.sourceId,
|
|
197
|
+
set: {
|
|
198
|
+
chemicalName: chemical.chemicalName,
|
|
199
|
+
chemicalMeta: chemical.chemicalMeta,
|
|
200
|
+
chemicalIdentifiers: chemical.chemicalIdentifiers,
|
|
201
|
+
chemicalSynonyms: chemical.chemicalSynonyms,
|
|
202
|
+
chemicalCategories: chemical.chemicalCategories,
|
|
203
|
+
updatedAt: new Date()
|
|
204
|
+
}
|
|
205
|
+
})
|
|
206
|
+
.returning({
|
|
207
|
+
chemicalId: schema.chemicals.chemicalId,
|
|
208
|
+
sourceId: schema.chemicals.sourceId
|
|
209
|
+
});
|
|
210
|
+
|
|
211
|
+
let lastError = null;
|
|
212
|
+
let retryCount = 0;
|
|
213
|
+
const failedAt = new Date();
|
|
214
|
+
|
|
215
|
+
try {
|
|
216
|
+
const [result] = await attemptUpsert();
|
|
217
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully: ${result?.chemicalId || 'no ID returned'}`);
|
|
218
|
+
this.connection.recordActivity();
|
|
219
|
+
results.push({ index: i, success: true, result });
|
|
220
|
+
continue;
|
|
221
|
+
} catch (firstErr) {
|
|
222
|
+
lastError = firstErr;
|
|
223
|
+
|
|
224
|
+
if (isConnectionError(firstErr)) {
|
|
225
|
+
// Stale pool — rebuild the connection and try once more before queuing
|
|
226
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} connection error (${firstErr.message}), reconnecting pool and retrying`);
|
|
227
|
+
try {
|
|
228
|
+
await this.connection.reconnect();
|
|
229
|
+
this.db = null; // force getDb() to bind to the new pool
|
|
230
|
+
const [result] = await attemptUpsert();
|
|
231
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully after reconnect: ${result?.chemicalId || 'no ID returned'}`);
|
|
232
|
+
this.connection.recordActivity();
|
|
233
|
+
results.push({ index: i, success: true, result });
|
|
234
|
+
continue;
|
|
235
|
+
} catch (reconnectErr) {
|
|
236
|
+
lastError = reconnectErr;
|
|
237
|
+
retryCount = 1;
|
|
238
|
+
}
|
|
239
|
+
} else {
|
|
240
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} first attempt failed (${firstErr.message}), retrying once`);
|
|
241
|
+
try {
|
|
242
|
+
const [result] = await attemptUpsert();
|
|
243
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully on retry: ${result?.chemicalId || 'no ID returned'}`);
|
|
244
|
+
this.connection.recordActivity();
|
|
245
|
+
results.push({ index: i, success: true, result });
|
|
246
|
+
continue;
|
|
247
|
+
} catch (retryErr) {
|
|
248
|
+
lastError = retryErr;
|
|
249
|
+
retryCount = 1;
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
logError('pegasus-sdk', 'bulkIndexFielded', `Document ${i} failed after ${retryCount} local retries (source_id=${chemical.sourceId})`, lastError);
|
|
255
|
+
|
|
256
|
+
const { sql: failureSql, parameters: failureParams } = this._buildChemicalUpsertSql(chemical);
|
|
257
|
+
const queued = await this.sendSqlWriteFailure({
|
|
258
|
+
sql: failureSql,
|
|
259
|
+
parameters: failureParams,
|
|
260
|
+
error: lastError,
|
|
261
|
+
retryCount,
|
|
262
|
+
failedAt
|
|
263
|
+
});
|
|
264
|
+
|
|
265
|
+
if (queued) {
|
|
266
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} (source_id=${chemical.sourceId}) queued for repair via SQS`);
|
|
267
|
+
} else {
|
|
268
|
+
logError('pegasus-sdk', 'bulkIndexFielded', `Document ${i} (source_id=${chemical.sourceId}) failed and could not be queued — data loss risk`, lastError);
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
results.push({ index: i, success: false, error: lastError.message, queued });
|
|
272
|
+
errors.push({ document: doc, error: lastError.message, queued });
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
const successCount = results.filter(r => r.success).length;
|
|
276
|
+
const queuedCount = results.filter(r => !r.success && r.queued).length;
|
|
277
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Bulk index complete: ${successCount}/${documents.length} succeeded, ${queuedCount} queued for repair, ${errors.length - queuedCount} unhandled errors`);
|
|
278
|
+
|
|
279
|
+
return { indexed: successCount, errors, results };
|
|
280
|
+
} catch (error) {
|
|
281
|
+
logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexFielded', error);
|
|
282
|
+
throw error;
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
async bulkIndexFulltext(documents) {
|
|
287
|
+
try {
|
|
288
|
+
return { acknowledged: true, count: documents?.length || 0 };
|
|
289
|
+
} catch (error) {
|
|
290
|
+
logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexFulltext', error);
|
|
291
|
+
throw error;
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
async bulkIndexSubstances(substances) {
|
|
296
|
+
try {
|
|
297
|
+
const documents = substances.map(substance => ({
|
|
298
|
+
source_id: substance.substance_id || substance.id,
|
|
299
|
+
chemical_name: substance.name || substance.substance_name,
|
|
300
|
+
chemical_meta: substance.meta || {},
|
|
301
|
+
chemical_identifiers: substance.identifiers || {},
|
|
302
|
+
chemical_synonyms: substance.synonyms || [],
|
|
303
|
+
chemical_categories: substance.categories || substance.substance_types || [],
|
|
304
|
+
created_at: substance.created_at,
|
|
305
|
+
updated_at: substance.updated_at,
|
|
306
|
+
imported_at: substance.imported_at
|
|
307
|
+
}));
|
|
308
|
+
|
|
309
|
+
return await this.bulkIndexFielded(documents);
|
|
310
|
+
} catch (error) {
|
|
311
|
+
logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexSubstances', error);
|
|
312
|
+
throw error;
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
async createChemical(chemical) {
|
|
317
|
+
try {
|
|
318
|
+
const db = this.getDb();
|
|
319
|
+
|
|
320
|
+
const [result] = await db
|
|
321
|
+
.insert(schema.chemicals)
|
|
322
|
+
.values({
|
|
323
|
+
sourceId: chemical.source_id,
|
|
324
|
+
chemicalName: chemical.chemical_name,
|
|
325
|
+
chemicalMeta: chemical.chemical_meta,
|
|
326
|
+
chemicalIdentifiers: chemical.chemical_identifiers,
|
|
327
|
+
chemicalSynonyms: chemical.chemical_synonyms,
|
|
328
|
+
chemicalCategories: chemical.chemical_categories,
|
|
329
|
+
createdAt: chemical.created_at || new Date(),
|
|
330
|
+
updatedAt: chemical.updated_at || new Date(),
|
|
331
|
+
...(chemical.imported_at && { importedAt: chemical.imported_at }),
|
|
332
|
+
...(chemical.chemical_id && { chemicalId: chemical.chemical_id })
|
|
333
|
+
})
|
|
334
|
+
.returning();
|
|
335
|
+
|
|
336
|
+
return result;
|
|
337
|
+
} catch (error) {
|
|
338
|
+
logError('pegasus-sdk', 'ChemicalsService', 'createChemical', error);
|
|
339
|
+
throw error;
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
async updateChemical(chemicalId, updates) {
|
|
344
|
+
try {
|
|
345
|
+
const db = this.getDb();
|
|
346
|
+
|
|
347
|
+
const updateData = {};
|
|
348
|
+
if (updates.chemical_name) updateData.chemicalName = updates.chemical_name;
|
|
349
|
+
if (updates.chemical_meta) updateData.chemicalMeta = updates.chemical_meta;
|
|
350
|
+
if (updates.chemical_identifiers) updateData.chemicalIdentifiers = updates.chemical_identifiers;
|
|
351
|
+
if (updates.chemical_synonyms) updateData.chemicalSynonyms = updates.chemical_synonyms;
|
|
352
|
+
if (updates.chemical_categories) updateData.chemicalCategories = updates.chemical_categories;
|
|
353
|
+
updateData.updatedAt = new Date();
|
|
354
|
+
|
|
355
|
+
const [result] = await db
|
|
356
|
+
.update(schema.chemicals)
|
|
357
|
+
.set(updateData)
|
|
358
|
+
.where(eq(schema.chemicals.chemicalId, chemicalId))
|
|
359
|
+
.returning();
|
|
360
|
+
|
|
361
|
+
return result || null;
|
|
362
|
+
} catch (error) {
|
|
363
|
+
logError('pegasus-sdk', 'ChemicalsService', 'updateChemical', error);
|
|
364
|
+
throw error;
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
async deleteChemical(chemicalId) {
|
|
369
|
+
try {
|
|
370
|
+
const db = this.getDb();
|
|
371
|
+
|
|
372
|
+
const [deleted] = await db
|
|
373
|
+
.delete(schema.chemicals)
|
|
374
|
+
.where(eq(schema.chemicals.chemicalId, chemicalId))
|
|
375
|
+
.returning();
|
|
376
|
+
|
|
377
|
+
return deleted || null;
|
|
378
|
+
} catch (error) {
|
|
379
|
+
logError('pegasus-sdk', 'ChemicalsService', 'deleteChemical', error);
|
|
380
|
+
throw error;
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
async deleteBySourceId(sourceId) {
|
|
385
|
+
try {
|
|
386
|
+
const db = this.getDb();
|
|
387
|
+
|
|
388
|
+
const [deleted] = await db
|
|
389
|
+
.delete(schema.chemicals)
|
|
390
|
+
.where(eq(schema.chemicals.sourceId, sourceId))
|
|
391
|
+
.returning();
|
|
392
|
+
|
|
393
|
+
return deleted || null;
|
|
394
|
+
} catch (error) {
|
|
395
|
+
logError('pegasus-sdk', 'ChemicalsService', 'deleteBySourceId', error);
|
|
396
|
+
throw error;
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
async deleteCollection(collectionName) {
|
|
401
|
+
try {
|
|
402
|
+
const db = this.getDb();
|
|
403
|
+
|
|
404
|
+
const deleted = await db
|
|
405
|
+
.delete(schema.chemicals)
|
|
406
|
+
.where(arrayContains(schema.chemicals.chemicalCategories, [collectionName]))
|
|
407
|
+
.returning();
|
|
408
|
+
|
|
409
|
+
return { deletedCount: deleted.length, deleted };
|
|
410
|
+
} catch (error) {
|
|
411
|
+
logError('pegasus-sdk', 'ChemicalsService', 'deleteCollection', error);
|
|
412
|
+
throw error;
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
async updateCollectionProperty(collectionName, propertyPath, newValue) {
|
|
417
|
+
try {
|
|
418
|
+
const db = this.getDb();
|
|
419
|
+
const pathArray = propertyPath.split('.');
|
|
420
|
+
const valueJson = JSON.stringify(newValue);
|
|
421
|
+
|
|
422
|
+
const results = await db
|
|
423
|
+
.update(schema.chemicals)
|
|
424
|
+
.set({
|
|
425
|
+
chemicalMeta: sql`jsonb_set(${schema.chemicals.chemicalMeta}, ${pathArray}::text[], ${valueJson}::jsonb)`,
|
|
426
|
+
updatedAt: new Date()
|
|
427
|
+
})
|
|
428
|
+
.where(arrayContains(schema.chemicals.chemicalCategories, [collectionName]))
|
|
429
|
+
.returning();
|
|
430
|
+
|
|
431
|
+
return { updatedCount: results.length, updated: results };
|
|
432
|
+
} catch (error) {
|
|
433
|
+
logError('pegasus-sdk', 'ChemicalsService', 'updateCollectionProperty', error);
|
|
434
|
+
throw error;
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
async bulkUpdateProperty(filter, propertyPath, newValue) {
|
|
439
|
+
try {
|
|
440
|
+
const db = this.getDb();
|
|
441
|
+
|
|
442
|
+
let whereCondition = sql`1=1`;
|
|
443
|
+
|
|
444
|
+
if (filter.chemicalIds && filter.chemicalIds.length > 0) {
|
|
445
|
+
whereCondition = inArray(schema.chemicals.chemicalId, filter.chemicalIds);
|
|
446
|
+
} else if (filter.sourceIds && filter.sourceIds.length > 0) {
|
|
447
|
+
whereCondition = inArray(schema.chemicals.sourceId, filter.sourceIds);
|
|
448
|
+
} else if (filter.category) {
|
|
449
|
+
whereCondition = arrayContains(schema.chemicals.chemicalCategories, [filter.category]);
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
const pathArray = propertyPath.split('.');
|
|
453
|
+
const valueJson = JSON.stringify(newValue);
|
|
454
|
+
|
|
455
|
+
const results = await db
|
|
456
|
+
.update(schema.chemicals)
|
|
457
|
+
.set({
|
|
458
|
+
chemicalMeta: sql`jsonb_set(COALESCE(${schema.chemicals.chemicalMeta}, '{}'), ${pathArray}::text[], ${valueJson}::jsonb)`,
|
|
459
|
+
updatedAt: new Date()
|
|
460
|
+
})
|
|
461
|
+
.where(whereCondition)
|
|
462
|
+
.returning();
|
|
463
|
+
|
|
464
|
+
return { updatedCount: results.length, updated: results };
|
|
465
|
+
} catch (error) {
|
|
466
|
+
logError('pegasus-sdk', 'ChemicalsService', 'bulkUpdateProperty', error);
|
|
467
|
+
throw error;
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
async getChemicalById(chemicalId) {
|
|
472
|
+
try {
|
|
473
|
+
const db = this.getDb();
|
|
474
|
+
|
|
475
|
+
const [result] = await db
|
|
476
|
+
.select()
|
|
477
|
+
.from(schema.chemicals)
|
|
478
|
+
.where(eq(schema.chemicals.chemicalId, chemicalId))
|
|
479
|
+
.limit(1);
|
|
480
|
+
|
|
481
|
+
return result || null;
|
|
482
|
+
} catch (error) {
|
|
483
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getChemicalById', error);
|
|
484
|
+
throw error;
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
async getChemicalBySourceId(sourceId) {
|
|
489
|
+
try {
|
|
490
|
+
const db = this.getDb();
|
|
491
|
+
|
|
492
|
+
const [result] = await db
|
|
493
|
+
.select()
|
|
494
|
+
.from(schema.chemicals)
|
|
495
|
+
.where(eq(schema.chemicals.sourceId, sourceId))
|
|
496
|
+
.limit(1);
|
|
497
|
+
|
|
498
|
+
return result || null;
|
|
499
|
+
} catch (error) {
|
|
500
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getChemicalBySourceId', error);
|
|
501
|
+
throw error;
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
async getChemicalsByCAS(casNumber) {
|
|
506
|
+
try {
|
|
507
|
+
const db = this.getDb();
|
|
508
|
+
|
|
509
|
+
const results = await db
|
|
510
|
+
.select()
|
|
511
|
+
.from(schema.chemicals)
|
|
512
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}->>'CAS' = ${casNumber} OR ${schema.chemicals.chemicalIdentifiers}->'CAS' ? ${casNumber}`);
|
|
513
|
+
|
|
514
|
+
return results;
|
|
515
|
+
} catch (error) {
|
|
516
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getChemicalsByCAS', error);
|
|
517
|
+
throw error;
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
async getChemicalsByIdentifier(identifierType, identifierValue) {
|
|
522
|
+
try {
|
|
523
|
+
if (!ALLOWED_IDENTIFIER_TYPES.has(identifierType)) {
|
|
524
|
+
throw new Error(`Invalid identifier type: ${identifierType}`);
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
const db = this.getDb();
|
|
528
|
+
|
|
529
|
+
const results = await db
|
|
530
|
+
.select()
|
|
531
|
+
.from(schema.chemicals)
|
|
532
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}->>${identifierType} = ${identifierValue} OR ${schema.chemicals.chemicalIdentifiers}->${identifierType} ? ${identifierValue}`);
|
|
533
|
+
|
|
534
|
+
return results;
|
|
535
|
+
} catch (error) {
|
|
536
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getChemicalsByIdentifier', error);
|
|
537
|
+
throw error;
|
|
538
|
+
}
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
async countByCollection(collectionName) {
|
|
542
|
+
try {
|
|
543
|
+
const db = this.getDb();
|
|
544
|
+
|
|
545
|
+
const result = await db
|
|
546
|
+
.select({ count: sql`count(*)::int` })
|
|
547
|
+
.from(schema.chemicals)
|
|
548
|
+
.where(arrayContains(schema.chemicals.chemicalCategories, [collectionName]));
|
|
549
|
+
|
|
550
|
+
return { count: result[0].count };
|
|
551
|
+
} catch (error) {
|
|
552
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countByCollection', error);
|
|
553
|
+
throw error;
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
async countByIdentifier(identifierValue) {
|
|
558
|
+
try {
|
|
559
|
+
const db = this.getDb();
|
|
560
|
+
|
|
561
|
+
const searchPattern = `%${escapeLikePattern(identifierValue)}%`;
|
|
562
|
+
const result = await db
|
|
563
|
+
.select({ count: sql`count(*)::int` })
|
|
564
|
+
.from(schema.chemicals)
|
|
565
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}::text LIKE ${searchPattern}`);
|
|
566
|
+
|
|
567
|
+
return { count: result[0].count };
|
|
568
|
+
} catch (error) {
|
|
569
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countByIdentifier', error);
|
|
570
|
+
throw error;
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
async countByCAS(casNumber) {
|
|
575
|
+
try {
|
|
576
|
+
const db = this.getDb();
|
|
577
|
+
|
|
578
|
+
const result = await db
|
|
579
|
+
.select({ count: sql`count(*)::int` })
|
|
580
|
+
.from(schema.chemicals)
|
|
581
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}->>'CAS' = ${casNumber} OR ${schema.chemicals.chemicalIdentifiers}->'CAS' ? ${casNumber}`);
|
|
582
|
+
|
|
583
|
+
return { count: result[0].count };
|
|
584
|
+
} catch (error) {
|
|
585
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countByCAS', error);
|
|
586
|
+
throw error;
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
async getTotalSynonymCount() {
|
|
591
|
+
try {
|
|
592
|
+
const db = this.getDb();
|
|
593
|
+
|
|
594
|
+
const result = await db
|
|
595
|
+
.select({ count: sql`sum(array_length(${schema.chemicals.chemicalSynonyms}, 1))::int` })
|
|
596
|
+
.from(schema.chemicals);
|
|
597
|
+
|
|
598
|
+
return { count: result[0].count || 0 };
|
|
599
|
+
} catch (error) {
|
|
600
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getTotalSynonymCount', error);
|
|
601
|
+
throw error;
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
async getSynonymCount(synonymTerm) {
|
|
606
|
+
try {
|
|
607
|
+
const db = this.getDb();
|
|
608
|
+
|
|
609
|
+
const result = await db
|
|
610
|
+
.select({ count: sql`count(*)::int` })
|
|
611
|
+
.from(schema.chemicals)
|
|
612
|
+
.where(arrayContains(schema.chemicals.chemicalSynonyms, [synonymTerm]));
|
|
613
|
+
|
|
614
|
+
return { count: result[0].count };
|
|
615
|
+
} catch (error) {
|
|
616
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getSynonymCount', error);
|
|
617
|
+
throw error;
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
async convertIdentifier(fromIdentifier, toIdentifierType) {
|
|
622
|
+
try {
|
|
623
|
+
const db = this.getDb();
|
|
624
|
+
|
|
625
|
+
const searchPattern = `%${escapeLikePattern(fromIdentifier)}%`;
|
|
626
|
+
const chemicals = await db
|
|
627
|
+
.select()
|
|
628
|
+
.from(schema.chemicals)
|
|
629
|
+
.where(sql`${schema.chemicals.chemicalIdentifiers}::text LIKE ${searchPattern}`);
|
|
630
|
+
|
|
631
|
+
if (chemicals.length === 0) {
|
|
632
|
+
return null;
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
const chemical = chemicals[0];
|
|
636
|
+
const identifiers = chemical.chemicalIdentifiers || {};
|
|
637
|
+
const toIdentifier = identifiers[toIdentifierType];
|
|
638
|
+
|
|
639
|
+
return {
|
|
640
|
+
fromIdentifier,
|
|
641
|
+
toIdentifierType,
|
|
642
|
+
toIdentifier,
|
|
643
|
+
chemicalId: chemical.chemicalId,
|
|
644
|
+
chemicalName: chemical.chemicalName
|
|
645
|
+
};
|
|
646
|
+
} catch (error) {
|
|
647
|
+
logError('pegasus-sdk', 'ChemicalsService', 'convertIdentifier', error);
|
|
648
|
+
throw error;
|
|
649
|
+
}
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
async convertIdentifiersBatch(fromIdentifiers, toIdentifierType) {
|
|
653
|
+
try {
|
|
654
|
+
const conversions = await Promise.all(
|
|
655
|
+
fromIdentifiers.map(fromIdentifier =>
|
|
656
|
+
this.convertIdentifier(fromIdentifier, toIdentifierType)
|
|
657
|
+
)
|
|
658
|
+
);
|
|
659
|
+
|
|
660
|
+
return conversions.filter(conversion => conversion !== null);
|
|
661
|
+
} catch (error) {
|
|
662
|
+
logError('pegasus-sdk', 'ChemicalsService', 'convertIdentifiersBatch', error);
|
|
663
|
+
throw error;
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
/**
|
|
668
|
+
* Search for chemicals by name using OpenSearch
|
|
669
|
+
* @param {string} searchTerm - Name to search for
|
|
670
|
+
* @param {number} limit - Maximum number of results (default: 10)
|
|
671
|
+
* @returns {Promise<Object>} Search results
|
|
672
|
+
*/
|
|
673
|
+
async searchByName(searchTerm, limit = 10) {
|
|
674
|
+
if (!searchTerm) {
|
|
675
|
+
return { results: [] };
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
try {
|
|
679
|
+
const opensearchClient = this.connection.getOpenSearchClient();
|
|
680
|
+
const indexName = this.connection.getOpenSearchIndex();
|
|
681
|
+
|
|
682
|
+
const response = await opensearchClient.search({
|
|
683
|
+
index: indexName,
|
|
684
|
+
body: {
|
|
685
|
+
size: limit,
|
|
686
|
+
query: {
|
|
687
|
+
bool: {
|
|
688
|
+
should: [
|
|
689
|
+
{ term: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
|
|
690
|
+
{ prefix: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
|
|
691
|
+
{ term: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
|
|
692
|
+
{ prefix: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } }
|
|
693
|
+
],
|
|
694
|
+
minimum_should_match: 1
|
|
695
|
+
}
|
|
696
|
+
},
|
|
697
|
+
_source: ['postgres_id', 'chemical_name', 'cas_numbers', 'identifier_values', 'synonyms']
|
|
698
|
+
}
|
|
699
|
+
});
|
|
700
|
+
|
|
701
|
+
const hits = response.body?.hits?.hits || [];
|
|
702
|
+
const results = hits.map((hit) => ({
|
|
703
|
+
id: hit._source.postgres_id,
|
|
704
|
+
name: hit._source.chemical_name,
|
|
705
|
+
cas: hit._source.cas_numbers || [],
|
|
706
|
+
identifiers: hit._source.identifier_values || [],
|
|
707
|
+
synonyms: hit._source.synonyms || [],
|
|
708
|
+
score: hit._score
|
|
709
|
+
}));
|
|
710
|
+
|
|
711
|
+
return { results };
|
|
712
|
+
} catch (error) {
|
|
713
|
+
logError('pegasus-sdk', 'ChemicalsService', 'searchByName', error);
|
|
714
|
+
throw error;
|
|
715
|
+
}
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
/**
|
|
719
|
+
* Search for chemicals by synonym using OpenSearch
|
|
720
|
+
* @param {string} synonymTerm - Synonym to search for
|
|
721
|
+
* @param {number} limit - Maximum number of results (default: 10)
|
|
722
|
+
* @returns {Promise<Object>} Search results
|
|
723
|
+
*/
|
|
724
|
+
async searchBySynonym(synonymTerm, limit = 10) {
|
|
725
|
+
if (!synonymTerm) {
|
|
726
|
+
return { results: [] };
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
try {
|
|
730
|
+
const opensearchClient = this.connection.getOpenSearchClient();
|
|
731
|
+
const indexName = this.connection.getOpenSearchIndex();
|
|
732
|
+
|
|
733
|
+
const response = await opensearchClient.search({
|
|
734
|
+
index: indexName,
|
|
735
|
+
body: {
|
|
736
|
+
size: limit,
|
|
737
|
+
query: {
|
|
738
|
+
bool: {
|
|
739
|
+
should: [
|
|
740
|
+
{ term: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
|
|
741
|
+
{ prefix: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
|
|
742
|
+
{ term: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
|
|
743
|
+
{ prefix: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } }
|
|
744
|
+
],
|
|
745
|
+
minimum_should_match: 1
|
|
746
|
+
}
|
|
747
|
+
},
|
|
748
|
+
_source: ['postgres_id', 'chemical_name', 'cas_numbers', 'identifier_values', 'synonyms']
|
|
749
|
+
}
|
|
750
|
+
});
|
|
751
|
+
|
|
752
|
+
const hits = response.body?.hits?.hits || [];
|
|
753
|
+
const results = hits.map((hit) => ({
|
|
754
|
+
id: hit._source.postgres_id,
|
|
755
|
+
name: hit._source.chemical_name,
|
|
756
|
+
cas: hit._source.cas_numbers || [],
|
|
757
|
+
identifiers: hit._source.identifier_values || [],
|
|
758
|
+
synonyms: hit._source.synonyms || [],
|
|
759
|
+
score: hit._score
|
|
760
|
+
}));
|
|
761
|
+
|
|
762
|
+
return { results };
|
|
763
|
+
} catch (error) {
|
|
764
|
+
logError('pegasus-sdk', 'ChemicalsService', 'searchBySynonym', error);
|
|
765
|
+
throw error;
|
|
766
|
+
}
|
|
767
|
+
}
|
|
768
|
+
|
|
769
|
+
async countAll() {
|
|
770
|
+
try {
|
|
771
|
+
const db = this.getDb();
|
|
772
|
+
const result = await db
|
|
773
|
+
.select({ count: sql`count(*)::int` })
|
|
774
|
+
.from(schema.chemicals);
|
|
775
|
+
return { count: result[0].count };
|
|
776
|
+
} catch (error) {
|
|
777
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countAll', error);
|
|
778
|
+
throw error;
|
|
779
|
+
}
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
async findChemicalsWithoutDocuments(collectionName, searchTerm, pageSize = 100) {
|
|
783
|
+
try {
|
|
784
|
+
const db = this.getDb();
|
|
785
|
+
|
|
786
|
+
let whereConditions = [];
|
|
787
|
+
|
|
788
|
+
if (collectionName) {
|
|
789
|
+
whereConditions.push(arrayContains(schema.chemicals.chemicalCategories, [collectionName]));
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
if (searchTerm) {
|
|
793
|
+
const searchPattern = `%${escapeLikePattern(searchTerm)}%`;
|
|
794
|
+
whereConditions.push(sql`${schema.chemicals.chemicalName} ILIKE ${searchPattern}`);
|
|
795
|
+
}
|
|
796
|
+
|
|
797
|
+
const whereClause = whereConditions.length > 0 ? and(...whereConditions) : undefined;
|
|
798
|
+
|
|
799
|
+
const results = await db
|
|
800
|
+
.select()
|
|
801
|
+
.from(schema.chemicals)
|
|
802
|
+
.where(whereClause)
|
|
803
|
+
.limit(pageSize);
|
|
804
|
+
|
|
805
|
+
return results;
|
|
806
|
+
} catch (error) {
|
|
807
|
+
logError('pegasus-sdk', 'ChemicalsService', 'findChemicalsWithoutDocuments', error);
|
|
808
|
+
throw error;
|
|
809
|
+
}
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
async countChemicalsWithoutDocuments(collectionName) {
|
|
813
|
+
try {
|
|
814
|
+
const db = this.getDb();
|
|
815
|
+
|
|
816
|
+
const whereClause = collectionName
|
|
817
|
+
? arrayContains(schema.chemicals.chemicalCategories, [collectionName])
|
|
818
|
+
: undefined;
|
|
819
|
+
|
|
820
|
+
const result = await db
|
|
821
|
+
.select({ count: sql`count(*)::int` })
|
|
822
|
+
.from(schema.chemicals)
|
|
823
|
+
.where(whereClause);
|
|
824
|
+
|
|
825
|
+
return { count: result[0].count };
|
|
826
|
+
} catch (error) {
|
|
827
|
+
logError('pegasus-sdk', 'ChemicalsService', 'countChemicalsWithoutDocuments', error);
|
|
828
|
+
throw error;
|
|
829
|
+
}
|
|
830
|
+
}
|
|
831
|
+
|
|
832
|
+
_buildEsHandlers() {
|
|
833
|
+
return {
|
|
834
|
+
index: async (params) => {
|
|
835
|
+
const chemical = params.body;
|
|
836
|
+
const result = await this.createChemical(chemical);
|
|
837
|
+
|
|
838
|
+
return {
|
|
839
|
+
_index: params.index,
|
|
840
|
+
_id: result.chemicalId,
|
|
841
|
+
_version: 1,
|
|
842
|
+
result: 'created',
|
|
843
|
+
_source: result
|
|
844
|
+
};
|
|
845
|
+
},
|
|
846
|
+
|
|
847
|
+
bulk: async (params) => {
|
|
848
|
+
const operations = params.body || params.operations;
|
|
849
|
+
|
|
850
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Starting bulk operation with ${operations?.length || 0} total operations`);
|
|
851
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Params index: ${params.index}`);
|
|
852
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Operations array type: ${Array.isArray(operations) ? 'array' : typeof operations}`);
|
|
853
|
+
|
|
854
|
+
const cdiDocuments = [];
|
|
855
|
+
let cdiOpCount = 0;
|
|
856
|
+
let otherOpCount = 0;
|
|
857
|
+
|
|
858
|
+
for (let i = 0; i < operations.length; i++) {
|
|
859
|
+
const op = operations[i];
|
|
860
|
+
const isIndexOp = !!(op.index || op.create);
|
|
861
|
+
const indexName = op.index?._index || op.create?._index || op.delete?._index || op.update?._index;
|
|
862
|
+
|
|
863
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Op[${i}]: action=${Object.keys(op)[0] || 'unknown'}, index=${indexName}`);
|
|
864
|
+
|
|
865
|
+
if ((op.index || op.create) &&
|
|
866
|
+
(op.index?._index === 'chemical_data_index' || op.create?._index === 'chemical_data_index')) {
|
|
867
|
+
const doc = operations[i + 1];
|
|
868
|
+
const sourceId = op.index?._id || op.create?._id;
|
|
869
|
+
|
|
870
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Found CDI entry: sourceId=${sourceId}, hasDoc=${!!doc}`);
|
|
871
|
+
|
|
872
|
+
if (doc && sourceId) {
|
|
873
|
+
const cdiDoc = {
|
|
874
|
+
source_id: sourceId,
|
|
875
|
+
chemical_name: doc.chemical_primary_name || (doc.chemical_names && doc.chemical_names[0]) || null,
|
|
876
|
+
chemical_meta: doc.chemical_meta || {},
|
|
877
|
+
chemical_identifiers: doc.chemical_identifiers || {},
|
|
878
|
+
chemical_synonyms: doc.chemical_synonyms || [],
|
|
879
|
+
chemical_categories: doc.chemical_categories || [],
|
|
880
|
+
created_at: doc.chemical_created_at ? (typeof doc.chemical_created_at === 'string' ? new Date(doc.chemical_created_at) : doc.chemical_created_at) : new Date(),
|
|
881
|
+
updated_at: doc.chemical_updated_at ? (typeof doc.chemical_updated_at === 'string' ? new Date(doc.chemical_updated_at) : doc.chemical_updated_at) : new Date()
|
|
882
|
+
};
|
|
883
|
+
cdiDocuments.push(cdiDoc);
|
|
884
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Extracted CDI doc: ${JSON.stringify({ source_id: cdiDoc.source_id, chemical_name: cdiDoc.chemical_name })}`);
|
|
885
|
+
i++;
|
|
886
|
+
cdiOpCount++;
|
|
887
|
+
} else {
|
|
888
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] CDI entry incomplete: sourceId=${sourceId}, doc=${!!doc}`);
|
|
889
|
+
}
|
|
890
|
+
} else {
|
|
891
|
+
otherOpCount++;
|
|
892
|
+
}
|
|
893
|
+
}
|
|
894
|
+
|
|
895
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Scan complete: ${cdiOpCount} CDI docs found, ${otherOpCount} other operations skipped`);
|
|
896
|
+
|
|
897
|
+
if (cdiDocuments.length === 0) {
|
|
898
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] No CDI documents to index, returning empty no-op response`);
|
|
899
|
+
return { took: 0, errors: false, items: [] };
|
|
900
|
+
}
|
|
901
|
+
|
|
902
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Calling bulkIndexFielded with ${cdiDocuments.length} CDI documents`);
|
|
903
|
+
|
|
904
|
+
try {
|
|
905
|
+
const result = await this.bulkIndexFielded(cdiDocuments);
|
|
906
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] bulkIndexFielded returned: indexed=${result.indexed}, errors=${result.errors.length}`);
|
|
907
|
+
|
|
908
|
+
if (result.errors.length > 0) {
|
|
909
|
+
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Errors during bulk indexing', result.errors);
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
return {
|
|
913
|
+
took: 1,
|
|
914
|
+
errors: result.errors.length > 0,
|
|
915
|
+
items: result.results.map((res, idx) => ({
|
|
916
|
+
index: {
|
|
917
|
+
_index: 'chemical_data_index',
|
|
918
|
+
_id: cdiDocuments[idx].source_id,
|
|
919
|
+
status: res.success ? 200 : 400,
|
|
920
|
+
result: res.success ? 'created' : 'error',
|
|
921
|
+
...(res.success ? {} : { error: { type: 'mapper_parsing_exception', reason: res.error } })
|
|
922
|
+
}
|
|
923
|
+
}))
|
|
924
|
+
};
|
|
925
|
+
} catch (error) {
|
|
926
|
+
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Fatal error during bulk indexing', error);
|
|
927
|
+
throw error;
|
|
928
|
+
}
|
|
929
|
+
},
|
|
930
|
+
|
|
931
|
+
get: async (params) => {
|
|
932
|
+
const result = await this.getChemicalBySourceId(params.id);
|
|
933
|
+
|
|
934
|
+
if (!result) {
|
|
935
|
+
return {
|
|
936
|
+
_index: params.index,
|
|
937
|
+
_id: params.id,
|
|
938
|
+
found: false
|
|
939
|
+
};
|
|
940
|
+
}
|
|
941
|
+
|
|
942
|
+
return {
|
|
943
|
+
_index: params.index,
|
|
944
|
+
_id: params.id,
|
|
945
|
+
_version: 1,
|
|
946
|
+
found: true,
|
|
947
|
+
_source: result
|
|
948
|
+
};
|
|
949
|
+
},
|
|
950
|
+
|
|
951
|
+
update: async (params) => {
|
|
952
|
+
const result = await this.updateChemical(params.id, params.body);
|
|
953
|
+
|
|
954
|
+
return {
|
|
955
|
+
_index: params.index,
|
|
956
|
+
_id: params.id,
|
|
957
|
+
_version: 2,
|
|
958
|
+
result: result ? 'updated' : 'noop',
|
|
959
|
+
_source: result
|
|
960
|
+
};
|
|
961
|
+
},
|
|
962
|
+
|
|
963
|
+
delete: async (params) => {
|
|
964
|
+
if (params.index === 'synonym_lookup_index') {
|
|
965
|
+
return { _index: params.index, _id: params.id, result: 'not_found' };
|
|
966
|
+
}
|
|
967
|
+
const result = await this.deleteBySourceId(params.id);
|
|
968
|
+
|
|
969
|
+
return {
|
|
970
|
+
_index: params.index,
|
|
971
|
+
_id: params.id,
|
|
972
|
+
result: result ? 'deleted' : 'not_found'
|
|
973
|
+
};
|
|
974
|
+
},
|
|
975
|
+
|
|
976
|
+
deleteByQuery: async (params) => {
|
|
977
|
+
const sourceId = params.body?.query?.term?.chemical_set_identifier
|
|
978
|
+
|| params.body?.query?.term?.source_id;
|
|
979
|
+
if (!sourceId) {
|
|
980
|
+
return { deleted: 0, failures: [] };
|
|
981
|
+
}
|
|
982
|
+
const result = await this.deleteBySourceId(sourceId);
|
|
983
|
+
return {
|
|
984
|
+
deleted: result ? 1 : 0,
|
|
985
|
+
failures: []
|
|
986
|
+
};
|
|
987
|
+
},
|
|
988
|
+
|
|
989
|
+
search: async (params) => {
|
|
990
|
+
let searchTerm = '';
|
|
991
|
+
let limit = params.body?.size || 10;
|
|
992
|
+
|
|
993
|
+
if (params.index === 'synonym_lookup_index') {
|
|
994
|
+
const query = params.body?.query;
|
|
995
|
+
searchTerm = query?.match?.chemical_name ||
|
|
996
|
+
query?.term?.chemical_name ||
|
|
997
|
+
query?.query_string?.query || '';
|
|
998
|
+
const searchResults = await this.searchBySynonym(searchTerm, limit);
|
|
999
|
+
|
|
1000
|
+
return {
|
|
1001
|
+
took: 1,
|
|
1002
|
+
timed_out: false,
|
|
1003
|
+
_shards: {
|
|
1004
|
+
total: 1,
|
|
1005
|
+
successful: 1,
|
|
1006
|
+
skipped: 0,
|
|
1007
|
+
failed: 0
|
|
1008
|
+
},
|
|
1009
|
+
hits: {
|
|
1010
|
+
total: {
|
|
1011
|
+
value: searchResults.results.length,
|
|
1012
|
+
relation: 'eq'
|
|
1013
|
+
},
|
|
1014
|
+
max_score: searchResults.results[0]?.score || 0,
|
|
1015
|
+
hits: searchResults.results.map(result => ({
|
|
1016
|
+
_index: params.index,
|
|
1017
|
+
_id: result.id,
|
|
1018
|
+
_score: result.score,
|
|
1019
|
+
_source: {
|
|
1020
|
+
postgres_id: result.id,
|
|
1021
|
+
chemical_name: result.name,
|
|
1022
|
+
cas_numbers: result.cas,
|
|
1023
|
+
identifier_values: result.identifiers,
|
|
1024
|
+
synonyms: result.synonyms
|
|
1025
|
+
}
|
|
1026
|
+
}))
|
|
1027
|
+
}
|
|
1028
|
+
};
|
|
1029
|
+
} else {
|
|
1030
|
+
const query = params.body?.query;
|
|
1031
|
+
searchTerm = query?.match?.chemical_name ||
|
|
1032
|
+
query?.term?.chemical_name ||
|
|
1033
|
+
query?.query_string?.query || '';
|
|
1034
|
+
const searchResults = await this.searchByName(searchTerm, limit);
|
|
1035
|
+
|
|
1036
|
+
return {
|
|
1037
|
+
took: 1,
|
|
1038
|
+
timed_out: false,
|
|
1039
|
+
_shards: {
|
|
1040
|
+
total: 1,
|
|
1041
|
+
successful: 1,
|
|
1042
|
+
skipped: 0,
|
|
1043
|
+
failed: 0
|
|
1044
|
+
},
|
|
1045
|
+
hits: {
|
|
1046
|
+
total: {
|
|
1047
|
+
value: searchResults.results.length,
|
|
1048
|
+
relation: 'eq'
|
|
1049
|
+
},
|
|
1050
|
+
max_score: searchResults.results[0]?.score || 0,
|
|
1051
|
+
hits: searchResults.results.map(result => ({
|
|
1052
|
+
_index: params.index,
|
|
1053
|
+
_id: result.id,
|
|
1054
|
+
_score: result.score,
|
|
1055
|
+
_source: {
|
|
1056
|
+
postgres_id: result.id,
|
|
1057
|
+
chemical_name: result.name,
|
|
1058
|
+
cas_numbers: result.cas,
|
|
1059
|
+
identifier_values: result.identifiers,
|
|
1060
|
+
synonyms: result.synonyms
|
|
1061
|
+
}
|
|
1062
|
+
}))
|
|
1063
|
+
}
|
|
1064
|
+
};
|
|
1065
|
+
}
|
|
1066
|
+
},
|
|
1067
|
+
|
|
1068
|
+
count: async (params) => {
|
|
1069
|
+
if (params.index === 'synonym_lookup_index') {
|
|
1070
|
+
return await this.getTotalSynonymCount();
|
|
1071
|
+
}
|
|
1072
|
+
return await this.countAll();
|
|
1073
|
+
}
|
|
1074
|
+
};
|
|
1075
|
+
}
|
|
1076
|
+
|
|
1077
|
+
registerElasticsearchHandlers(elasticsearchService) {
|
|
1078
|
+
const configurablePatterns = this.connection.config.indexRoutes?.chemicals || ['chemicals*'];
|
|
1079
|
+
const legacyPatterns = ['synonym_lookup_index', 'chemical_data_index', 'chemical_converter_index'];
|
|
1080
|
+
const allPatterns = [...new Set([...configurablePatterns, ...legacyPatterns])];
|
|
1081
|
+
const handlers = this._buildEsHandlers();
|
|
1082
|
+
allPatterns.forEach(pattern => {
|
|
1083
|
+
elasticsearchService.registerIndexRoute(pattern, handlers);
|
|
1084
|
+
});
|
|
1085
|
+
}
|
|
1086
|
+
}
|
|
1087
|
+
|
|
1029
1088
|
module.exports = ChemicalsService;
|