@toxplanet/pegasus-sdk 1.1.10 → 1.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
module.exports = {
|
|
2
2
|
environment: 'acc',
|
|
3
3
|
region: 'us-east-1',
|
|
4
|
+
awsAccountId: '605134466764',
|
|
4
5
|
secretName: 'arn:aws:secretsmanager:us-east-1:605134466764:secret:rds!cluster-9b502dde-5e2a-49db-b2c5-9801141ee40b-gkHbLm',
|
|
5
6
|
openSearchEndpoint: 'https://1pbu0yqr197lq07hfcjh.us-east-1.aoss.amazonaws.com',
|
|
6
7
|
openSearchIndex: 'chemicals',
|
|
@@ -19,6 +20,7 @@ module.exports = {
|
|
|
19
20
|
rejectUnauthorized: false
|
|
20
21
|
}
|
|
21
22
|
},
|
|
23
|
+
bulkFailureQueueName: 'pegasus-bulk-failures-acc',
|
|
22
24
|
indexRoutes: {
|
|
23
25
|
chemicals: ['chemicals*'],
|
|
24
26
|
documents: ['documents*'],
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
module.exports = {
|
|
2
2
|
environment: 'dev',
|
|
3
3
|
region: 'us-east-1',
|
|
4
|
+
awsAccountId: '292931567094',
|
|
4
5
|
secretName: 'arn:aws:secretsmanager:us-east-1:292931567094:secret:rds!cluster-b851c3ce-58cc-41cd-aeae-05cc7f5e031a-ZYSjiI',
|
|
5
6
|
openSearchEndpoint: 'https://war8lk73nzswquk8dcz1.us-east-1.aoss.amazonaws.com',
|
|
6
7
|
openSearchIndex: 'chemicals',
|
|
@@ -19,6 +20,7 @@ module.exports = {
|
|
|
19
20
|
rejectUnauthorized: false
|
|
20
21
|
}
|
|
21
22
|
},
|
|
23
|
+
bulkFailureQueueName: 'pegasus-bulk-failures-dev',
|
|
22
24
|
indexRoutes: {
|
|
23
25
|
chemicals: ['chemicals*'],
|
|
24
26
|
documents: ['documents*'],
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
module.exports = {
|
|
2
2
|
environment: 'prod',
|
|
3
3
|
region: 'us-east-1',
|
|
4
|
+
awsAccountId: '147997144422',
|
|
4
5
|
secretName: 'rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d',
|
|
5
6
|
openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
|
|
6
7
|
openSearchIndex: 'chemicals',
|
|
@@ -19,6 +20,7 @@ module.exports = {
|
|
|
19
20
|
rejectUnauthorized: true
|
|
20
21
|
}
|
|
21
22
|
},
|
|
23
|
+
bulkFailureQueueName: 'pegasus-bulk-failures-prod',
|
|
22
24
|
indexRoutes: {
|
|
23
25
|
chemicals: ['chemicals*'],
|
|
24
26
|
documents: ['documents*'],
|
package/config/environment.qa.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
module.exports = {
|
|
2
2
|
environment: 'qa',
|
|
3
3
|
region: 'us-east-1',
|
|
4
|
+
awsAccountId: '147997144422',
|
|
4
5
|
secretName: 'arn:aws:secretsmanager:us-east-1:147997144422:secret:rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d-oYjysU',
|
|
5
6
|
openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
|
|
6
7
|
openSearchIndex: 'chemicals',
|
|
@@ -19,6 +20,7 @@ module.exports = {
|
|
|
19
20
|
rejectUnauthorized: true
|
|
20
21
|
}
|
|
21
22
|
},
|
|
23
|
+
bulkFailureQueueName: 'pegasus-bulk-failures-qa',
|
|
22
24
|
indexRoutes: {
|
|
23
25
|
chemicals: ['chemicals*'],
|
|
24
26
|
documents: ['documents*'],
|
package/lib/chemicals.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
const { logError, logInfo } = require('@toxplanet/tphelper/logging');
|
|
2
2
|
const { getDrizzle, schema } = require('./db');
|
|
3
3
|
const { eq, sql, and, inArray, arrayContains } = require('drizzle-orm');
|
|
4
|
+
const { SQSClient, SendMessageCommand } = require('@aws-sdk/client-sqs');
|
|
4
5
|
|
|
5
6
|
const SEARCH_BOOST_EXACT_PRIMARY = 100;
|
|
6
7
|
const SEARCH_BOOST_PREFIX_PRIMARY = 50;
|
|
@@ -17,6 +18,7 @@ class ChemicalsService {
|
|
|
17
18
|
constructor(connection) {
|
|
18
19
|
this.connection = connection;
|
|
19
20
|
this.db = null;
|
|
21
|
+
this.sqsClient = null;
|
|
20
22
|
}
|
|
21
23
|
|
|
22
24
|
getDb() {
|
|
@@ -26,6 +28,46 @@ class ChemicalsService {
|
|
|
26
28
|
return this.db;
|
|
27
29
|
}
|
|
28
30
|
|
|
31
|
+
async sendToRetryQueue(operation, error) {
|
|
32
|
+
try {
|
|
33
|
+
const queueName = this.connection.config.bulkFailureQueueName || 'pegasus-bulk-failures';
|
|
34
|
+
const queueUrl = this.connection.config.bulkFailureQueueUrl ||
|
|
35
|
+
`https://sqs.${this.connection.region}.amazonaws.com/${this.connection.config.awsAccountId}/${queueName}`;
|
|
36
|
+
|
|
37
|
+
if (!this.sqsClient) {
|
|
38
|
+
this.sqsClient = new SQSClient({ region: this.connection.region });
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
const message = {
|
|
42
|
+
operation_type: operation.type || 'bulk',
|
|
43
|
+
timestamp: new Date().toISOString(),
|
|
44
|
+
environment: this.connection.environment,
|
|
45
|
+
error_message: error.message,
|
|
46
|
+
error_type: error.name,
|
|
47
|
+
bulk_params: operation.params,
|
|
48
|
+
cdi_documents: operation.cdiDocuments
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
const command = new SendMessageCommand({
|
|
52
|
+
QueueUrl: queueUrl,
|
|
53
|
+
MessageBody: JSON.stringify(message),
|
|
54
|
+
MessageAttributes: {
|
|
55
|
+
Operation: { StringValue: operation.type || 'bulk', DataType: 'String' },
|
|
56
|
+
Environment: { StringValue: this.connection.environment, DataType: 'String' },
|
|
57
|
+
ErrorType: { StringValue: error.name || 'Unknown', DataType: 'String' },
|
|
58
|
+
Timestamp: { StringValue: new Date().toISOString(), DataType: 'String' }
|
|
59
|
+
}
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
await this.sqsClient.send(command);
|
|
63
|
+
logInfo('pegasus-sdk', `[sendToRetryQueue] Successfully sent bulk operation to SQS for retry: ${queueName}`);
|
|
64
|
+
return true;
|
|
65
|
+
} catch (sqsError) {
|
|
66
|
+
logError('pegasus-sdk', 'sendToRetryQueue', 'Failed to send to SQS retry queue', sqsError);
|
|
67
|
+
return false;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
29
71
|
async bulkIndexFielded(documents) {
|
|
30
72
|
try {
|
|
31
73
|
logInfo('pegasus-sdk', `[bulkIndexFielded] Starting bulk index with ${documents?.length || 0} documents`);
|
|
@@ -46,6 +88,13 @@ class ChemicalsService {
|
|
|
46
88
|
logInfo('pegasus-sdk', `[bulkIndexFielded] Processing document ${i}: source_id=${doc.source_id}, chemical_name=${doc.chemical_name}`);
|
|
47
89
|
|
|
48
90
|
try {
|
|
91
|
+
const parseDate = (dateValue) => {
|
|
92
|
+
if (!dateValue) return new Date();
|
|
93
|
+
if (dateValue instanceof Date) return dateValue;
|
|
94
|
+
if (typeof dateValue === 'string') return new Date(dateValue);
|
|
95
|
+
return new Date();
|
|
96
|
+
};
|
|
97
|
+
|
|
49
98
|
const chemical = {
|
|
50
99
|
sourceId: doc.source_id || doc._id,
|
|
51
100
|
chemicalName: doc.chemical_name || doc.name,
|
|
@@ -53,8 +102,8 @@ class ChemicalsService {
|
|
|
53
102
|
chemicalIdentifiers: doc.chemical_identifiers || {},
|
|
54
103
|
chemicalSynonyms: doc.chemical_synonyms || [],
|
|
55
104
|
chemicalCategories: doc.chemical_categories || [],
|
|
56
|
-
createdAt: doc.created_at
|
|
57
|
-
updatedAt: doc.updated_at
|
|
105
|
+
createdAt: parseDate(doc.created_at),
|
|
106
|
+
updatedAt: parseDate(doc.updated_at),
|
|
58
107
|
...(doc.imported_at && { importedAt: doc.imported_at }),
|
|
59
108
|
...(doc.chemical_id && { chemicalId: doc.chemical_id })
|
|
60
109
|
};
|
|
@@ -75,7 +124,10 @@ class ChemicalsService {
|
|
|
75
124
|
updatedAt: new Date()
|
|
76
125
|
}
|
|
77
126
|
})
|
|
78
|
-
.returning(
|
|
127
|
+
.returning({
|
|
128
|
+
chemicalId: schema.chemicals.chemicalId,
|
|
129
|
+
sourceId: schema.chemicals.sourceId
|
|
130
|
+
});
|
|
79
131
|
|
|
80
132
|
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully: ${result?.chemicalId || 'no ID returned'}`);
|
|
81
133
|
results.push({ index: i, success: true, result });
|
|
@@ -690,8 +742,8 @@ class ChemicalsService {
|
|
|
690
742
|
chemical_identifiers: doc.chemical_identifiers || {},
|
|
691
743
|
chemical_synonyms: doc.chemical_synonyms || [],
|
|
692
744
|
chemical_categories: doc.chemical_categories || [],
|
|
693
|
-
created_at: doc.chemical_created_at,
|
|
694
|
-
updated_at: doc.chemical_updated_at
|
|
745
|
+
created_at: doc.chemical_created_at ? (typeof doc.chemical_created_at === 'string' ? new Date(doc.chemical_created_at) : doc.chemical_created_at) : new Date(),
|
|
746
|
+
updated_at: doc.chemical_updated_at ? (typeof doc.chemical_updated_at === 'string' ? new Date(doc.chemical_updated_at) : doc.chemical_updated_at) : new Date()
|
|
695
747
|
};
|
|
696
748
|
cdiDocuments.push(cdiDoc);
|
|
697
749
|
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Extracted CDI doc: ${JSON.stringify({ source_id: cdiDoc.source_id, chemical_name: cdiDoc.chemical_name })}`);
|
|
@@ -737,6 +789,43 @@ class ChemicalsService {
|
|
|
737
789
|
};
|
|
738
790
|
} catch (error) {
|
|
739
791
|
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Fatal error during bulk indexing', error);
|
|
792
|
+
|
|
793
|
+
const isTimeoutError = error.message && (
|
|
794
|
+
error.message.includes('timeout') ||
|
|
795
|
+
error.message.includes('Connection terminated') ||
|
|
796
|
+
error.code === 'ECONNREFUSED' ||
|
|
797
|
+
error.code === 'ETIMEDOUT'
|
|
798
|
+
);
|
|
799
|
+
|
|
800
|
+
if (isTimeoutError || error.message?.includes('Connection')) {
|
|
801
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Database timeout detected, attempting to queue for retry`);
|
|
802
|
+
|
|
803
|
+
const queueSuccess = await this.sendToRetryQueue({
|
|
804
|
+
type: 'bulk',
|
|
805
|
+
params,
|
|
806
|
+
cdiDocuments
|
|
807
|
+
}, error);
|
|
808
|
+
|
|
809
|
+
if (queueSuccess) {
|
|
810
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Successfully queued ${cdiDocuments.length} CDI documents for retry in SQS`);
|
|
811
|
+
return {
|
|
812
|
+
took: 1,
|
|
813
|
+
errors: false,
|
|
814
|
+
items: cdiDocuments.map(doc => ({
|
|
815
|
+
index: {
|
|
816
|
+
_index: 'chemical_data_index',
|
|
817
|
+
_id: doc.source_id,
|
|
818
|
+
status: 202,
|
|
819
|
+
result: 'accepted_for_retry',
|
|
820
|
+
_notes: 'Operation queued for retry due to database timeout'
|
|
821
|
+
}
|
|
822
|
+
}))
|
|
823
|
+
};
|
|
824
|
+
} else {
|
|
825
|
+
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Failed to queue operation for retry', error);
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
|
|
740
829
|
throw error;
|
|
741
830
|
}
|
|
742
831
|
},
|
package/lib/db/schema.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const { pgTable, uuid, text, jsonb, timestamp, index, uniqueIndex } = require('drizzle-orm/pg-core');
|
|
1
|
+
const { pgTable, uuid, text, jsonb, timestamp, index } = require('drizzle-orm/pg-core');
|
|
2
2
|
const { sql } = require('drizzle-orm');
|
|
3
3
|
|
|
4
4
|
const chemicals = pgTable('chemicals', {
|
|
@@ -14,12 +14,11 @@ const chemicals = pgTable('chemicals', {
|
|
|
14
14
|
importedAt: timestamp('imported_at', { withTimezone: true }).defaultNow()
|
|
15
15
|
}, (table) => {
|
|
16
16
|
return {
|
|
17
|
-
sourceIdIdx: uniqueIndex('idx_chemicals_source_id').on(table.sourceId),
|
|
18
17
|
nameIdx: index('idx_chemicals_name').on(table.chemicalName),
|
|
19
18
|
createdAtIdx: index('idx_chemicals_created_at').on(table.createdAt),
|
|
20
19
|
updatedAtIdx: index('idx_chemicals_updated_at').on(table.updatedAt),
|
|
21
|
-
identifiersGinIdx: index('idx_chemicals_identifiers_gin').on(table.chemicalIdentifiers),
|
|
22
|
-
synonymsGinIdx: index('idx_chemicals_synonyms_gin').on(table.chemicalSynonyms)
|
|
20
|
+
identifiersGinIdx: index('idx_chemicals_identifiers_gin').using('gin').on(table.chemicalIdentifiers),
|
|
21
|
+
synonymsGinIdx: index('idx_chemicals_synonyms_gin').using('gin').on(table.chemicalSynonyms)
|
|
23
22
|
};
|
|
24
23
|
});
|
|
25
24
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@toxplanet/pegasus-sdk",
|
|
3
|
-
"version": "1.1.10",
|
|
3
|
+
"version": "1.1.12",
|
|
4
4
|
"description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"type": "commonjs",
|
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
"drizzle-orm": "^0.30.0",
|
|
30
30
|
"@opensearch-project/opensearch": "^2.5.0",
|
|
31
31
|
"@aws-sdk/client-secrets-manager": "^3.490.0",
|
|
32
|
+
"@aws-sdk/client-sqs": "^3.490.0",
|
|
32
33
|
"@aws-sdk/credential-providers": "^3.490.0"
|
|
33
34
|
},
|
|
34
35
|
"engines": {
|