@toxplanet/pegasus-sdk 1.1.11 → 1.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
module.exports = {
|
|
2
2
|
environment: 'acc',
|
|
3
3
|
region: 'us-east-1',
|
|
4
|
+
awsAccountId: '605134466764',
|
|
4
5
|
secretName: 'arn:aws:secretsmanager:us-east-1:605134466764:secret:rds!cluster-9b502dde-5e2a-49db-b2c5-9801141ee40b-gkHbLm',
|
|
5
6
|
openSearchEndpoint: 'https://1pbu0yqr197lq07hfcjh.us-east-1.aoss.amazonaws.com',
|
|
6
7
|
openSearchIndex: 'chemicals',
|
|
@@ -19,6 +20,7 @@ module.exports = {
|
|
|
19
20
|
rejectUnauthorized: false
|
|
20
21
|
}
|
|
21
22
|
},
|
|
23
|
+
bulkFailureQueueName: 'pegasus-bulk-failures-acc',
|
|
22
24
|
indexRoutes: {
|
|
23
25
|
chemicals: ['chemicals*'],
|
|
24
26
|
documents: ['documents*'],
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
module.exports = {
|
|
2
2
|
environment: 'dev',
|
|
3
3
|
region: 'us-east-1',
|
|
4
|
+
awsAccountId: '292931567094',
|
|
4
5
|
secretName: 'arn:aws:secretsmanager:us-east-1:292931567094:secret:rds!cluster-b851c3ce-58cc-41cd-aeae-05cc7f5e031a-ZYSjiI',
|
|
5
6
|
openSearchEndpoint: 'https://war8lk73nzswquk8dcz1.us-east-1.aoss.amazonaws.com',
|
|
6
7
|
openSearchIndex: 'chemicals',
|
|
@@ -19,6 +20,7 @@ module.exports = {
|
|
|
19
20
|
rejectUnauthorized: false
|
|
20
21
|
}
|
|
21
22
|
},
|
|
23
|
+
bulkFailureQueueName: 'pegasus-bulk-failures-dev',
|
|
22
24
|
indexRoutes: {
|
|
23
25
|
chemicals: ['chemicals*'],
|
|
24
26
|
documents: ['documents*'],
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
module.exports = {
|
|
2
2
|
environment: 'prod',
|
|
3
3
|
region: 'us-east-1',
|
|
4
|
+
awsAccountId: '147997144422',
|
|
4
5
|
secretName: 'rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d',
|
|
5
6
|
openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
|
|
6
7
|
openSearchIndex: 'chemicals',
|
|
@@ -19,6 +20,7 @@ module.exports = {
|
|
|
19
20
|
rejectUnauthorized: true
|
|
20
21
|
}
|
|
21
22
|
},
|
|
23
|
+
bulkFailureQueueName: 'pegasus-bulk-failures-prod',
|
|
22
24
|
indexRoutes: {
|
|
23
25
|
chemicals: ['chemicals*'],
|
|
24
26
|
documents: ['documents*'],
|
package/config/environment.qa.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
module.exports = {
|
|
2
2
|
environment: 'qa',
|
|
3
3
|
region: 'us-east-1',
|
|
4
|
+
awsAccountId: '147997144422',
|
|
4
5
|
secretName: 'arn:aws:secretsmanager:us-east-1:147997144422:secret:rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d-oYjysU',
|
|
5
6
|
openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
|
|
6
7
|
openSearchIndex: 'chemicals',
|
|
@@ -19,6 +20,7 @@ module.exports = {
|
|
|
19
20
|
rejectUnauthorized: true
|
|
20
21
|
}
|
|
21
22
|
},
|
|
23
|
+
bulkFailureQueueName: 'pegasus-bulk-failures-qa',
|
|
22
24
|
indexRoutes: {
|
|
23
25
|
chemicals: ['chemicals*'],
|
|
24
26
|
documents: ['documents*'],
|
package/lib/chemicals.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
const { logError, logInfo } = require('@toxplanet/tphelper/logging');
|
|
2
2
|
const { getDrizzle, schema } = require('./db');
|
|
3
3
|
const { eq, sql, and, inArray, arrayContains } = require('drizzle-orm');
|
|
4
|
+
const { SQSClient, SendMessageCommand } = require('@aws-sdk/client-sqs');
|
|
4
5
|
|
|
5
6
|
const SEARCH_BOOST_EXACT_PRIMARY = 100;
|
|
6
7
|
const SEARCH_BOOST_PREFIX_PRIMARY = 50;
|
|
@@ -17,6 +18,7 @@ class ChemicalsService {
|
|
|
17
18
|
constructor(connection) {
|
|
18
19
|
this.connection = connection;
|
|
19
20
|
this.db = null;
|
|
21
|
+
this.sqsClient = null;
|
|
20
22
|
}
|
|
21
23
|
|
|
22
24
|
getDb() {
|
|
@@ -26,6 +28,46 @@ class ChemicalsService {
|
|
|
26
28
|
return this.db;
|
|
27
29
|
}
|
|
28
30
|
|
|
31
|
+
async sendToRetryQueue(operation, error) {
|
|
32
|
+
try {
|
|
33
|
+
const queueName = this.connection.config.bulkFailureQueueName || 'pegasus-bulk-failures';
|
|
34
|
+
const queueUrl = this.connection.config.bulkFailureQueueUrl ||
|
|
35
|
+
`https://sqs.${this.connection.region}.amazonaws.com/${this.connection.config.awsAccountId}/${queueName}`;
|
|
36
|
+
|
|
37
|
+
if (!this.sqsClient) {
|
|
38
|
+
this.sqsClient = new SQSClient({ region: this.connection.region });
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
const message = {
|
|
42
|
+
operation_type: operation.type || 'bulk',
|
|
43
|
+
timestamp: new Date().toISOString(),
|
|
44
|
+
environment: this.connection.environment,
|
|
45
|
+
error_message: error.message,
|
|
46
|
+
error_type: error.name,
|
|
47
|
+
bulk_params: operation.params,
|
|
48
|
+
cdi_documents: operation.cdiDocuments
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
const command = new SendMessageCommand({
|
|
52
|
+
QueueUrl: queueUrl,
|
|
53
|
+
MessageBody: JSON.stringify(message),
|
|
54
|
+
MessageAttributes: {
|
|
55
|
+
Operation: { StringValue: operation.type || 'bulk', DataType: 'String' },
|
|
56
|
+
Environment: { StringValue: this.connection.environment, DataType: 'String' },
|
|
57
|
+
ErrorType: { StringValue: error.name || 'Unknown', DataType: 'String' },
|
|
58
|
+
Timestamp: { StringValue: new Date().toISOString(), DataType: 'String' }
|
|
59
|
+
}
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
await this.sqsClient.send(command);
|
|
63
|
+
logInfo('pegasus-sdk', `[sendToRetryQueue] Successfully sent bulk operation to SQS for retry: ${queueName}`);
|
|
64
|
+
return true;
|
|
65
|
+
} catch (sqsError) {
|
|
66
|
+
logError('pegasus-sdk', 'sendToRetryQueue', 'Failed to send to SQS retry queue', sqsError);
|
|
67
|
+
return false;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
29
71
|
async bulkIndexFielded(documents) {
|
|
30
72
|
try {
|
|
31
73
|
logInfo('pegasus-sdk', `[bulkIndexFielded] Starting bulk index with ${documents?.length || 0} documents`);
|
|
@@ -82,7 +124,10 @@ class ChemicalsService {
|
|
|
82
124
|
updatedAt: new Date()
|
|
83
125
|
}
|
|
84
126
|
})
|
|
85
|
-
.returning();
|
|
127
|
+
.returning({
|
|
128
|
+
chemicalId: schema.chemicals.chemicalId,
|
|
129
|
+
sourceId: schema.chemicals.sourceId
|
|
130
|
+
});
|
|
86
131
|
|
|
87
132
|
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully: ${result?.chemicalId || 'no ID returned'}`);
|
|
88
133
|
results.push({ index: i, success: true, result });
|
|
@@ -744,6 +789,43 @@ class ChemicalsService {
|
|
|
744
789
|
};
|
|
745
790
|
} catch (error) {
|
|
746
791
|
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Fatal error during bulk indexing', error);
|
|
792
|
+
|
|
793
|
+
const isTimeoutError = error.message && (
|
|
794
|
+
error.message.includes('timeout') ||
|
|
795
|
+
error.message.includes('Connection terminated') ||
|
|
796
|
+
error.code === 'ECONNREFUSED' ||
|
|
797
|
+
error.code === 'ETIMEDOUT'
|
|
798
|
+
);
|
|
799
|
+
|
|
800
|
+
if (isTimeoutError || error.message?.includes('Connection')) {
|
|
801
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Database timeout detected, attempting to queue for retry`);
|
|
802
|
+
|
|
803
|
+
const queueSuccess = await this.sendToRetryQueue({
|
|
804
|
+
type: 'bulk',
|
|
805
|
+
params,
|
|
806
|
+
cdiDocuments
|
|
807
|
+
}, error);
|
|
808
|
+
|
|
809
|
+
if (queueSuccess) {
|
|
810
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Successfully queued ${cdiDocuments.length} CDI documents for retry in SQS`);
|
|
811
|
+
return {
|
|
812
|
+
took: 1,
|
|
813
|
+
errors: false,
|
|
814
|
+
items: cdiDocuments.map(doc => ({
|
|
815
|
+
index: {
|
|
816
|
+
_index: 'chemical_data_index',
|
|
817
|
+
_id: doc.source_id,
|
|
818
|
+
status: 202,
|
|
819
|
+
result: 'accepted_for_retry',
|
|
820
|
+
_notes: 'Operation queued for retry due to database timeout'
|
|
821
|
+
}
|
|
822
|
+
}))
|
|
823
|
+
};
|
|
824
|
+
} else {
|
|
825
|
+
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Failed to queue operation for retry', error);
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
|
|
747
829
|
throw error;
|
|
748
830
|
}
|
|
749
831
|
},
|
package/lib/db/schema.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const { pgTable, uuid, text, jsonb, timestamp, index, uniqueIndex } = require('drizzle-orm/pg-core');
|
|
1
|
+
const { pgTable, uuid, text, jsonb, timestamp, index } = require('drizzle-orm/pg-core');
|
|
2
2
|
const { sql } = require('drizzle-orm');
|
|
3
3
|
|
|
4
4
|
const chemicals = pgTable('chemicals', {
|
|
@@ -14,12 +14,11 @@ const chemicals = pgTable('chemicals', {
|
|
|
14
14
|
importedAt: timestamp('imported_at', { withTimezone: true }).defaultNow()
|
|
15
15
|
}, (table) => {
|
|
16
16
|
return {
|
|
17
|
-
sourceIdIdx: uniqueIndex('idx_chemicals_source_id').on(table.sourceId),
|
|
18
17
|
nameIdx: index('idx_chemicals_name').on(table.chemicalName),
|
|
19
18
|
createdAtIdx: index('idx_chemicals_created_at').on(table.createdAt),
|
|
20
19
|
updatedAtIdx: index('idx_chemicals_updated_at').on(table.updatedAt),
|
|
21
|
-
identifiersGinIdx: index('idx_chemicals_identifiers_gin').on(table.chemicalIdentifiers),
|
|
22
|
-
synonymsGinIdx: index('idx_chemicals_synonyms_gin').on(table.chemicalSynonyms)
|
|
20
|
+
identifiersGinIdx: index('idx_chemicals_identifiers_gin').using('gin').on(table.chemicalIdentifiers),
|
|
21
|
+
synonymsGinIdx: index('idx_chemicals_synonyms_gin').using('gin').on(table.chemicalSynonyms)
|
|
23
22
|
};
|
|
24
23
|
});
|
|
25
24
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@toxplanet/pegasus-sdk",
|
|
3
|
-
"version": "1.1.11",
|
|
3
|
+
"version": "1.1.12",
|
|
4
4
|
"description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"type": "commonjs",
|
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
"drizzle-orm": "^0.30.0",
|
|
30
30
|
"@opensearch-project/opensearch": "^2.5.0",
|
|
31
31
|
"@aws-sdk/client-secrets-manager": "^3.490.0",
|
|
32
|
+
"@aws-sdk/client-sqs": "^3.490.0",
|
|
32
33
|
"@aws-sdk/credential-providers": "^3.490.0"
|
|
33
34
|
},
|
|
34
35
|
"engines": {
|