@toxplanet/pegasus-sdk 1.1.11 → 1.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  module.exports = {
2
2
  environment: 'acc',
3
3
  region: 'us-east-1',
4
+ awsAccountId: '605134466764',
4
5
  secretName: 'arn:aws:secretsmanager:us-east-1:605134466764:secret:rds!cluster-9b502dde-5e2a-49db-b2c5-9801141ee40b-gkHbLm',
5
6
  openSearchEndpoint: 'https://1pbu0yqr197lq07hfcjh.us-east-1.aoss.amazonaws.com',
6
7
  openSearchIndex: 'chemicals',
@@ -19,6 +20,7 @@ module.exports = {
19
20
  rejectUnauthorized: false
20
21
  }
21
22
  },
23
+ bulkFailureQueueName: 'pegasus-bulk-failures-acc',
22
24
  indexRoutes: {
23
25
  chemicals: ['chemicals*'],
24
26
  documents: ['documents*'],
@@ -1,6 +1,7 @@
1
1
  module.exports = {
2
2
  environment: 'dev',
3
3
  region: 'us-east-1',
4
+ awsAccountId: '292931567094',
4
5
  secretName: 'arn:aws:secretsmanager:us-east-1:292931567094:secret:rds!cluster-b851c3ce-58cc-41cd-aeae-05cc7f5e031a-ZYSjiI',
5
6
  openSearchEndpoint: 'https://war8lk73nzswquk8dcz1.us-east-1.aoss.amazonaws.com',
6
7
  openSearchIndex: 'chemicals',
@@ -19,6 +20,7 @@ module.exports = {
19
20
  rejectUnauthorized: false
20
21
  }
21
22
  },
23
+ bulkFailureQueueName: 'pegasus-bulk-failures-dev',
22
24
  indexRoutes: {
23
25
  chemicals: ['chemicals*'],
24
26
  documents: ['documents*'],
@@ -1,6 +1,7 @@
1
1
  module.exports = {
2
2
  environment: 'prod',
3
3
  region: 'us-east-1',
4
+ awsAccountId: '147997144422',
4
5
  secretName: 'rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d',
5
6
  openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
6
7
  openSearchIndex: 'chemicals',
@@ -19,6 +20,7 @@ module.exports = {
19
20
  rejectUnauthorized: true
20
21
  }
21
22
  },
23
+ bulkFailureQueueName: 'pegasus-bulk-failures-prod',
22
24
  indexRoutes: {
23
25
  chemicals: ['chemicals*'],
24
26
  documents: ['documents*'],
@@ -1,6 +1,7 @@
1
1
  module.exports = {
2
2
  environment: 'qa',
3
3
  region: 'us-east-1',
4
+ awsAccountId: '147997144422',
4
5
  secretName: 'arn:aws:secretsmanager:us-east-1:147997144422:secret:rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d-oYjysU',
5
6
  openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
6
7
  openSearchIndex: 'chemicals',
@@ -19,6 +20,7 @@ module.exports = {
19
20
  rejectUnauthorized: true
20
21
  }
21
22
  },
23
+ bulkFailureQueueName: 'pegasus-bulk-failures-qa',
22
24
  indexRoutes: {
23
25
  chemicals: ['chemicals*'],
24
26
  documents: ['documents*'],
package/lib/chemicals.js CHANGED
@@ -1,6 +1,7 @@
1
1
  const { logError, logInfo } = require('@toxplanet/tphelper/logging');
2
2
  const { getDrizzle, schema } = require('./db');
3
3
  const { eq, sql, and, inArray, arrayContains } = require('drizzle-orm');
4
+ const { SQSClient, SendMessageCommand } = require('@aws-sdk/client-sqs');
4
5
 
5
6
  const SEARCH_BOOST_EXACT_PRIMARY = 100;
6
7
  const SEARCH_BOOST_PREFIX_PRIMARY = 50;
@@ -17,6 +18,7 @@ class ChemicalsService {
17
18
  // Stores the supplied connection. `db` and `sqsClient` both start as null;
  // sendToRetryQueue creates the SQS client the first time it is needed.
  constructor(connection) {
18
19
  this.connection = connection;
19
20
  this.db = null;
21
+ this.sqsClient = null;
20
22
  }
21
23
 
22
24
  getDb() {
@@ -26,6 +28,46 @@ class ChemicalsService {
26
28
  return this.db;
27
29
  }
28
30
 
31
+ async sendToRetryQueue(operation, error) {
32
+ try {
33
+ const queueName = this.connection.config.bulkFailureQueueName || 'pegasus-bulk-failures';
34
+ const queueUrl = this.connection.config.bulkFailureQueueUrl ||
35
+ `https://sqs.${this.connection.region}.amazonaws.com/${this.connection.config.awsAccountId}/${queueName}`;
36
+
37
+ if (!this.sqsClient) {
38
+ this.sqsClient = new SQSClient({ region: this.connection.region });
39
+ }
40
+
41
+ const message = {
42
+ operation_type: operation.type || 'bulk',
43
+ timestamp: new Date().toISOString(),
44
+ environment: this.connection.environment,
45
+ error_message: error.message,
46
+ error_type: error.name,
47
+ bulk_params: operation.params,
48
+ cdi_documents: operation.cdiDocuments
49
+ };
50
+
51
+ const command = new SendMessageCommand({
52
+ QueueUrl: queueUrl,
53
+ MessageBody: JSON.stringify(message),
54
+ MessageAttributes: {
55
+ Operation: { StringValue: operation.type || 'bulk', DataType: 'String' },
56
+ Environment: { StringValue: this.connection.environment, DataType: 'String' },
57
+ ErrorType: { StringValue: error.name || 'Unknown', DataType: 'String' },
58
+ Timestamp: { StringValue: new Date().toISOString(), DataType: 'String' }
59
+ }
60
+ });
61
+
62
+ await this.sqsClient.send(command);
63
+ logInfo('pegasus-sdk', `[sendToRetryQueue] Successfully sent bulk operation to SQS for retry: ${queueName}`);
64
+ return true;
65
+ } catch (sqsError) {
66
+ logError('pegasus-sdk', 'sendToRetryQueue', 'Failed to send to SQS retry queue', sqsError);
67
+ return false;
68
+ }
69
+ }
70
+
29
71
  async bulkIndexFielded(documents) {
30
72
  try {
31
73
  logInfo('pegasus-sdk', `[bulkIndexFielded] Starting bulk index with ${documents?.length || 0} documents`);
@@ -82,7 +124,10 @@ class ChemicalsService {
82
124
  updatedAt: new Date()
83
125
  }
84
126
  })
85
- .returning();
127
+ .returning({
128
+ chemicalId: schema.chemicals.chemicalId,
129
+ sourceId: schema.chemicals.sourceId
130
+ });
86
131
 
87
132
  logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully: ${result?.chemicalId || 'no ID returned'}`);
88
133
  results.push({ index: i, success: true, result });
@@ -744,6 +789,43 @@ class ChemicalsService {
744
789
  };
745
790
  } catch (error) {
746
791
  logError('pegasus-sdk', 'ChemicalsService.bulk', 'Fatal error during bulk indexing', error);
792
+
793
+ const isTimeoutError = error.message && (
794
+ error.message.includes('timeout') ||
795
+ error.message.includes('Connection terminated') ||
796
+ error.code === 'ECONNREFUSED' ||
797
+ error.code === 'ETIMEDOUT'
798
+ );
799
+
800
+ if (isTimeoutError || error.message?.includes('Connection')) {
801
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] Database timeout detected, attempting to queue for retry`);
802
+
803
+ const queueSuccess = await this.sendToRetryQueue({
804
+ type: 'bulk',
805
+ params,
806
+ cdiDocuments
807
+ }, error);
808
+
809
+ if (queueSuccess) {
810
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] Successfully queued ${cdiDocuments.length} CDI documents for retry in SQS`);
811
+ return {
812
+ took: 1,
813
+ errors: false,
814
+ items: cdiDocuments.map(doc => ({
815
+ index: {
816
+ _index: 'chemical_data_index',
817
+ _id: doc.source_id,
818
+ status: 202,
819
+ result: 'accepted_for_retry',
820
+ _notes: 'Operation queued for retry due to database timeout'
821
+ }
822
+ }))
823
+ };
824
+ } else {
825
+ logError('pegasus-sdk', 'ChemicalsService.bulk', 'Failed to queue operation for retry', error);
826
+ }
827
+ }
828
+
747
829
  throw error;
748
830
  }
749
831
  },
package/lib/db/schema.js CHANGED
@@ -1,4 +1,4 @@
1
- const { pgTable, uuid, text, jsonb, timestamp, index, uniqueIndex } = require('drizzle-orm/pg-core');
1
+ const { pgTable, uuid, text, jsonb, timestamp, index } = require('drizzle-orm/pg-core');
2
2
  const { sql } = require('drizzle-orm');
3
3
 
4
4
  const chemicals = pgTable('chemicals', {
@@ -14,12 +14,11 @@ const chemicals = pgTable('chemicals', {
14
14
  importedAt: timestamp('imported_at', { withTimezone: true }).defaultNow()
15
15
  }, (table) => {
16
16
  return {
17
- sourceIdIdx: uniqueIndex('idx_chemicals_source_id').on(table.sourceId),
18
17
  nameIdx: index('idx_chemicals_name').on(table.chemicalName),
19
18
  createdAtIdx: index('idx_chemicals_created_at').on(table.createdAt),
20
19
  updatedAtIdx: index('idx_chemicals_updated_at').on(table.updatedAt),
21
- identifiersGinIdx: index('idx_chemicals_identifiers_gin').on(table.chemicalIdentifiers),
22
- synonymsGinIdx: index('idx_chemicals_synonyms_gin').on(table.chemicalSynonyms)
20
+ identifiersGinIdx: index('idx_chemicals_identifiers_gin').using('gin').on(table.chemicalIdentifiers),
21
+ synonymsGinIdx: index('idx_chemicals_synonyms_gin').using('gin').on(table.chemicalSynonyms)
23
22
  };
24
23
  });
25
24
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.1.11",
3
+ "version": "1.1.12",
4
4
  "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
5
  "main": "index.js",
6
6
  "type": "commonjs",
@@ -29,6 +29,7 @@
29
29
  "drizzle-orm": "^0.30.0",
30
30
  "@opensearch-project/opensearch": "^2.5.0",
31
31
  "@aws-sdk/client-secrets-manager": "^3.490.0",
32
+ "@aws-sdk/client-sqs": "^3.490.0",
32
33
  "@aws-sdk/credential-providers": "^3.490.0"
33
34
  },
34
35
  "engines": {