@toxplanet/pegasus-sdk 1.1.10 → 1.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  module.exports = {
2
2
  environment: 'acc',
3
3
  region: 'us-east-1',
4
+ awsAccountId: '605134466764',
4
5
  secretName: 'arn:aws:secretsmanager:us-east-1:605134466764:secret:rds!cluster-9b502dde-5e2a-49db-b2c5-9801141ee40b-gkHbLm',
5
6
  openSearchEndpoint: 'https://1pbu0yqr197lq07hfcjh.us-east-1.aoss.amazonaws.com',
6
7
  openSearchIndex: 'chemicals',
@@ -19,6 +20,7 @@ module.exports = {
19
20
  rejectUnauthorized: false
20
21
  }
21
22
  },
23
+ bulkFailureQueueName: 'pegasus-bulk-failures-acc',
22
24
  indexRoutes: {
23
25
  chemicals: ['chemicals*'],
24
26
  documents: ['documents*'],
@@ -1,6 +1,7 @@
1
1
  module.exports = {
2
2
  environment: 'dev',
3
3
  region: 'us-east-1',
4
+ awsAccountId: '292931567094',
4
5
  secretName: 'arn:aws:secretsmanager:us-east-1:292931567094:secret:rds!cluster-b851c3ce-58cc-41cd-aeae-05cc7f5e031a-ZYSjiI',
5
6
  openSearchEndpoint: 'https://war8lk73nzswquk8dcz1.us-east-1.aoss.amazonaws.com',
6
7
  openSearchIndex: 'chemicals',
@@ -19,6 +20,7 @@ module.exports = {
19
20
  rejectUnauthorized: false
20
21
  }
21
22
  },
23
+ bulkFailureQueueName: 'pegasus-bulk-failures-dev',
22
24
  indexRoutes: {
23
25
  chemicals: ['chemicals*'],
24
26
  documents: ['documents*'],
@@ -1,6 +1,7 @@
1
1
  module.exports = {
2
2
  environment: 'prod',
3
3
  region: 'us-east-1',
4
+ awsAccountId: '147997144422',
4
5
  secretName: 'rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d',
5
6
  openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
6
7
  openSearchIndex: 'chemicals',
@@ -19,6 +20,7 @@ module.exports = {
19
20
  rejectUnauthorized: true
20
21
  }
21
22
  },
23
+ bulkFailureQueueName: 'pegasus-bulk-failures-prod',
22
24
  indexRoutes: {
23
25
  chemicals: ['chemicals*'],
24
26
  documents: ['documents*'],
@@ -1,6 +1,7 @@
1
1
  module.exports = {
2
2
  environment: 'qa',
3
3
  region: 'us-east-1',
4
+ awsAccountId: '147997144422',
4
5
  secretName: 'arn:aws:secretsmanager:us-east-1:147997144422:secret:rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d-oYjysU',
5
6
  openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
6
7
  openSearchIndex: 'chemicals',
@@ -19,6 +20,7 @@ module.exports = {
19
20
  rejectUnauthorized: true
20
21
  }
21
22
  },
23
+ bulkFailureQueueName: 'pegasus-bulk-failures-qa',
22
24
  indexRoutes: {
23
25
  chemicals: ['chemicals*'],
24
26
  documents: ['documents*'],
package/lib/chemicals.js CHANGED
@@ -1,6 +1,7 @@
1
1
  const { logError, logInfo } = require('@toxplanet/tphelper/logging');
2
2
  const { getDrizzle, schema } = require('./db');
3
3
  const { eq, sql, and, inArray, arrayContains } = require('drizzle-orm');
4
+ const { SQSClient, SendMessageCommand } = require('@aws-sdk/client-sqs');
4
5
 
5
6
  const SEARCH_BOOST_EXACT_PRIMARY = 100;
6
7
  const SEARCH_BOOST_PREFIX_PRIMARY = 50;
@@ -17,6 +18,7 @@ class ChemicalsService {
17
18
  constructor(connection) {
18
19
  this.connection = connection;
19
20
  this.db = null;
21
+ this.sqsClient = null;
20
22
  }
21
23
 
22
24
  getDb() {
@@ -26,6 +28,46 @@ class ChemicalsService {
26
28
  return this.db;
27
29
  }
28
30
 
31
+ async sendToRetryQueue(operation, error) {
32
+ try {
33
+ const queueName = this.connection.config.bulkFailureQueueName || 'pegasus-bulk-failures';
34
+ const queueUrl = this.connection.config.bulkFailureQueueUrl ||
35
+ `https://sqs.${this.connection.region}.amazonaws.com/${this.connection.config.awsAccountId}/${queueName}`;
36
+
37
+ if (!this.sqsClient) {
38
+ this.sqsClient = new SQSClient({ region: this.connection.region });
39
+ }
40
+
41
+ const message = {
42
+ operation_type: operation.type || 'bulk',
43
+ timestamp: new Date().toISOString(),
44
+ environment: this.connection.environment,
45
+ error_message: error.message,
46
+ error_type: error.name,
47
+ bulk_params: operation.params,
48
+ cdi_documents: operation.cdiDocuments
49
+ };
50
+
51
+ const command = new SendMessageCommand({
52
+ QueueUrl: queueUrl,
53
+ MessageBody: JSON.stringify(message),
54
+ MessageAttributes: {
55
+ Operation: { StringValue: operation.type || 'bulk', DataType: 'String' },
56
+ Environment: { StringValue: this.connection.environment, DataType: 'String' },
57
+ ErrorType: { StringValue: error.name || 'Unknown', DataType: 'String' },
58
+ Timestamp: { StringValue: new Date().toISOString(), DataType: 'String' }
59
+ }
60
+ });
61
+
62
+ await this.sqsClient.send(command);
63
+ logInfo('pegasus-sdk', `[sendToRetryQueue] Successfully sent bulk operation to SQS for retry: ${queueName}`);
64
+ return true;
65
+ } catch (sqsError) {
66
+ logError('pegasus-sdk', 'sendToRetryQueue', 'Failed to send to SQS retry queue', sqsError);
67
+ return false;
68
+ }
69
+ }
70
+
29
71
  async bulkIndexFielded(documents) {
30
72
  try {
31
73
  logInfo('pegasus-sdk', `[bulkIndexFielded] Starting bulk index with ${documents?.length || 0} documents`);
@@ -46,6 +88,13 @@ class ChemicalsService {
46
88
  logInfo('pegasus-sdk', `[bulkIndexFielded] Processing document ${i}: source_id=${doc.source_id}, chemical_name=${doc.chemical_name}`);
47
89
 
48
90
  try {
91
+ const parseDate = (dateValue) => {
92
+ if (!dateValue) return new Date();
93
+ if (dateValue instanceof Date) return dateValue;
94
+ if (typeof dateValue === 'string') return new Date(dateValue);
95
+ return new Date();
96
+ };
97
+
49
98
  const chemical = {
50
99
  sourceId: doc.source_id || doc._id,
51
100
  chemicalName: doc.chemical_name || doc.name,
@@ -53,8 +102,8 @@ class ChemicalsService {
53
102
  chemicalIdentifiers: doc.chemical_identifiers || {},
54
103
  chemicalSynonyms: doc.chemical_synonyms || [],
55
104
  chemicalCategories: doc.chemical_categories || [],
56
- createdAt: doc.created_at || new Date(),
57
- updatedAt: doc.updated_at || new Date(),
105
+ createdAt: parseDate(doc.created_at),
106
+ updatedAt: parseDate(doc.updated_at),
58
107
  ...(doc.imported_at && { importedAt: doc.imported_at }),
59
108
  ...(doc.chemical_id && { chemicalId: doc.chemical_id })
60
109
  };
@@ -75,7 +124,10 @@ class ChemicalsService {
75
124
  updatedAt: new Date()
76
125
  }
77
126
  })
78
- .returning();
127
+ .returning({
128
+ chemicalId: schema.chemicals.chemicalId,
129
+ sourceId: schema.chemicals.sourceId
130
+ });
79
131
 
80
132
  logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully: ${result?.chemicalId || 'no ID returned'}`);
81
133
  results.push({ index: i, success: true, result });
@@ -690,8 +742,8 @@ class ChemicalsService {
690
742
  chemical_identifiers: doc.chemical_identifiers || {},
691
743
  chemical_synonyms: doc.chemical_synonyms || [],
692
744
  chemical_categories: doc.chemical_categories || [],
693
- created_at: doc.chemical_created_at,
694
- updated_at: doc.chemical_updated_at
745
+ created_at: doc.chemical_created_at ? (typeof doc.chemical_created_at === 'string' ? new Date(doc.chemical_created_at) : doc.chemical_created_at) : new Date(),
746
+ updated_at: doc.chemical_updated_at ? (typeof doc.chemical_updated_at === 'string' ? new Date(doc.chemical_updated_at) : doc.chemical_updated_at) : new Date()
695
747
  };
696
748
  cdiDocuments.push(cdiDoc);
697
749
  logInfo('pegasus-sdk', `[ChemicalsService.bulk] Extracted CDI doc: ${JSON.stringify({ source_id: cdiDoc.source_id, chemical_name: cdiDoc.chemical_name })}`);
@@ -737,6 +789,43 @@ class ChemicalsService {
737
789
  };
738
790
  } catch (error) {
739
791
  logError('pegasus-sdk', 'ChemicalsService.bulk', 'Fatal error during bulk indexing', error);
792
+
793
+ const isTimeoutError = error.message && (
794
+ error.message.includes('timeout') ||
795
+ error.message.includes('Connection terminated') ||
796
+ error.code === 'ECONNREFUSED' ||
797
+ error.code === 'ETIMEDOUT'
798
+ );
799
+
800
+ if (isTimeoutError || error.message?.includes('Connection')) {
801
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] Database timeout detected, attempting to queue for retry`);
802
+
803
+ const queueSuccess = await this.sendToRetryQueue({
804
+ type: 'bulk',
805
+ params,
806
+ cdiDocuments
807
+ }, error);
808
+
809
+ if (queueSuccess) {
810
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] Successfully queued ${cdiDocuments.length} CDI documents for retry in SQS`);
811
+ return {
812
+ took: 1,
813
+ errors: false,
814
+ items: cdiDocuments.map(doc => ({
815
+ index: {
816
+ _index: 'chemical_data_index',
817
+ _id: doc.source_id,
818
+ status: 202,
819
+ result: 'accepted_for_retry',
820
+ _notes: 'Operation queued for retry due to database timeout'
821
+ }
822
+ }))
823
+ };
824
+ } else {
825
+ logError('pegasus-sdk', 'ChemicalsService.bulk', 'Failed to queue operation for retry', error);
826
+ }
827
+ }
828
+
740
829
  throw error;
741
830
  }
742
831
  },
package/lib/db/schema.js CHANGED
@@ -1,4 +1,4 @@
1
- const { pgTable, uuid, text, jsonb, timestamp, index, uniqueIndex } = require('drizzle-orm/pg-core');
1
+ const { pgTable, uuid, text, jsonb, timestamp, index } = require('drizzle-orm/pg-core');
2
2
  const { sql } = require('drizzle-orm');
3
3
 
4
4
  const chemicals = pgTable('chemicals', {
@@ -14,12 +14,11 @@ const chemicals = pgTable('chemicals', {
14
14
  importedAt: timestamp('imported_at', { withTimezone: true }).defaultNow()
15
15
  }, (table) => {
16
16
  return {
17
- sourceIdIdx: uniqueIndex('idx_chemicals_source_id').on(table.sourceId),
18
17
  nameIdx: index('idx_chemicals_name').on(table.chemicalName),
19
18
  createdAtIdx: index('idx_chemicals_created_at').on(table.createdAt),
20
19
  updatedAtIdx: index('idx_chemicals_updated_at').on(table.updatedAt),
21
- identifiersGinIdx: index('idx_chemicals_identifiers_gin').on(table.chemicalIdentifiers),
22
- synonymsGinIdx: index('idx_chemicals_synonyms_gin').on(table.chemicalSynonyms)
20
+ identifiersGinIdx: index('idx_chemicals_identifiers_gin').using('gin').on(table.chemicalIdentifiers),
21
+ synonymsGinIdx: index('idx_chemicals_synonyms_gin').using('gin').on(table.chemicalSynonyms)
23
22
  };
24
23
  });
25
24
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.1.10",
3
+ "version": "1.1.12",
4
4
  "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
5
  "main": "index.js",
6
6
  "type": "commonjs",
@@ -29,6 +29,7 @@
29
29
  "drizzle-orm": "^0.30.0",
30
30
  "@opensearch-project/opensearch": "^2.5.0",
31
31
  "@aws-sdk/client-secrets-manager": "^3.490.0",
32
+ "@aws-sdk/client-sqs": "^3.490.0",
32
33
  "@aws-sdk/credential-providers": "^3.490.0"
33
34
  },
34
35
  "engines": {