@toxplanet/pegasus-sdk 1.1.12 → 1.1.14

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ module.exports = {
2
2
  environment: 'acc',
3
3
  region: 'us-east-1',
4
4
  awsAccountId: '605134466764',
5
+ sourceService: 'pegasus-sdk',
5
6
  secretName: 'arn:aws:secretsmanager:us-east-1:605134466764:secret:rds!cluster-9b502dde-5e2a-49db-b2c5-9801141ee40b-gkHbLm',
6
7
  openSearchEndpoint: 'https://1pbu0yqr197lq07hfcjh.us-east-1.aoss.amazonaws.com',
7
8
  openSearchIndex: 'chemicals',
@@ -20,7 +21,6 @@ module.exports = {
20
21
  rejectUnauthorized: false
21
22
  }
22
23
  },
23
- bulkFailureQueueName: 'pegasus-bulk-failures-acc',
24
24
  indexRoutes: {
25
25
  chemicals: ['chemicals*'],
26
26
  documents: ['documents*'],
@@ -2,6 +2,7 @@ module.exports = {
2
2
  environment: 'dev',
3
3
  region: 'us-east-1',
4
4
  awsAccountId: '292931567094',
5
+ sourceService: 'pegasus-sdk',
5
6
  secretName: 'arn:aws:secretsmanager:us-east-1:292931567094:secret:rds!cluster-b851c3ce-58cc-41cd-aeae-05cc7f5e031a-ZYSjiI',
6
7
  openSearchEndpoint: 'https://war8lk73nzswquk8dcz1.us-east-1.aoss.amazonaws.com',
7
8
  openSearchIndex: 'chemicals',
@@ -20,7 +21,6 @@ module.exports = {
20
21
  rejectUnauthorized: false
21
22
  }
22
23
  },
23
- bulkFailureQueueName: 'pegasus-bulk-failures-dev',
24
24
  indexRoutes: {
25
25
  chemicals: ['chemicals*'],
26
26
  documents: ['documents*'],
@@ -2,6 +2,7 @@ module.exports = {
2
2
  environment: 'prod',
3
3
  region: 'us-east-1',
4
4
  awsAccountId: '147997144422',
5
+ sourceService: 'pegasus-sdk',
5
6
  secretName: 'rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d',
6
7
  openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
7
8
  openSearchIndex: 'chemicals',
@@ -20,7 +21,6 @@ module.exports = {
20
21
  rejectUnauthorized: true
21
22
  }
22
23
  },
23
- bulkFailureQueueName: 'pegasus-bulk-failures-prod',
24
24
  indexRoutes: {
25
25
  chemicals: ['chemicals*'],
26
26
  documents: ['documents*'],
@@ -2,6 +2,7 @@ module.exports = {
2
2
  environment: 'qa',
3
3
  region: 'us-east-1',
4
4
  awsAccountId: '147997144422',
5
+ sourceService: 'pegasus-sdk',
5
6
  secretName: 'arn:aws:secretsmanager:us-east-1:147997144422:secret:rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d-oYjysU',
6
7
  openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
7
8
  openSearchIndex: 'chemicals',
@@ -20,7 +21,6 @@ module.exports = {
20
21
  rejectUnauthorized: true
21
22
  }
22
23
  },
23
- bulkFailureQueueName: 'pegasus-bulk-failures-qa',
24
24
  indexRoutes: {
25
25
  chemicals: ['chemicals*'],
26
26
  documents: ['documents*'],
package/lib/chemicals.js CHANGED
@@ -28,50 +28,81 @@ class ChemicalsService {
28
28
  return this.db;
29
29
  }
30
30
 
31
- async sendToRetryQueue(operation, error) {
31
+ async sendSqlWriteFailure({ sql, parameters, error, retryCount, failedAt }) {
32
32
  try {
33
- const queueName = this.connection.config.bulkFailureQueueName || 'pegasus-bulk-failures';
34
- const queueUrl = this.connection.config.bulkFailureQueueUrl ||
35
- `https://sqs.${this.connection.region}.amazonaws.com/${this.connection.config.awsAccountId}/${queueName}`;
36
-
33
+ const region = process.env.AWS_REGION || this.connection.region;
34
+ const { awsAccountId, environment } = this.connection.config;
35
+ const defaultQueueUrl = awsAccountId
36
+ ? `https://sqs.${region}.amazonaws.com/${awsAccountId}/cr-pegasus-failed-items-${environment}`
37
+ : null;
38
+ const queueUrl = process.env.SQS_FAILED_ITEMS_QUEUE || defaultQueueUrl;
39
+
40
+ if (!queueUrl) {
41
+ logError('pegasus-sdk', 'sendSqlWriteFailure', 'No SQS queue URL available: set SQS_FAILED_ITEMS_QUEUE or provide awsAccountId in config');
42
+ return false;
43
+ }
44
+
45
+ logInfo('pegasus-sdk', `[sendSqlWriteFailure] Using queue: ${queueUrl}${process.env.SQS_FAILED_ITEMS_QUEUE ? ' (from env)' : ' (default)'}`);
46
+
37
47
  if (!this.sqsClient) {
38
- this.sqsClient = new SQSClient({ region: this.connection.region });
48
+ this.sqsClient = new SQSClient({ region });
39
49
  }
40
50
 
41
51
  const message = {
42
- operation_type: operation.type || 'bulk',
43
- timestamp: new Date().toISOString(),
44
- environment: this.connection.environment,
45
- error_message: error.message,
46
- error_type: error.name,
47
- bulk_params: operation.params,
48
- cdi_documents: operation.cdiDocuments
52
+ MessageType: 'SqlWriteFailure',
53
+ SourceService: this.connection.config.sourceService || 'pegasus-sdk',
54
+ Timestamp: (failedAt || new Date()).toISOString(),
55
+ Sql: sql,
56
+ Parameters: parameters,
57
+ OriginalError: error.message,
58
+ RetryCount: retryCount
49
59
  };
50
60
 
51
61
  const command = new SendMessageCommand({
52
62
  QueueUrl: queueUrl,
53
- MessageBody: JSON.stringify(message),
54
- MessageAttributes: {
55
- Operation: { StringValue: operation.type || 'bulk', DataType: 'String' },
56
- Environment: { StringValue: this.connection.environment, DataType: 'String' },
57
- ErrorType: { StringValue: error.name || 'Unknown', DataType: 'String' },
58
- Timestamp: { StringValue: new Date().toISOString(), DataType: 'String' }
59
- }
63
+ MessageBody: JSON.stringify(message)
60
64
  });
61
65
 
62
- await this.sqsClient.send(command);
63
- logInfo('pegasus-sdk', `[sendToRetryQueue] Successfully sent bulk operation to SQS for retry: ${queueName}`);
66
+ const response = await this.sqsClient.send(command);
67
+ logInfo('pegasus-sdk', `[sendSqlWriteFailure] SqlWriteFailure posted to SQS: MessageId=${response.MessageId}, RetryCount=${retryCount}`);
64
68
  return true;
65
69
  } catch (sqsError) {
66
- logError('pegasus-sdk', 'sendToRetryQueue', 'Failed to send to SQS retry queue', sqsError);
70
+ logError('pegasus-sdk', 'sendSqlWriteFailure', 'Failed to post SqlWriteFailure to SQS', sqsError);
67
71
  return false;
68
72
  }
69
73
  }
70
74
 
75
+ _buildChemicalUpsertSql(chemical) {
76
+ const sql = [
77
+ 'INSERT INTO chemicals (source_id, chemical_name, chemical_meta, chemical_identifiers, chemical_synonyms, chemical_categories, created_at, updated_at)',
78
+ 'VALUES (@source_id, @chemical_name, @chemical_meta, @chemical_identifiers, @chemical_synonyms, @chemical_categories, @created_at, @updated_at)',
79
+ 'ON CONFLICT (source_id) DO UPDATE SET',
80
+ ' chemical_name = @chemical_name,',
81
+ ' chemical_meta = @chemical_meta,',
82
+ ' chemical_identifiers = @chemical_identifiers,',
83
+ ' chemical_synonyms = @chemical_synonyms,',
84
+ ' chemical_categories = @chemical_categories,',
85
+ ' updated_at = @updated_at'
86
+ ].join('\n');
87
+
88
+ const parameters = {
89
+ '@source_id': chemical.sourceId,
90
+ '@chemical_name': chemical.chemicalName,
91
+ '@chemical_meta': chemical.chemicalMeta,
92
+ '@chemical_identifiers': chemical.chemicalIdentifiers,
93
+ '@chemical_synonyms': chemical.chemicalSynonyms,
94
+ '@chemical_categories': chemical.chemicalCategories,
95
+ '@created_at': chemical.createdAt instanceof Date ? chemical.createdAt.toISOString() : chemical.createdAt,
96
+ '@updated_at': chemical.updatedAt instanceof Date ? chemical.updatedAt.toISOString() : chemical.updatedAt
97
+ };
98
+
99
+ return { sql, parameters };
100
+ }
101
+
71
102
  async bulkIndexFielded(documents) {
72
103
  try {
73
104
  logInfo('pegasus-sdk', `[bulkIndexFielded] Starting bulk index with ${documents?.length || 0} documents`);
74
-
105
+
75
106
  if (!documents || documents.length === 0) {
76
107
  logInfo('pegasus-sdk', `[bulkIndexFielded] No documents provided, returning empty result`);
77
108
  return { indexed: 0, errors: [], results: [] };
@@ -86,32 +117,37 @@ class ChemicalsService {
86
117
  for (let i = 0; i < documents.length; i++) {
87
118
  const doc = documents[i];
88
119
  logInfo('pegasus-sdk', `[bulkIndexFielded] Processing document ${i}: source_id=${doc.source_id}, chemical_name=${doc.chemical_name}`);
89
-
90
- try {
91
- const parseDate = (dateValue) => {
92
- if (!dateValue) return new Date();
93
- if (dateValue instanceof Date) return dateValue;
94
- if (typeof dateValue === 'string') return new Date(dateValue);
95
- return new Date();
96
- };
97
120
 
98
- const chemical = {
99
- sourceId: doc.source_id || doc._id,
100
- chemicalName: doc.chemical_name || doc.name,
101
- chemicalMeta: doc.chemical_meta || {},
102
- chemicalIdentifiers: doc.chemical_identifiers || {},
103
- chemicalSynonyms: doc.chemical_synonyms || [],
104
- chemicalCategories: doc.chemical_categories || [],
105
- createdAt: parseDate(doc.created_at),
106
- updatedAt: parseDate(doc.updated_at),
107
- ...(doc.imported_at && { importedAt: doc.imported_at }),
108
- ...(doc.chemical_id && { chemicalId: doc.chemical_id })
109
- };
121
+ const parseDate = (dateValue) => {
122
+ if (!dateValue) return new Date();
123
+ if (dateValue instanceof Date) return dateValue;
124
+ if (typeof dateValue === 'string') return new Date(dateValue);
125
+ return new Date();
126
+ };
127
+
128
+ const chemical = {
129
+ sourceId: doc.source_id || doc._id,
130
+ chemicalName: doc.chemical_name || doc.name,
131
+ chemicalMeta: doc.chemical_meta || {},
132
+ chemicalIdentifiers: doc.chemical_identifiers || {},
133
+ chemicalSynonyms: doc.chemical_synonyms || [],
134
+ chemicalCategories: doc.chemical_categories || [],
135
+ createdAt: parseDate(doc.created_at),
136
+ updatedAt: parseDate(doc.updated_at),
137
+ ...(doc.imported_at && { importedAt: doc.imported_at }),
138
+ ...(doc.chemical_id && { chemicalId: doc.chemical_id })
139
+ };
110
140
 
111
- logInfo('pegasus-sdk', `[bulkIndexFielded] Prepared chemical object: sourceId=${chemical.sourceId}, chemicalName=${chemical.chemicalName}`);
141
+ logInfo('pegasus-sdk', `[bulkIndexFielded] Prepared chemical object: sourceId=${chemical.sourceId}, chemicalName=${chemical.chemicalName}`);
112
142
 
113
- const [result] = await db
114
- .insert(schema.chemicals)
143
+ const isConnectionError = (err) =>
144
+ err.message?.toLowerCase().includes('timeout') ||
145
+ err.message?.toLowerCase().includes('connection') ||
146
+ err.code === 'ECONNREFUSED' ||
147
+ err.code === 'ETIMEDOUT';
148
+
149
+ const attemptUpsert = () =>
150
+ db.insert(schema.chemicals)
115
151
  .values(chemical)
116
152
  .onConflictDoUpdate({
117
153
  target: schema.chemicals.sourceId,
@@ -129,18 +165,56 @@ class ChemicalsService {
129
165
  sourceId: schema.chemicals.sourceId
130
166
  });
131
167
 
168
+ let lastError = null;
169
+ let retryCount = 0;
170
+ const failedAt = new Date();
171
+
172
+ try {
173
+ const [result] = await attemptUpsert();
132
174
  logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully: ${result?.chemicalId || 'no ID returned'}`);
133
175
  results.push({ index: i, success: true, result });
134
- } catch (err) {
135
- logError('pegasus-sdk', 'bulkIndexFielded', `Error indexing document ${i}`, err);
136
- results.push({ index: i, success: false, error: err.message });
137
- errors.push({ document: doc, error: err.message });
176
+ continue;
177
+ } catch (firstErr) {
178
+ lastError = firstErr;
179
+ if (!isConnectionError(firstErr)) {
180
+ logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} first attempt failed (${firstErr.message}), retrying once`);
181
+ try {
182
+ const [result] = await attemptUpsert();
183
+ logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully on retry: ${result?.chemicalId || 'no ID returned'}`);
184
+ results.push({ index: i, success: true, result });
185
+ continue;
186
+ } catch (retryErr) {
187
+ lastError = retryErr;
188
+ retryCount = 1;
189
+ }
190
+ }
191
+ }
192
+
193
+ logError('pegasus-sdk', 'bulkIndexFielded', `Document ${i} failed after ${retryCount} local retries (source_id=${chemical.sourceId})`, lastError);
194
+
195
+ const { sql: failureSql, parameters: failureParams } = this._buildChemicalUpsertSql(chemical);
196
+ const queued = await this.sendSqlWriteFailure({
197
+ sql: failureSql,
198
+ parameters: failureParams,
199
+ error: lastError,
200
+ retryCount,
201
+ failedAt
202
+ });
203
+
204
+ if (queued) {
205
+ logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} (source_id=${chemical.sourceId}) queued for repair via SQS`);
206
+ } else {
207
+ logError('pegasus-sdk', 'bulkIndexFielded', `Document ${i} (source_id=${chemical.sourceId}) failed and could not be queued — data loss risk`, lastError);
138
208
  }
209
+
210
+ results.push({ index: i, success: false, error: lastError.message, queued });
211
+ errors.push({ document: doc, error: lastError.message, queued });
139
212
  }
140
213
 
141
214
  const successCount = results.filter(r => r.success).length;
142
- logInfo('pegasus-sdk', `[bulkIndexFielded] Bulk index complete: ${successCount}/${documents.length} succeeded, ${errors.length} errors`);
143
-
215
+ const queuedCount = results.filter(r => !r.success && r.queued).length;
216
+ logInfo('pegasus-sdk', `[bulkIndexFielded] Bulk index complete: ${successCount}/${documents.length} succeeded, ${queuedCount} queued for repair, ${errors.length - queuedCount} unhandled errors`);
217
+
144
218
  return { indexed: successCount, errors, results };
145
219
  } catch (error) {
146
220
  logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexFielded', error);
@@ -789,43 +863,6 @@ class ChemicalsService {
789
863
  };
790
864
  } catch (error) {
791
865
  logError('pegasus-sdk', 'ChemicalsService.bulk', 'Fatal error during bulk indexing', error);
792
-
793
- const isTimeoutError = error.message && (
794
- error.message.includes('timeout') ||
795
- error.message.includes('Connection terminated') ||
796
- error.code === 'ECONNREFUSED' ||
797
- error.code === 'ETIMEDOUT'
798
- );
799
-
800
- if (isTimeoutError || error.message?.includes('Connection')) {
801
- logInfo('pegasus-sdk', `[ChemicalsService.bulk] Database timeout detected, attempting to queue for retry`);
802
-
803
- const queueSuccess = await this.sendToRetryQueue({
804
- type: 'bulk',
805
- params,
806
- cdiDocuments
807
- }, error);
808
-
809
- if (queueSuccess) {
810
- logInfo('pegasus-sdk', `[ChemicalsService.bulk] Successfully queued ${cdiDocuments.length} CDI documents for retry in SQS`);
811
- return {
812
- took: 1,
813
- errors: false,
814
- items: cdiDocuments.map(doc => ({
815
- index: {
816
- _index: 'chemical_data_index',
817
- _id: doc.source_id,
818
- status: 202,
819
- result: 'accepted_for_retry',
820
- _notes: 'Operation queued for retry due to database timeout'
821
- }
822
- }))
823
- };
824
- } else {
825
- logError('pegasus-sdk', 'ChemicalsService.bulk', 'Failed to queue operation for retry', error);
826
- }
827
- }
828
-
829
866
  throw error;
830
867
  }
831
868
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.1.12",
3
+ "version": "1.1.14",
4
4
  "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
5
  "main": "index.js",
6
6
  "type": "commonjs",