@toxplanet/pegasus-sdk 1.1.13 → 1.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/environment.acc.js +1 -1
- package/config/environment.dev.js +1 -1
- package/config/environment.prod.js +1 -1
- package/config/environment.qa.js +1 -1
- package/lib/chemicals.js +127 -90
- package/package.json +1 -1
package/config/environment.acc.js
CHANGED
|
@@ -2,6 +2,7 @@ module.exports = {
|
|
|
2
2
|
environment: 'acc',
|
|
3
3
|
region: 'us-east-1',
|
|
4
4
|
awsAccountId: '605134466764',
|
|
5
|
+
sourceService: 'pegasus-sdk',
|
|
5
6
|
secretName: 'arn:aws:secretsmanager:us-east-1:605134466764:secret:rds!cluster-9b502dde-5e2a-49db-b2c5-9801141ee40b-gkHbLm',
|
|
6
7
|
openSearchEndpoint: 'https://1pbu0yqr197lq07hfcjh.us-east-1.aoss.amazonaws.com',
|
|
7
8
|
openSearchIndex: 'chemicals',
|
|
@@ -20,7 +21,6 @@ module.exports = {
|
|
|
20
21
|
rejectUnauthorized: false
|
|
21
22
|
}
|
|
22
23
|
},
|
|
23
|
-
bulkFailureQueueName: 'pegasus-bulk-failures-acc',
|
|
24
24
|
indexRoutes: {
|
|
25
25
|
chemicals: ['chemicals*'],
|
|
26
26
|
documents: ['documents*'],
|
package/config/environment.dev.js
CHANGED
|
@@ -2,6 +2,7 @@ module.exports = {
|
|
|
2
2
|
environment: 'dev',
|
|
3
3
|
region: 'us-east-1',
|
|
4
4
|
awsAccountId: '292931567094',
|
|
5
|
+
sourceService: 'pegasus-sdk',
|
|
5
6
|
secretName: 'arn:aws:secretsmanager:us-east-1:292931567094:secret:rds!cluster-b851c3ce-58cc-41cd-aeae-05cc7f5e031a-ZYSjiI',
|
|
6
7
|
openSearchEndpoint: 'https://war8lk73nzswquk8dcz1.us-east-1.aoss.amazonaws.com',
|
|
7
8
|
openSearchIndex: 'chemicals',
|
|
@@ -20,7 +21,6 @@ module.exports = {
|
|
|
20
21
|
rejectUnauthorized: false
|
|
21
22
|
}
|
|
22
23
|
},
|
|
23
|
-
bulkFailureQueueName: 'cr-pegasus-failed-items-dev',
|
|
24
24
|
indexRoutes: {
|
|
25
25
|
chemicals: ['chemicals*'],
|
|
26
26
|
documents: ['documents*'],
|
package/config/environment.prod.js
CHANGED
|
@@ -2,6 +2,7 @@ module.exports = {
|
|
|
2
2
|
environment: 'prod',
|
|
3
3
|
region: 'us-east-1',
|
|
4
4
|
awsAccountId: '147997144422',
|
|
5
|
+
sourceService: 'pegasus-sdk',
|
|
5
6
|
secretName: 'rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d',
|
|
6
7
|
openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
|
|
7
8
|
openSearchIndex: 'chemicals',
|
|
@@ -20,7 +21,6 @@ module.exports = {
|
|
|
20
21
|
rejectUnauthorized: true
|
|
21
22
|
}
|
|
22
23
|
},
|
|
23
|
-
bulkFailureQueueName: 'pegasus-bulk-failures-prod',
|
|
24
24
|
indexRoutes: {
|
|
25
25
|
chemicals: ['chemicals*'],
|
|
26
26
|
documents: ['documents*'],
|
package/config/environment.qa.js
CHANGED
|
@@ -2,6 +2,7 @@ module.exports = {
|
|
|
2
2
|
environment: 'qa',
|
|
3
3
|
region: 'us-east-1',
|
|
4
4
|
awsAccountId: '147997144422',
|
|
5
|
+
sourceService: 'pegasus-sdk',
|
|
5
6
|
secretName: 'arn:aws:secretsmanager:us-east-1:147997144422:secret:rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d-oYjysU',
|
|
6
7
|
openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
|
|
7
8
|
openSearchIndex: 'chemicals',
|
|
@@ -20,7 +21,6 @@ module.exports = {
|
|
|
20
21
|
rejectUnauthorized: true
|
|
21
22
|
}
|
|
22
23
|
},
|
|
23
|
-
bulkFailureQueueName: 'pegasus-bulk-failures-qa',
|
|
24
24
|
indexRoutes: {
|
|
25
25
|
chemicals: ['chemicals*'],
|
|
26
26
|
documents: ['documents*'],
|
package/lib/chemicals.js
CHANGED
|
@@ -28,50 +28,81 @@ class ChemicalsService {
|
|
|
28
28
|
return this.db;
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
-
async sendToRetryQueue(operation, error) {
|
|
31
|
+
async sendSqlWriteFailure({ sql, parameters, error, retryCount, failedAt }) {
|
|
32
32
|
try {
|
|
33
|
-
const
|
|
34
|
-
const
|
|
35
|
-
|
|
36
|
-
|
|
33
|
+
const region = process.env.AWS_REGION || this.connection.region;
|
|
34
|
+
const { awsAccountId, environment } = this.connection.config;
|
|
35
|
+
const defaultQueueUrl = awsAccountId
|
|
36
|
+
? `https://sqs.${region}.amazonaws.com/${awsAccountId}/cr-pegasus-failed-items-${environment}`
|
|
37
|
+
: null;
|
|
38
|
+
const queueUrl = process.env.SQS_FAILED_ITEMS_QUEUE || defaultQueueUrl;
|
|
39
|
+
|
|
40
|
+
if (!queueUrl) {
|
|
41
|
+
logError('pegasus-sdk', 'sendSqlWriteFailure', 'No SQS queue URL available: set SQS_FAILED_ITEMS_QUEUE or provide awsAccountId in config');
|
|
42
|
+
return false;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
logInfo('pegasus-sdk', `[sendSqlWriteFailure] Using queue: ${queueUrl}${process.env.SQS_FAILED_ITEMS_QUEUE ? ' (from env)' : ' (default)'}`);
|
|
46
|
+
|
|
37
47
|
if (!this.sqsClient) {
|
|
38
|
-
this.sqsClient = new SQSClient({ region
|
|
48
|
+
this.sqsClient = new SQSClient({ region });
|
|
39
49
|
}
|
|
40
50
|
|
|
41
51
|
const message = {
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
52
|
+
MessageType: 'SqlWriteFailure',
|
|
53
|
+
SourceService: this.connection.config.sourceService || 'pegasus-sdk',
|
|
54
|
+
Timestamp: (failedAt || new Date()).toISOString(),
|
|
55
|
+
Sql: sql,
|
|
56
|
+
Parameters: parameters,
|
|
57
|
+
OriginalError: error.message,
|
|
58
|
+
RetryCount: retryCount
|
|
49
59
|
};
|
|
50
60
|
|
|
51
61
|
const command = new SendMessageCommand({
|
|
52
62
|
QueueUrl: queueUrl,
|
|
53
|
-
MessageBody: JSON.stringify(message)
|
|
54
|
-
MessageAttributes: {
|
|
55
|
-
Operation: { StringValue: operation.type || 'bulk', DataType: 'String' },
|
|
56
|
-
Environment: { StringValue: this.connection.environment, DataType: 'String' },
|
|
57
|
-
ErrorType: { StringValue: error.name || 'Unknown', DataType: 'String' },
|
|
58
|
-
Timestamp: { StringValue: new Date().toISOString(), DataType: 'String' }
|
|
59
|
-
}
|
|
63
|
+
MessageBody: JSON.stringify(message)
|
|
60
64
|
});
|
|
61
65
|
|
|
62
|
-
await this.sqsClient.send(command);
|
|
63
|
-
logInfo('pegasus-sdk', `[
|
|
66
|
+
const response = await this.sqsClient.send(command);
|
|
67
|
+
logInfo('pegasus-sdk', `[sendSqlWriteFailure] SqlWriteFailure posted to SQS: MessageId=${response.MessageId}, RetryCount=${retryCount}`);
|
|
64
68
|
return true;
|
|
65
69
|
} catch (sqsError) {
|
|
66
|
-
logError('pegasus-sdk', '
|
|
70
|
+
logError('pegasus-sdk', 'sendSqlWriteFailure', 'Failed to post SqlWriteFailure to SQS', sqsError);
|
|
67
71
|
return false;
|
|
68
72
|
}
|
|
69
73
|
}
|
|
70
74
|
|
|
75
|
+
_buildChemicalUpsertSql(chemical) {
|
|
76
|
+
const sql = [
|
|
77
|
+
'INSERT INTO chemicals (source_id, chemical_name, chemical_meta, chemical_identifiers, chemical_synonyms, chemical_categories, created_at, updated_at)',
|
|
78
|
+
'VALUES (@source_id, @chemical_name, @chemical_meta, @chemical_identifiers, @chemical_synonyms, @chemical_categories, @created_at, @updated_at)',
|
|
79
|
+
'ON CONFLICT (source_id) DO UPDATE SET',
|
|
80
|
+
' chemical_name = @chemical_name,',
|
|
81
|
+
' chemical_meta = @chemical_meta,',
|
|
82
|
+
' chemical_identifiers = @chemical_identifiers,',
|
|
83
|
+
' chemical_synonyms = @chemical_synonyms,',
|
|
84
|
+
' chemical_categories = @chemical_categories,',
|
|
85
|
+
' updated_at = @updated_at'
|
|
86
|
+
].join('\n');
|
|
87
|
+
|
|
88
|
+
const parameters = {
|
|
89
|
+
'@source_id': chemical.sourceId,
|
|
90
|
+
'@chemical_name': chemical.chemicalName,
|
|
91
|
+
'@chemical_meta': chemical.chemicalMeta,
|
|
92
|
+
'@chemical_identifiers': chemical.chemicalIdentifiers,
|
|
93
|
+
'@chemical_synonyms': chemical.chemicalSynonyms,
|
|
94
|
+
'@chemical_categories': chemical.chemicalCategories,
|
|
95
|
+
'@created_at': chemical.createdAt instanceof Date ? chemical.createdAt.toISOString() : chemical.createdAt,
|
|
96
|
+
'@updated_at': chemical.updatedAt instanceof Date ? chemical.updatedAt.toISOString() : chemical.updatedAt
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
return { sql, parameters };
|
|
100
|
+
}
|
|
101
|
+
|
|
71
102
|
async bulkIndexFielded(documents) {
|
|
72
103
|
try {
|
|
73
104
|
logInfo('pegasus-sdk', `[bulkIndexFielded] Starting bulk index with ${documents?.length || 0} documents`);
|
|
74
|
-
|
|
105
|
+
|
|
75
106
|
if (!documents || documents.length === 0) {
|
|
76
107
|
logInfo('pegasus-sdk', `[bulkIndexFielded] No documents provided, returning empty result`);
|
|
77
108
|
return { indexed: 0, errors: [], results: [] };
|
|
@@ -86,32 +117,37 @@ class ChemicalsService {
|
|
|
86
117
|
for (let i = 0; i < documents.length; i++) {
|
|
87
118
|
const doc = documents[i];
|
|
88
119
|
logInfo('pegasus-sdk', `[bulkIndexFielded] Processing document ${i}: source_id=${doc.source_id}, chemical_name=${doc.chemical_name}`);
|
|
89
|
-
|
|
90
|
-
try {
|
|
91
|
-
const parseDate = (dateValue) => {
|
|
92
|
-
if (!dateValue) return new Date();
|
|
93
|
-
if (dateValue instanceof Date) return dateValue;
|
|
94
|
-
if (typeof dateValue === 'string') return new Date(dateValue);
|
|
95
|
-
return new Date();
|
|
96
|
-
};
|
|
97
120
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
}
|
|
121
|
+
const parseDate = (dateValue) => {
|
|
122
|
+
if (!dateValue) return new Date();
|
|
123
|
+
if (dateValue instanceof Date) return dateValue;
|
|
124
|
+
if (typeof dateValue === 'string') return new Date(dateValue);
|
|
125
|
+
return new Date();
|
|
126
|
+
};
|
|
127
|
+
|
|
128
|
+
const chemical = {
|
|
129
|
+
sourceId: doc.source_id || doc._id,
|
|
130
|
+
chemicalName: doc.chemical_name || doc.name,
|
|
131
|
+
chemicalMeta: doc.chemical_meta || {},
|
|
132
|
+
chemicalIdentifiers: doc.chemical_identifiers || {},
|
|
133
|
+
chemicalSynonyms: doc.chemical_synonyms || [],
|
|
134
|
+
chemicalCategories: doc.chemical_categories || [],
|
|
135
|
+
createdAt: parseDate(doc.created_at),
|
|
136
|
+
updatedAt: parseDate(doc.updated_at),
|
|
137
|
+
...(doc.imported_at && { importedAt: doc.imported_at }),
|
|
138
|
+
...(doc.chemical_id && { chemicalId: doc.chemical_id })
|
|
139
|
+
};
|
|
110
140
|
|
|
111
|
-
|
|
141
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Prepared chemical object: sourceId=${chemical.sourceId}, chemicalName=${chemical.chemicalName}`);
|
|
112
142
|
|
|
113
|
-
|
|
114
|
-
|
|
143
|
+
const isConnectionError = (err) =>
|
|
144
|
+
err.message?.toLowerCase().includes('timeout') ||
|
|
145
|
+
err.message?.toLowerCase().includes('connection') ||
|
|
146
|
+
err.code === 'ECONNREFUSED' ||
|
|
147
|
+
err.code === 'ETIMEDOUT';
|
|
148
|
+
|
|
149
|
+
const attemptUpsert = () =>
|
|
150
|
+
db.insert(schema.chemicals)
|
|
115
151
|
.values(chemical)
|
|
116
152
|
.onConflictDoUpdate({
|
|
117
153
|
target: schema.chemicals.sourceId,
|
|
@@ -129,18 +165,56 @@ class ChemicalsService {
|
|
|
129
165
|
sourceId: schema.chemicals.sourceId
|
|
130
166
|
});
|
|
131
167
|
|
|
168
|
+
let lastError = null;
|
|
169
|
+
let retryCount = 0;
|
|
170
|
+
const failedAt = new Date();
|
|
171
|
+
|
|
172
|
+
try {
|
|
173
|
+
const [result] = await attemptUpsert();
|
|
132
174
|
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully: ${result?.chemicalId || 'no ID returned'}`);
|
|
133
175
|
results.push({ index: i, success: true, result });
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
176
|
+
continue;
|
|
177
|
+
} catch (firstErr) {
|
|
178
|
+
lastError = firstErr;
|
|
179
|
+
if (!isConnectionError(firstErr)) {
|
|
180
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} first attempt failed (${firstErr.message}), retrying once`);
|
|
181
|
+
try {
|
|
182
|
+
const [result] = await attemptUpsert();
|
|
183
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully on retry: ${result?.chemicalId || 'no ID returned'}`);
|
|
184
|
+
results.push({ index: i, success: true, result });
|
|
185
|
+
continue;
|
|
186
|
+
} catch (retryErr) {
|
|
187
|
+
lastError = retryErr;
|
|
188
|
+
retryCount = 1;
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
logError('pegasus-sdk', 'bulkIndexFielded', `Document ${i} failed after ${retryCount} local retries (source_id=${chemical.sourceId})`, lastError);
|
|
194
|
+
|
|
195
|
+
const { sql: failureSql, parameters: failureParams } = this._buildChemicalUpsertSql(chemical);
|
|
196
|
+
const queued = await this.sendSqlWriteFailure({
|
|
197
|
+
sql: failureSql,
|
|
198
|
+
parameters: failureParams,
|
|
199
|
+
error: lastError,
|
|
200
|
+
retryCount,
|
|
201
|
+
failedAt
|
|
202
|
+
});
|
|
203
|
+
|
|
204
|
+
if (queued) {
|
|
205
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} (source_id=${chemical.sourceId}) queued for repair via SQS`);
|
|
206
|
+
} else {
|
|
207
|
+
logError('pegasus-sdk', 'bulkIndexFielded', `Document ${i} (source_id=${chemical.sourceId}) failed and could not be queued — data loss risk`, lastError);
|
|
138
208
|
}
|
|
209
|
+
|
|
210
|
+
results.push({ index: i, success: false, error: lastError.message, queued });
|
|
211
|
+
errors.push({ document: doc, error: lastError.message, queued });
|
|
139
212
|
}
|
|
140
213
|
|
|
141
214
|
const successCount = results.filter(r => r.success).length;
|
|
142
|
-
|
|
143
|
-
|
|
215
|
+
const queuedCount = results.filter(r => !r.success && r.queued).length;
|
|
216
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Bulk index complete: ${successCount}/${documents.length} succeeded, ${queuedCount} queued for repair, ${errors.length - queuedCount} unhandled errors`);
|
|
217
|
+
|
|
144
218
|
return { indexed: successCount, errors, results };
|
|
145
219
|
} catch (error) {
|
|
146
220
|
logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexFielded', error);
|
|
@@ -789,43 +863,6 @@ class ChemicalsService {
|
|
|
789
863
|
};
|
|
790
864
|
} catch (error) {
|
|
791
865
|
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Fatal error during bulk indexing', error);
|
|
792
|
-
|
|
793
|
-
const isTimeoutError = error.message && (
|
|
794
|
-
error.message.includes('timeout') ||
|
|
795
|
-
error.message.includes('Connection terminated') ||
|
|
796
|
-
error.code === 'ECONNREFUSED' ||
|
|
797
|
-
error.code === 'ETIMEDOUT'
|
|
798
|
-
);
|
|
799
|
-
|
|
800
|
-
if (isTimeoutError || error.message?.includes('Connection')) {
|
|
801
|
-
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Database timeout detected, attempting to queue for retry`);
|
|
802
|
-
|
|
803
|
-
const queueSuccess = await this.sendToRetryQueue({
|
|
804
|
-
type: 'bulk',
|
|
805
|
-
params,
|
|
806
|
-
cdiDocuments
|
|
807
|
-
}, error);
|
|
808
|
-
|
|
809
|
-
if (queueSuccess) {
|
|
810
|
-
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Successfully queued ${cdiDocuments.length} CDI documents for retry in SQS`);
|
|
811
|
-
return {
|
|
812
|
-
took: 1,
|
|
813
|
-
errors: false,
|
|
814
|
-
items: cdiDocuments.map(doc => ({
|
|
815
|
-
index: {
|
|
816
|
-
_index: 'chemical_data_index',
|
|
817
|
-
_id: doc.source_id,
|
|
818
|
-
status: 202,
|
|
819
|
-
result: 'accepted_for_retry',
|
|
820
|
-
_notes: 'Operation queued for retry due to database timeout'
|
|
821
|
-
}
|
|
822
|
-
}))
|
|
823
|
-
};
|
|
824
|
-
} else {
|
|
825
|
-
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Failed to queue operation for retry', error);
|
|
826
|
-
}
|
|
827
|
-
}
|
|
828
|
-
|
|
829
866
|
throw error;
|
|
830
867
|
}
|
|
831
868
|
},
|
package/package.json
CHANGED