@toxplanet/pegasus-sdk 1.2.1 → 1.2.3

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/lib/connection.js CHANGED
@@ -1,208 +1,217 @@
1
- const { Client: OpenSearchClient } = require('@opensearch-project/opensearch');
2
- const { RDSDataClient, ExecuteStatementCommand } = require('@aws-sdk/client-rds-data');
3
- const { AwsSigv4Signer } = require('@opensearch-project/opensearch/aws');
4
- const { fromNodeProviderChain } = require('@aws-sdk/credential-providers');
5
- const { mapRecords } = require('./db');
6
- const { loadConfig } = require('../config');
7
- const { logInfo, logError } = require('@toxplanet/tphelper/logging');
8
-
9
- class PegasusConnection {
10
- constructor(config = {}) {
11
- const envConfig = loadConfig(config.environment);
12
-
13
- this.config = { ...envConfig, ...config };
14
- this.environment = this.config.environment;
15
- this.region = this.config.region;
16
- this.secretArn = this.config.secretArn;
17
- this.clusterArn = this.config.clusterArn;
18
- this.openSearchEndpoint = this.config.openSearchEndpoint;
19
- this.openSearchIndex = this.config.openSearchIndex;
20
- this.databaseName = this.config.database?.name;
21
-
22
- this.rdsDataClient = null;
23
- this.osClient = null;
24
- this.isConnected = false;
25
- }
26
-
27
- async connect() {
28
- if (this.isConnected) {
29
- return;
30
- }
31
-
32
- this.rdsDataClient = new RDSDataClient({ region: this.region });
33
-
34
- logInfo('pegasus-sdk', 'RDS Data API client initialized');
35
-
36
- try {
37
- const command = new ExecuteStatementCommand({
38
- resourceArn: this.clusterArn,
39
- secretArn: this.secretArn,
40
- database: this.databaseName,
41
- sql: 'SELECT 1'
42
- });
43
- await this.rdsDataClient.send(command);
44
- logInfo('pegasus-sdk', 'RDS Data API connection verified and ready');
45
- } catch (err) {
46
- logError('pegasus-sdk', 'PegasusConnection', 'connect.verification', err);
47
- throw err;
48
- }
49
-
50
- if (this.openSearchEndpoint) {
51
- this.osClient = new OpenSearchClient({
52
- ...AwsSigv4Signer({
53
- region: this.region,
54
- service: 'aoss',
55
- getCredentials: () => {
56
- const credentialsProvider = fromNodeProviderChain();
57
- return credentialsProvider();
58
- }
59
- }),
60
- node: this.openSearchEndpoint
61
- });
62
- }
63
-
64
- this.isConnected = true;
65
- }
66
-
67
- /**
68
- * With the RDS Data API, there are no stale connections or pool idleness.
69
- * Each call is stateless. Just ensure the client is initialized.
70
- *
71
- * @returns {Promise<boolean>} true if a connect happened, false if already connected.
72
- */
73
- async ensureConnected() {
74
- if (!this.isConnected) {
75
- await this.connect();
76
- return true;
77
- }
78
- return false;
79
- }
80
-
81
- /**
82
- * Data API is stateless, so reconnect simply means re-initializing.
83
- * Called in fallback scenarios but not necessary for stale connections.
84
- */
85
- async reconnect() {
86
- logInfo('pegasus-sdk', 'Reconnecting RDS Data API client');
87
- this.isConnected = false;
88
- await this.connect();
89
- }
90
-
91
- /**
92
- * With stateless Data API, there is no meaningful "activity" tracking needed.
93
- * This is a no-op for backward compatibility.
94
- */
95
- recordActivity() {
96
- }
97
-
98
- async disconnect() {
99
- if (!this.isConnected) {
100
- return;
101
- }
102
-
103
- this.rdsDataClient = null;
104
- this.osClient = null;
105
- this.isConnected = false;
106
- logInfo('pegasus-sdk', 'RDS Data API client disconnected');
107
- }
108
-
109
- getOpenSearchClient() {
110
- if (!this.osClient) {
111
- throw new Error('OpenSearch connection not established. Call connect() first or provide openSearchEndpoint.');
112
- }
113
- return this.osClient;
114
- }
115
-
116
- getOpenSearchIndex() {
117
- return this.openSearchIndex || 'chemicals';
118
- }
119
-
120
- async testConnection() {
121
- try {
122
- if (!this.rdsDataClient) {
123
- throw new Error('RDS Data API not initialized');
124
- }
125
-
126
- const command = new ExecuteStatementCommand({
127
- resourceArn: this.clusterArn,
128
- secretArn: this.secretArn,
129
- database: this.databaseName,
130
- sql: 'SELECT NOW() as current_time, version() as pg_version',
131
- includeResultMetadata: true
132
- });
133
-
134
- const result = await this.rdsDataClient.send(command);
135
- const rows = mapRecords(result.records, result.columnMetadata);
136
- const row = rows?.[0];
137
-
138
- const pgStatus = {
139
- connected: true,
140
- timestamp: row?.current_time,
141
- version: row?.pg_version
142
- };
143
-
144
- let osStatus = null;
145
- if (this.osClient) {
146
- try {
147
- const indexName = this.getOpenSearchIndex();
148
- const testSearch = await this.osClient.search({
149
- index: indexName,
150
- body: {
151
- size: 1,
152
- query: {
153
- match: { chemical_name: 'benzene' }
154
- }
155
- }
156
- });
157
- osStatus = {
158
- connected: true,
159
- resultsFound: testSearch.body.hits.total.value || 0
160
- };
161
- } catch (osError) {
162
- osStatus = {
163
- connected: false,
164
- error: osError.message
165
- };
166
- }
167
- }
168
-
169
- return {
170
- postgres: pgStatus,
171
- opensearch: osStatus,
172
- environment: this.environment,
173
- region: this.region
174
- };
175
- } catch (error) {
176
- return {
177
- postgres: { connected: false, error: error.message },
178
- opensearch: null,
179
- environment: this.environment,
180
- region: this.region
181
- };
182
- }
183
- }
184
-
185
- async query(sql, params) {
186
- const start = Date.now();
187
- logInfo('pegasus-sdk', `[SQL] ${sql}${params ? ` -- params: ${JSON.stringify(params)}` : ''}`);
188
-
189
- const command = new ExecuteStatementCommand({
190
- resourceArn: this.clusterArn,
191
- secretArn: this.secretArn,
192
- database: this.databaseName,
193
- sql,
194
- parameters: params || [],
195
- includeResultMetadata: true
196
- });
197
-
198
- const result = await this.rdsDataClient.send(command);
199
- logInfo('pegasus-sdk', `[SQL] rowCount: ${result.numberOfRecordsUpdated || 0} duration: ${Date.now() - start}ms`);
200
-
201
- return {
202
- rowCount: result.numberOfRecordsUpdated || result.records?.length || 0,
203
- rows: mapRecords(result.records, result.columnMetadata)
204
- };
205
- }
206
- }
207
-
208
- module.exports = PegasusConnection;
1
+ const { RDSDataClient, ExecuteStatementCommand } = require('@aws-sdk/client-rds-data');
2
+ const { LambdaClient, InvokeCommand } = require('@aws-sdk/client-lambda');
3
+ const { mapRecords } = require('./db');
4
+ const { loadConfig } = require('../config');
5
+ const { logInfo, logError } = require('@toxplanet/tphelper/logging');
6
+
7
+ class PegasusConnection {
8
+ constructor(config = {}) {
9
+ const envConfig = loadConfig(config.environment);
10
+
11
+ this.config = { ...envConfig, ...config };
12
+ this.environment = this.config.environment;
13
+ this.region = this.config.region;
14
+ this.secretArn = this.config.secretArn;
15
+ this.clusterArn = this.config.clusterArn;
16
+ this.databaseName = this.config.database?.name;
17
+
18
+ this.rdsDataClient = null;
19
+ this.lambdaClient = null;
20
+ this.isConnected = false;
21
+ }
22
+
23
+ async connect() {
24
+ if (this.isConnected) {
25
+ return;
26
+ }
27
+
28
+ this.rdsDataClient = new RDSDataClient({ region: this.region });
29
+ this.lambdaClient = new LambdaClient({ region: this.region });
30
+
31
+ logInfo('pegasus-sdk', 'RDS Data API client initialized');
32
+
33
+ try {
34
+ const command = new ExecuteStatementCommand({
35
+ resourceArn: this.clusterArn,
36
+ secretArn: this.secretArn,
37
+ database: this.databaseName,
38
+ sql: 'SELECT 1'
39
+ });
40
+ await this.rdsDataClient.send(command);
41
+ logInfo('pegasus-sdk', 'RDS Data API connection verified and ready');
42
+ } catch (err) {
43
+ logError('pegasus-sdk', 'PegasusConnection', 'connect.verification', err);
44
+ throw err;
45
+ }
46
+
47
+ this.isConnected = true;
48
+ }
49
+
50
+ /**
51
+ * With the RDS Data API, there are no stale connections or pool idleness.
52
+ * Each call is stateless. Just ensure the client is initialized.
53
+ *
54
+ * @returns {Promise<boolean>} true if a connect happened, false if already connected.
55
+ */
56
+ async ensureConnected() {
57
+ if (!this.isConnected) {
58
+ await this.connect();
59
+ return true;
60
+ }
61
+ return false;
62
+ }
63
+
64
+ /**
65
+ * Data API is stateless, so reconnect simply means re-initializing.
66
+ * Called in fallback scenarios but not necessary for stale connections.
67
+ */
68
+ async reconnect() {
69
+ logInfo('pegasus-sdk', 'Reconnecting RDS Data API client');
70
+ this.isConnected = false;
71
+ await this.connect();
72
+ }
73
+
74
+ /**
75
+ * With stateless Data API, there is no meaningful "activity" tracking needed.
76
+ * This is a no-op for backward compatibility.
77
+ */
78
+ recordActivity() {
79
+ }
80
+
81
+ async disconnect() {
82
+ if (!this.isConnected) {
83
+ return;
84
+ }
85
+
86
+ this.rdsDataClient = null;
87
+ this.lambdaClient = null;
88
+ this.isConnected = false;
89
+ logInfo('pegasus-sdk', 'RDS Data API client disconnected');
90
+ }
91
+
92
+ async invokeOpenSearch(event) {
93
+ if (!this.lambdaClient) {
94
+ throw new Error('Lambda client not initialized. Call connect() first.');
95
+ }
96
+
97
+ const lambdaArn = process.env.PEGASUS_SDK_OPENSEARCH_LAMBDA_ARN
98
+ || this.config.openSearchLambdaArn
99
+ || null;
100
+
101
+ if (!lambdaArn) {
102
+ throw new Error('No OpenSearch Lambda ARN configured. Set PEGASUS_SDK_OPENSEARCH_LAMBDA_ARN or provide awsAccountId in config.');
103
+ }
104
+
105
+ try {
106
+ const response = await this.lambdaClient.send(new InvokeCommand({
107
+ FunctionName: lambdaArn,
108
+ Payload: JSON.stringify(event)
109
+ }));
110
+
111
+ const rawPayload = response.Payload;
112
+ const payload = (rawPayload instanceof Uint8Array || Buffer.isBuffer(rawPayload))
113
+ ? Buffer.from(rawPayload).toString('utf-8')
114
+ : rawPayload;
115
+ const parsed = JSON.parse(payload);
116
+ const body = JSON.parse(parsed.body);
117
+
118
+ if (!body.success) {
119
+ const error = new Error(body.error);
120
+ error.statusCode = body.statusCode;
121
+ error.responseBody = body.body;
122
+ throw error;
123
+ }
124
+
125
+ return body.result;
126
+ } catch (error) {
127
+ logError('pegasus-sdk', 'PegasusConnection', 'invokeOpenSearch', error);
128
+ throw error;
129
+ }
130
+ }
131
+
132
+ async testConnection() {
133
+ try {
134
+ if (!this.rdsDataClient) {
135
+ throw new Error('RDS Data API not initialized');
136
+ }
137
+
138
+ const command = new ExecuteStatementCommand({
139
+ resourceArn: this.clusterArn,
140
+ secretArn: this.secretArn,
141
+ database: this.databaseName,
142
+ sql: 'SELECT NOW() as current_time, version() as pg_version',
143
+ includeResultMetadata: true
144
+ });
145
+
146
+ const result = await this.rdsDataClient.send(command);
147
+ const rows = mapRecords(result.records, result.columnMetadata);
148
+ const row = rows?.[0];
149
+
150
+ const pgStatus = {
151
+ connected: true,
152
+ timestamp: row?.current_time,
153
+ version: row?.pg_version
154
+ };
155
+
156
+ let osStatus = null;
157
+ try {
158
+ const testSearch = await this.invokeOpenSearch({
159
+ operation: 'search',
160
+ body: {
161
+ size: 1,
162
+ query: {
163
+ match: { chemical_name: 'benzene' }
164
+ }
165
+ }
166
+ });
167
+ osStatus = {
168
+ connected: true,
169
+ resultsFound: testSearch.hits.total.value || 0
170
+ };
171
+ } catch (osError) {
172
+ osStatus = {
173
+ connected: false,
174
+ error: osError.message
175
+ };
176
+ }
177
+
178
+ return {
179
+ postgres: pgStatus,
180
+ opensearch: osStatus,
181
+ environment: this.environment,
182
+ region: this.region
183
+ };
184
+ } catch (error) {
185
+ return {
186
+ postgres: { connected: false, error: error.message },
187
+ opensearch: null,
188
+ environment: this.environment,
189
+ region: this.region
190
+ };
191
+ }
192
+ }
193
+
194
+ async query(sql, params) {
195
+ const start = Date.now();
196
+ logInfo('pegasus-sdk', `[SQL] ${sql}${params ? ` -- params: ${JSON.stringify(params)}` : ''}`);
197
+
198
+ const command = new ExecuteStatementCommand({
199
+ resourceArn: this.clusterArn,
200
+ secretArn: this.secretArn,
201
+ database: this.databaseName,
202
+ sql,
203
+ parameters: params || [],
204
+ includeResultMetadata: true
205
+ });
206
+
207
+ const result = await this.rdsDataClient.send(command);
208
+ logInfo('pegasus-sdk', `[SQL] rowCount: ${result.numberOfRecordsUpdated || 0} duration: ${Date.now() - start}ms`);
209
+
210
+ return {
211
+ rowCount: result.numberOfRecordsUpdated || result.records?.length || 0,
212
+ rows: mapRecords(result.records, result.columnMetadata)
213
+ };
214
+ }
215
+ }
216
+
217
+ module.exports = PegasusConnection;
package/lib/db/index.js CHANGED
@@ -1,26 +1,26 @@
1
- function getFieldValue(field) {
2
- if (!field || field.isNull) return null;
3
- if ('stringValue' in field) return field.stringValue;
4
- if ('longValue' in field) return field.longValue;
5
- if ('doubleValue' in field) return field.doubleValue;
6
- if ('booleanValue' in field) return field.booleanValue;
7
- return null;
8
- }
9
-
10
- function mapRecord(record, columnMetadata) {
11
- const obj = {};
12
- columnMetadata.forEach((col, i) => {
13
- obj[col.name] = getFieldValue(record[i]);
14
- });
15
- return obj;
16
- }
17
-
18
- function mapRecords(records = [], columnMetadata = []) {
19
- return records.map(r => mapRecord(r, columnMetadata));
20
- }
21
-
22
- module.exports = {
23
- getFieldValue,
24
- mapRecord,
25
- mapRecords
26
- };
1
+ function getFieldValue(field) {
2
+ if (!field || field.isNull) return null;
3
+ if ('stringValue' in field) return field.stringValue;
4
+ if ('longValue' in field) return field.longValue;
5
+ if ('doubleValue' in field) return field.doubleValue;
6
+ if ('booleanValue' in field) return field.booleanValue;
7
+ return null;
8
+ }
9
+
10
+ function mapRecord(record, columnMetadata) {
11
+ const obj = {};
12
+ columnMetadata.forEach((col, i) => {
13
+ obj[col.name] = getFieldValue(record[i]);
14
+ });
15
+ return obj;
16
+ }
17
+
18
+ function mapRecords(records = [], columnMetadata = []) {
19
+ return records.map(r => mapRecord(r, columnMetadata));
20
+ }
21
+
22
+ module.exports = {
23
+ getFieldValue,
24
+ mapRecord,
25
+ mapRecords
26
+ };
package/lib/search.js CHANGED
@@ -109,8 +109,6 @@ class SearchService {
109
109
  const synonymPrefix = options.synonymPrefix !== undefined ? options.synonymPrefix : 3;
110
110
 
111
111
  try {
112
- const opensearchClient = this.connection.getOpenSearchClient();
113
-
114
112
  // Get CAS number variations (if applicable)
115
113
  const queryVariations = getCasNumberVariations(query);
116
114
 
@@ -123,24 +121,28 @@ class SearchService {
123
121
  const shouldClauses = [];
124
122
 
125
123
  for (const queryVariation of queryVariations) {
126
- // Exact matches (configurable priority)
127
124
  shouldClauses.push(
125
+ // keyword / array-of-keyword fields — exact term match
128
126
  { term: { 'cas_numbers': { value: queryVariation, boost: casExact } } },
129
- { term: { 'chemical_name.keyword': { value: queryVariation, boost: nameExact, case_insensitive: true } } },
130
127
  { term: { 'identifier_values': { value: queryVariation, boost: identifierExact } } },
128
+ // keyword sub-fields — exact + prefix (present when index has .keyword multi-field)
129
+ { term: { 'chemical_name.keyword': { value: queryVariation, boost: nameExact, case_insensitive: true } } },
131
130
  { term: { 'synonyms.keyword': { value: queryVariation, boost: synonymExact, case_insensitive: true } } },
132
- // Prefix matches (configurable priority)
133
- { prefix: { 'cas_numbers': { value: queryVariation, boost: casPrefix } } },
134
131
  { prefix: { 'chemical_name.keyword': { value: queryVariation, boost: namePrefix, case_insensitive: true } } },
132
+ { prefix: { 'synonyms.keyword': { value: queryVariation, boost: synonymPrefix, case_insensitive: true } } },
133
+ // keyword field prefix matching for cas / identifiers
134
+ { prefix: { 'cas_numbers': { value: queryVariation, boost: casPrefix } } },
135
135
  { prefix: { 'identifier_values': { value: queryVariation, boost: identifierPrefix } } },
136
- { prefix: { 'synonyms.keyword': { value: queryVariation, boost: synonymPrefix, case_insensitive: true } } }
136
+ // match queries work on plain text fields even without .keyword sub-fields
137
+ { match: { 'chemical_name': { query: queryVariation, boost: nameExact } } },
138
+ { match: { 'synonyms': { query: queryVariation, boost: synonymExact } } },
139
+ { match_phrase_prefix: { 'chemical_name': { query: queryVariation, boost: namePrefix } } },
140
+ { match_phrase_prefix: { 'synonyms': { query: queryVariation, boost: synonymPrefix } } }
137
141
  );
138
142
  }
139
143
 
140
- const indexName = this.connection.getOpenSearchIndex();
141
-
142
- const response = await opensearchClient.search({
143
- index: indexName,
144
+ const result = await this.connection.invokeOpenSearch({
145
+ operation: 'search',
144
146
  body: {
145
147
  size: limit,
146
148
  query: {
@@ -153,7 +155,7 @@ class SearchService {
153
155
  }
154
156
  });
155
157
 
156
- const hits = response.body?.hits?.hits || [];
158
+ const hits = result?.hits?.hits || [];
157
159
  const results = hits.map((hit) => ({
158
160
  id: hit._source.postgres_id,
159
161
  name: hit._source.chemical_name,
package/package.json CHANGED
@@ -1,47 +1,47 @@
1
- {
2
- "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.2.1",
4
- "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
- "main": "index.js",
6
- "type": "commonjs",
7
- "scripts": {
8
- "test": "vitest run",
9
- "test:watch": "vitest",
10
- "test:ui": "vitest --ui"
11
- },
12
- "keywords": [
13
- "elasticsearch",
14
- "opensearch",
15
- "postgresql",
16
- "aws",
17
- "chemicals",
18
- "database",
19
- "search",
20
- "sdk",
21
- "pegasus",
22
- "migration"
23
- ],
24
- "author": "Chemical Research Development Team",
25
- "license": "MIT",
26
- "dependencies": {
27
- "@toxplanet/tphelper": "1.2.8",
28
- "@opensearch-project/opensearch": "^2.5.0",
29
- "@aws-sdk/client-rds-data": "^3.490.0",
30
- "@aws-sdk/client-sqs": "^3.490.0",
31
- "@aws-sdk/credential-providers": "^3.490.0"
32
- },
33
- "engines": {
34
- "node": ">=18.0.0"
35
- },
36
- "files": [
37
- "index.js",
38
- "lib/",
39
- "config/",
40
- "README.md",
41
- "ELASTICSEARCH_CLIENT.md",
42
- "LICENSE"
43
- ],
44
- "devDependencies": {
45
- "vitest": "^1.2.0"
46
- }
47
- }
1
+ {
2
+ "name": "@toxplanet/pegasus-sdk",
3
+ "version": "1.2.3",
4
+ "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
+ "main": "index.js",
6
+ "type": "commonjs",
7
+ "scripts": {
8
+ "test": "vitest run",
9
+ "test:watch": "vitest",
10
+ "test:ui": "vitest --ui",
11
+ "test:e2e": "vitest run --config vitest.e2e.config.js"
12
+ },
13
+ "keywords": [
14
+ "elasticsearch",
15
+ "opensearch",
16
+ "postgresql",
17
+ "aws",
18
+ "chemicals",
19
+ "database",
20
+ "search",
21
+ "sdk",
22
+ "pegasus",
23
+ "migration"
24
+ ],
25
+ "author": "Chemical Research Development Team",
26
+ "license": "MIT",
27
+ "dependencies": {
28
+ "@toxplanet/tphelper": "1.2.8",
29
+ "@aws-sdk/client-lambda": "^3.490.0",
30
+ "@aws-sdk/client-rds-data": "^3.490.0",
31
+ "@aws-sdk/client-sqs": "^3.490.0"
32
+ },
33
+ "engines": {
34
+ "node": ">=18.0.0"
35
+ },
36
+ "files": [
37
+ "index.js",
38
+ "lib/",
39
+ "config/",
40
+ "README.md",
41
+ "ELASTICSEARCH_CLIENT.md",
42
+ "LICENSE"
43
+ ],
44
+ "devDependencies": {
45
+ "vitest": "^1.2.0"
46
+ }
47
+ }