@toxplanet/pegasus-sdk 1.2.0 → 1.2.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -5,11 +5,10 @@ module.exports = {
5
5
  sourceService: 'pegasus-sdk',
6
6
  secretArn: 'arn:aws:secretsmanager:us-east-1:605134466764:secret:rds!cluster-9b502dde-5e2a-49db-b2c5-9801141ee40b-gkHbLm',
7
7
  clusterArn: 'arn:aws:rds:us-east-1:605134466764:cluster:cr-chemicals-acc',
8
+ openSearchLambdaArn: 'arn:aws:lambda:us-east-1:605134466764:function:pegasus-os-bridge-acc',
8
9
  database: {
9
10
  name: 'chemicals'
10
11
  },
11
- openSearchEndpoint: 'https://1pbu0yqr197lq07hfcjh.us-east-1.aoss.amazonaws.com',
12
- openSearchIndex: 'chemicals',
13
12
  indexRoutes: {
14
13
  chemicals: ['chemicals*'],
15
14
  documents: ['documents*'],
@@ -5,11 +5,10 @@ module.exports = {
5
5
  sourceService: 'pegasus-sdk',
6
6
  secretArn: 'arn:aws:secretsmanager:us-east-1:292931567094:secret:rds!cluster-b851c3ce-58cc-41cd-aeae-05cc7f5e031a-ZYSjiI',
7
7
  clusterArn: 'arn:aws:rds:us-east-1:292931567094:cluster:cr-chemicals',
8
+ openSearchLambdaArn: 'arn:aws:lambda:us-east-1:292931567094:function:pegasus-os-bridge-dev',
8
9
  database: {
9
10
  name: 'chemicals'
10
11
  },
11
- openSearchEndpoint: 'https://war8lk73nzswquk8dcz1.us-east-1.aoss.amazonaws.com',
12
- openSearchIndex: 'chemicals',
13
12
  indexRoutes: {
14
13
  chemicals: ['chemicals*'],
15
14
  documents: ['documents*'],
@@ -4,12 +4,11 @@ module.exports = {
4
4
  awsAccountId: '147997144422',
5
5
  sourceService: 'pegasus-sdk',
6
6
  secretArn: 'arn:aws:secretsmanager:us-east-1:964963729446:secret:rds!cluster-bd301b0f-93b7-4dcb-a4fa-ebf753fd1c00-atsPOm',
7
- clusterArn: 'arn:aws:rds:us-east-1:147997144422:cluster:cr-chemicals-prod',
7
+ clusterArn: 'arn:aws:rds:us-east-1:964963729446:cluster:cr-chemicals-prod',
8
+ openSearchLambdaArn: 'arn:aws:lambda:us-east-1:964963729446:function:pegasus-os-bridge-prod',
8
9
  database: {
9
10
  name: 'chemicals'
10
11
  },
11
- openSearchEndpoint: 'https://aq6ftqi0hawm42795fci.us-east-1.aoss-fips.amazonaws.com',
12
- openSearchIndex: 'chemicals',
13
12
  indexRoutes: {
14
13
  chemicals: ['chemicals*'],
15
14
  documents: ['documents*'],
@@ -5,11 +5,10 @@ module.exports = {
5
5
  sourceService: 'pegasus-sdk',
6
6
  secretArn: 'arn:aws:secretsmanager:us-east-1:147997144422:secret:rds!cluster-25483b3f-3758-43ed-9548-26c91de16c2d-oYjysU',
7
7
  clusterArn: 'arn:aws:rds:us-east-1:147997144422:cluster:cr-chemicals-qa',
8
+ openSearchLambdaArn: 'arn:aws:lambda:us-east-1:147997144422:function:pegasus-os-bridge-qa',
8
9
  database: {
9
10
  name: 'chemicals'
10
11
  },
11
- openSearchEndpoint: 'https://odusb11s00j5hyy5r6.us-east-1.aoss.amazonaws.com',
12
- openSearchIndex: 'chemicals',
13
12
  indexRoutes: {
14
13
  chemicals: ['chemicals*'],
15
14
  documents: ['documents*'],
package/config/index.js CHANGED
@@ -17,12 +17,8 @@ function loadConfig(envOverride = null) {
17
17
  config.secretName = process.env.PEGASUS_SDK_DB_SECRET_ARN;
18
18
  }
19
19
 
20
- if (process.env.PEGASUS_SDK_OPENSEARCH_ENDPOINT) {
21
- config.openSearchEndpoint = process.env.PEGASUS_SDK_OPENSEARCH_ENDPOINT;
22
- }
23
-
24
- if (process.env.PEGASUS_SDK_OPENSEARCH_INDEX) {
25
- config.openSearchIndex = process.env.PEGASUS_SDK_OPENSEARCH_INDEX;
20
+ if (process.env.PEGASUS_SDK_OPENSEARCH_LAMBDA_ARN) {
21
+ config.openSearchLambdaArn = process.env.PEGASUS_SDK_OPENSEARCH_LAMBDA_ARN;
26
22
  }
27
23
 
28
24
  if (process.env.PEGASUS_SDK_DATABASE_HOST) {
package/lib/chemicals.js CHANGED
@@ -738,11 +738,8 @@ class ChemicalsService {
738
738
  }
739
739
 
740
740
  try {
741
- const opensearchClient = this.connection.getOpenSearchClient();
742
- const indexName = this.connection.getOpenSearchIndex();
743
-
744
- const response = await opensearchClient.search({
745
- index: indexName,
741
+ const result = await this.connection.invokeOpenSearch({
742
+ operation: 'search',
746
743
  body: {
747
744
  size: limit,
748
745
  query: {
@@ -751,7 +748,11 @@ class ChemicalsService {
751
748
  { term: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
752
749
  { prefix: { 'chemical_name.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
753
750
  { term: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
754
- { prefix: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } }
751
+ { prefix: { 'synonyms.keyword': { value: searchTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
752
+ { match: { 'chemical_name': { query: searchTerm, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
753
+ { match: { 'synonyms': { query: searchTerm, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
754
+ { match_phrase_prefix: { 'chemical_name': { query: searchTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
755
+ { match_phrase_prefix: { 'synonyms': { query: searchTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
755
756
  ],
756
757
  minimum_should_match: 1
757
758
  }
@@ -760,7 +761,7 @@ class ChemicalsService {
760
761
  }
761
762
  });
762
763
 
763
- const hits = response.body?.hits?.hits || [];
764
+ const hits = result?.hits?.hits || [];
764
765
  const results = hits.map((hit) => ({
765
766
  id: hit._source.postgres_id,
766
767
  name: hit._source.chemical_name,
@@ -789,11 +790,8 @@ class ChemicalsService {
789
790
  }
790
791
 
791
792
  try {
792
- const opensearchClient = this.connection.getOpenSearchClient();
793
- const indexName = this.connection.getOpenSearchIndex();
794
-
795
- const response = await opensearchClient.search({
796
- index: indexName,
793
+ const result = await this.connection.invokeOpenSearch({
794
+ operation: 'search',
797
795
  body: {
798
796
  size: limit,
799
797
  query: {
@@ -802,7 +800,11 @@ class ChemicalsService {
802
800
  { term: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_PRIMARY, case_insensitive: true } } },
803
801
  { prefix: { 'synonyms.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY, case_insensitive: true } } },
804
802
  { term: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_EXACT_SECONDARY, case_insensitive: true } } },
805
- { prefix: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } }
803
+ { prefix: { 'chemical_name.keyword': { value: synonymTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY, case_insensitive: true } } },
804
+ { match: { 'synonyms': { query: synonymTerm, boost: SEARCH_BOOST_EXACT_PRIMARY } } },
805
+ { match: { 'chemical_name': { query: synonymTerm, boost: SEARCH_BOOST_EXACT_SECONDARY } } },
806
+ { match_phrase_prefix: { 'synonyms': { query: synonymTerm, boost: SEARCH_BOOST_PREFIX_PRIMARY } } },
807
+ { match_phrase_prefix: { 'chemical_name': { query: synonymTerm, boost: SEARCH_BOOST_PREFIX_SECONDARY } } }
806
808
  ],
807
809
  minimum_should_match: 1
808
810
  }
@@ -811,7 +813,7 @@ class ChemicalsService {
811
813
  }
812
814
  });
813
815
 
814
- const hits = response.body?.hits?.hits || [];
816
+ const hits = result?.hits?.hits || [];
815
817
  const results = hits.map((hit) => ({
816
818
  id: hit._source.postgres_id,
817
819
  name: hit._source.chemical_name,
package/lib/connection.js CHANGED
@@ -1,7 +1,5 @@
1
- const { Client: OpenSearchClient } = require('@opensearch-project/opensearch');
2
1
  const { RDSDataClient, ExecuteStatementCommand } = require('@aws-sdk/client-rds-data');
3
- const { AwsSigv4Signer } = require('@opensearch-project/opensearch/aws');
4
- const { fromNodeProviderChain } = require('@aws-sdk/credential-providers');
2
+ const { LambdaClient, InvokeCommand } = require('@aws-sdk/client-lambda');
5
3
  const { mapRecords } = require('./db');
6
4
  const { loadConfig } = require('../config');
7
5
  const { logInfo, logError } = require('@toxplanet/tphelper/logging');
@@ -15,12 +13,10 @@ class PegasusConnection {
15
13
  this.region = this.config.region;
16
14
  this.secretArn = this.config.secretArn;
17
15
  this.clusterArn = this.config.clusterArn;
18
- this.openSearchEndpoint = this.config.openSearchEndpoint;
19
- this.openSearchIndex = this.config.openSearchIndex;
20
16
  this.databaseName = this.config.database?.name;
21
17
 
22
18
  this.rdsDataClient = null;
23
- this.osClient = null;
19
+ this.lambdaClient = null;
24
20
  this.isConnected = false;
25
21
  }
26
22
 
@@ -30,6 +26,7 @@ class PegasusConnection {
30
26
  }
31
27
 
32
28
  this.rdsDataClient = new RDSDataClient({ region: this.region });
29
+ this.lambdaClient = new LambdaClient({ region: this.region });
33
30
 
34
31
  logInfo('pegasus-sdk', 'RDS Data API client initialized');
35
32
 
@@ -47,20 +44,6 @@ class PegasusConnection {
47
44
  throw err;
48
45
  }
49
46
 
50
- if (this.openSearchEndpoint) {
51
- this.osClient = new OpenSearchClient({
52
- ...AwsSigv4Signer({
53
- region: this.region,
54
- service: 'aoss',
55
- getCredentials: () => {
56
- const credentialsProvider = fromNodeProviderChain();
57
- return credentialsProvider();
58
- }
59
- }),
60
- node: this.openSearchEndpoint
61
- });
62
- }
63
-
64
47
  this.isConnected = true;
65
48
  }
66
49
 
@@ -101,20 +84,49 @@ class PegasusConnection {
101
84
  }
102
85
 
103
86
  this.rdsDataClient = null;
104
- this.osClient = null;
87
+ this.lambdaClient = null;
105
88
  this.isConnected = false;
106
89
  logInfo('pegasus-sdk', 'RDS Data API client disconnected');
107
90
  }
108
91
 
109
- getOpenSearchClient() {
110
- if (!this.osClient) {
111
- throw new Error('OpenSearch connection not established. Call connect() first or provide openSearchEndpoint.');
92
+ async invokeOpenSearch(event) {
93
+ if (!this.lambdaClient) {
94
+ throw new Error('Lambda client not initialized. Call connect() first.');
112
95
  }
113
- return this.osClient;
114
- }
115
96
 
116
- getOpenSearchIndex() {
117
- return this.openSearchIndex || 'chemicals';
97
+ const lambdaArn = process.env.PEGASUS_SDK_OPENSEARCH_LAMBDA_ARN
98
+ || this.config.openSearchLambdaArn
99
+ || null;
100
+
101
+ if (!lambdaArn) {
102
+ throw new Error('No OpenSearch Lambda ARN configured. Set PEGASUS_SDK_OPENSEARCH_LAMBDA_ARN or provide awsAccountId in config.');
103
+ }
104
+
105
+ try {
106
+ const response = await this.lambdaClient.send(new InvokeCommand({
107
+ FunctionName: lambdaArn,
108
+ Payload: JSON.stringify(event)
109
+ }));
110
+
111
+ const rawPayload = response.Payload;
112
+ const payload = (rawPayload instanceof Uint8Array || Buffer.isBuffer(rawPayload))
113
+ ? Buffer.from(rawPayload).toString('utf-8')
114
+ : rawPayload;
115
+ const parsed = JSON.parse(payload);
116
+ const body = JSON.parse(parsed.body);
117
+
118
+ if (!body.success) {
119
+ const error = new Error(body.error);
120
+ error.statusCode = body.statusCode;
121
+ error.responseBody = body.body;
122
+ throw error;
123
+ }
124
+
125
+ return body.result;
126
+ } catch (error) {
127
+ logError('pegasus-sdk', 'PegasusConnection', 'invokeOpenSearch', error);
128
+ throw error;
129
+ }
118
130
  }
119
131
 
120
132
  async testConnection() {
@@ -142,28 +154,25 @@ class PegasusConnection {
142
154
  };
143
155
 
144
156
  let osStatus = null;
145
- if (this.osClient) {
146
- try {
147
- const indexName = this.getOpenSearchIndex();
148
- const testSearch = await this.osClient.search({
149
- index: indexName,
150
- body: {
151
- size: 1,
152
- query: {
153
- match: { chemical_name: 'benzene' }
154
- }
157
+ try {
158
+ const testSearch = await this.invokeOpenSearch({
159
+ operation: 'search',
160
+ body: {
161
+ size: 1,
162
+ query: {
163
+ match: { chemical_name: 'benzene' }
155
164
  }
156
- });
157
- osStatus = {
158
- connected: true,
159
- resultsFound: testSearch.body.hits.total.value || 0
160
- };
161
- } catch (osError) {
162
- osStatus = {
163
- connected: false,
164
- error: osError.message
165
- };
166
- }
165
+ }
166
+ });
167
+ osStatus = {
168
+ connected: true,
169
+ resultsFound: testSearch.hits.total.value || 0
170
+ };
171
+ } catch (osError) {
172
+ osStatus = {
173
+ connected: false,
174
+ error: osError.message
175
+ };
167
176
  }
168
177
 
169
178
  return {
package/lib/search.js CHANGED
@@ -109,8 +109,6 @@ class SearchService {
109
109
  const synonymPrefix = options.synonymPrefix !== undefined ? options.synonymPrefix : 3;
110
110
 
111
111
  try {
112
- const opensearchClient = this.connection.getOpenSearchClient();
113
-
114
112
  // Get CAS number variations (if applicable)
115
113
  const queryVariations = getCasNumberVariations(query);
116
114
 
@@ -123,24 +121,28 @@ class SearchService {
123
121
  const shouldClauses = [];
124
122
 
125
123
  for (const queryVariation of queryVariations) {
126
- // Exact matches (configurable priority)
127
124
  shouldClauses.push(
125
+ // keyword / array-of-keyword fields — exact term match
128
126
  { term: { 'cas_numbers': { value: queryVariation, boost: casExact } } },
129
- { term: { 'chemical_name.keyword': { value: queryVariation, boost: nameExact, case_insensitive: true } } },
130
127
  { term: { 'identifier_values': { value: queryVariation, boost: identifierExact } } },
128
+ // keyword sub-fields — exact + prefix (present when index has .keyword multi-field)
129
+ { term: { 'chemical_name.keyword': { value: queryVariation, boost: nameExact, case_insensitive: true } } },
131
130
  { term: { 'synonyms.keyword': { value: queryVariation, boost: synonymExact, case_insensitive: true } } },
132
- // Prefix matches (configurable priority)
133
- { prefix: { 'cas_numbers': { value: queryVariation, boost: casPrefix } } },
134
131
  { prefix: { 'chemical_name.keyword': { value: queryVariation, boost: namePrefix, case_insensitive: true } } },
132
+ { prefix: { 'synonyms.keyword': { value: queryVariation, boost: synonymPrefix, case_insensitive: true } } },
133
+ // keyword field prefix matching for cas / identifiers
134
+ { prefix: { 'cas_numbers': { value: queryVariation, boost: casPrefix } } },
135
135
  { prefix: { 'identifier_values': { value: queryVariation, boost: identifierPrefix } } },
136
- { prefix: { 'synonyms.keyword': { value: queryVariation, boost: synonymPrefix, case_insensitive: true } } }
136
+ // match queries work on plain text fields even without .keyword sub-fields
137
+ { match: { 'chemical_name': { query: queryVariation, boost: nameExact } } },
138
+ { match: { 'synonyms': { query: queryVariation, boost: synonymExact } } },
139
+ { match_phrase_prefix: { 'chemical_name': { query: queryVariation, boost: namePrefix } } },
140
+ { match_phrase_prefix: { 'synonyms': { query: queryVariation, boost: synonymPrefix } } }
137
141
  );
138
142
  }
139
143
 
140
- const indexName = this.connection.getOpenSearchIndex();
141
-
142
- const response = await opensearchClient.search({
143
- index: indexName,
144
+ const result = await this.connection.invokeOpenSearch({
145
+ operation: 'search',
144
146
  body: {
145
147
  size: limit,
146
148
  query: {
@@ -153,7 +155,7 @@ class SearchService {
153
155
  }
154
156
  });
155
157
 
156
- const hits = response.body?.hits?.hits || [];
158
+ const hits = result?.hits?.hits || [];
157
159
  const results = hits.map((hit) => ({
158
160
  id: hit._source.postgres_id,
159
161
  name: hit._source.chemical_name,
package/package.json CHANGED
@@ -1,13 +1,14 @@
1
1
  {
2
2
  "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.2.0",
3
+ "version": "1.2.2",
4
4
  "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
5
  "main": "index.js",
6
6
  "type": "commonjs",
7
7
  "scripts": {
8
8
  "test": "vitest run",
9
9
  "test:watch": "vitest",
10
- "test:ui": "vitest --ui"
10
+ "test:ui": "vitest --ui",
11
+ "test:e2e": "vitest run --config vitest.e2e.config.js"
11
12
  },
12
13
  "keywords": [
13
14
  "elasticsearch",
@@ -25,10 +26,9 @@
25
26
  "license": "MIT",
26
27
  "dependencies": {
27
28
  "@toxplanet/tphelper": "1.2.8",
28
- "@opensearch-project/opensearch": "^2.5.0",
29
+ "@aws-sdk/client-lambda": "^3.490.0",
29
30
  "@aws-sdk/client-rds-data": "^3.490.0",
30
- "@aws-sdk/client-sqs": "^3.490.0",
31
- "@aws-sdk/credential-providers": "^3.490.0"
31
+ "@aws-sdk/client-sqs": "^3.490.0"
32
32
  },
33
33
  "engines": {
34
34
  "node": ">=18.0.0"