@toxplanet/pegasus-sdk 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/environment.dev.js +22 -0
- package/config/environment.prod.js +22 -0
- package/config/environment.qa.js +22 -0
- package/config/index.js +67 -0
- package/env.example +21 -3
- package/index.d.ts +51 -14
- package/lib/chemicals.js +179 -7
- package/lib/connection.js +43 -26
- package/lib/db/index.js +11 -0
- package/lib/db/schema.js +28 -0
- package/lib/search.js +282 -8
- package/package.json +6 -4
- package/tests/chemicals.js +165 -0
- package/tests/search.js +138 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
module.exports = {
|
|
2
|
+
environment: 'development',
|
|
3
|
+
region: 'us-east-1',
|
|
4
|
+
secretName: 'arn:aws:secretsmanager:us-east-1:292931567094:secret:rds!cluster-b851c3ce-58cc-41cd-aeae-05cc7f5e031a-ZYSjiI',
|
|
5
|
+
openSearchEndpoint: 'https://war8lk73nzswquk8dcz1.us-east-1.aoss.amazonaws.com',
|
|
6
|
+
openSearchIndex: 'chemicals',
|
|
7
|
+
database: {
|
|
8
|
+
host: 'cr-chemicals.cluster-cz0iqdg8irhb.us-east-1.rds.amazonaws.com',
|
|
9
|
+
name: 'chemicals'
|
|
10
|
+
},
|
|
11
|
+
postgres: {
|
|
12
|
+
maxConnections: 2,
|
|
13
|
+
minConnections: 0,
|
|
14
|
+
idleTimeoutMillis: 30000,
|
|
15
|
+
connectionTimeoutMillis: 5000,
|
|
16
|
+
statementTimeout: 30000,
|
|
17
|
+
queryTimeout: 30000,
|
|
18
|
+
ssl: {
|
|
19
|
+
rejectUnauthorized: false
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
};
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
module.exports = {
|
|
2
|
+
environment: 'production',
|
|
3
|
+
region: 'us-east-1',
|
|
4
|
+
secretName: 'pegasus/production/database',
|
|
5
|
+
openSearchEndpoint: null,
|
|
6
|
+
openSearchIndex: 'chemicals',
|
|
7
|
+
database: {
|
|
8
|
+
host: null,
|
|
9
|
+
name: 'chemicals'
|
|
10
|
+
},
|
|
11
|
+
postgres: {
|
|
12
|
+
maxConnections: 10,
|
|
13
|
+
minConnections: 2,
|
|
14
|
+
idleTimeoutMillis: 120000,
|
|
15
|
+
connectionTimeoutMillis: 15000,
|
|
16
|
+
statementTimeout: 120000,
|
|
17
|
+
queryTimeout: 120000,
|
|
18
|
+
ssl: {
|
|
19
|
+
rejectUnauthorized: true
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
};
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
module.exports = {
|
|
2
|
+
environment: 'qa',
|
|
3
|
+
region: 'us-east-1',
|
|
4
|
+
secretName: 'pegasus/qa/database',
|
|
5
|
+
openSearchEndpoint: null,
|
|
6
|
+
openSearchIndex: 'chemicals',
|
|
7
|
+
database: {
|
|
8
|
+
host: null,
|
|
9
|
+
name: 'chemicals'
|
|
10
|
+
},
|
|
11
|
+
postgres: {
|
|
12
|
+
maxConnections: 5,
|
|
13
|
+
minConnections: 1,
|
|
14
|
+
idleTimeoutMillis: 60000,
|
|
15
|
+
connectionTimeoutMillis: 10000,
|
|
16
|
+
statementTimeout: 60000,
|
|
17
|
+
queryTimeout: 60000,
|
|
18
|
+
ssl: {
|
|
19
|
+
rejectUnauthorized: true
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
};
|
package/config/index.js
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
const { logInfo } = require('@toxplanet/tphelper/logging');
|
|
2
|
+
|
|
3
|
+
/**
 * Maps accepted environment names to the suffix used by the
 * `environment.<suffix>.js` files required below. Without this mapping the
 * default name "development" (and NODE_ENV=production) would look for files
 * that do not exist and silently fall back to the development config.
 */
const ENVIRONMENT_FILE_SUFFIXES = new Map([
  ['development', 'dev'],
  ['dev', 'dev'],
  ['production', 'prod'],
  ['prod', 'prod'],
  ['qa', 'qa']
]);

/** String overrides: [environment variable, config section or null for top level, key]. */
const STRING_OVERRIDES = [
  ['PEGASUS_SDK_DB_SECRET_ARN', null, 'secretName'],
  ['PEGASUS_SDK_OPENSEARCH_ENDPOINT', null, 'openSearchEndpoint'],
  ['PEGASUS_SDK_OPENSEARCH_INDEX', null, 'openSearchIndex'],
  ['PEGASUS_SDK_DATABASE_HOST', 'database', 'host'],
  ['PEGASUS_SDK_DATABASE_NAME', 'database', 'name'],
  ['PEGASUS_SDK_AWS_REGION', null, 'region']
];

/** Integer overrides for the `postgres` section: [environment variable, key]. */
const POSTGRES_INTEGER_OVERRIDES = [
  ['PEGASUS_SDK_MAX_CONNECTIONS', 'maxConnections'],
  ['PEGASUS_SDK_MIN_CONNECTIONS', 'minConnections'],
  ['PEGASUS_SDK_IDLE_TIMEOUT', 'idleTimeoutMillis'],
  ['PEGASUS_SDK_CONNECTION_TIMEOUT', 'connectionTimeoutMillis'],
  ['PEGASUS_SDK_STATEMENT_TIMEOUT', 'statementTimeout'],
  ['PEGASUS_SDK_QUERY_TIMEOUT', 'queryTimeout']
];

/**
 * Load the configuration for an environment and apply PEGASUS_SDK_* overrides.
 *
 * The environment is `envOverride`, else `NODE_ENV`, else "development". If the
 * matching file cannot be loaded, a warning is logged and the development
 * config is used instead.
 *
 * @param {string|null} [envOverride=null] - Environment name that takes precedence over NODE_ENV
 * @returns {Object} A config object; its `database` and `postgres` sections are
 *   fresh copies, so overrides never leak into the cached environment module
 */
function loadConfig(envOverride = null) {
  const env = envOverride || process.env.NODE_ENV || 'development';
  // Unknown names are passed through unchanged so custom environment files still resolve.
  const fileSuffix = ENVIRONMENT_FILE_SUFFIXES.get(env) || env;

  let envConfig;
  try {
    envConfig = require(`./environment.${fileSuffix}.js`);
  } catch (error) {
    logInfo('pegasus-sdk', `Warning: Could not load config for environment "${env}", falling back to development`);
    envConfig = require('./environment.dev.js');
  }

  // require() returns the same cached object on every call, so the nested
  // sections that receive overrides must be copied before they are written to.
  const config = {
    ...envConfig,
    database: { ...envConfig.database },
    postgres: { ...envConfig.postgres }
  };

  for (const [variable, section, key] of STRING_OVERRIDES) {
    const value = process.env[variable];
    if (value) {
      const target = section === null ? config : config[section];
      target[key] = value;
    }
  }

  for (const [variable, key] of POSTGRES_INTEGER_OVERRIDES) {
    const raw = process.env[variable];
    if (!raw) {
      continue;
    }

    const parsed = Number.parseInt(raw, 10);
    if (Number.isNaN(parsed)) {
      // Keep the file default rather than handing NaN to the connection pool.
      logInfo('pegasus-sdk', `Warning: Ignoring non-numeric value for ${variable}`);
      continue;
    }
    config.postgres[key] = parsed;
  }

  return config;
}
|
|
66
|
+
|
|
67
|
+
module.exports = { loadConfig };
|
package/env.example
CHANGED
|
@@ -1,3 +1,21 @@
|
|
|
1
|
-
NODE_ENV=development
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
NODE_ENV=development
|
|
2
|
+
|
|
3
|
+
# Pegasus SDK Configuration Overrides
|
|
4
|
+
PEGASUS_SDK_DB_SECRET_ARN=arn:aws:secretsmanager:us-east-1:292931567094:secret:rds!cluster-b851c3ce-58cc-41cd-aeae-05cc7f5e031a-ZYSjiI
|
|
5
|
+
PEGASUS_SDK_OPENSEARCH_ENDPOINT=https://war8lk73nzswquk8dcz1.us-east-1.aoss.amazonaws.com
|
|
6
|
+
PEGASUS_SDK_AWS_REGION=us-east-1
|
|
7
|
+
|
|
8
|
+
# Database Configuration
|
|
9
|
+
PEGASUS_SDK_DATABASE_HOST=cr-chemicals.cluster-cz0iqdg8irhb.us-east-1.rds.amazonaws.com
|
|
10
|
+
PEGASUS_SDK_DATABASE_NAME=chemicals
|
|
11
|
+
|
|
12
|
+
# OpenSearch Configuration
|
|
13
|
+
PEGASUS_SDK_OPENSEARCH_INDEX=chemicals
|
|
14
|
+
|
|
15
|
+
# PostgreSQL Connection Pool Settings (optional)
|
|
16
|
+
PEGASUS_SDK_MAX_CONNECTIONS=2
|
|
17
|
+
PEGASUS_SDK_MIN_CONNECTIONS=0
|
|
18
|
+
PEGASUS_SDK_IDLE_TIMEOUT=30000
|
|
19
|
+
PEGASUS_SDK_CONNECTION_TIMEOUT=5000
|
|
20
|
+
PEGASUS_SDK_STATEMENT_TIMEOUT=30000
|
|
21
|
+
PEGASUS_SDK_QUERY_TIMEOUT=30000
|
package/index.d.ts
CHANGED
|
@@ -1,8 +1,4 @@
|
|
|
1
|
-
export interface
|
|
2
|
-
environment?: string;
|
|
3
|
-
region?: string;
|
|
4
|
-
secretName?: string;
|
|
5
|
-
openSearchEndpoint?: string;
|
|
1
|
+
export interface PostgresConfig {
|
|
6
2
|
maxConnections?: number;
|
|
7
3
|
minConnections?: number;
|
|
8
4
|
idleTimeoutMillis?: number;
|
|
@@ -11,6 +7,21 @@ export interface PegasusConfig {
|
|
|
11
7
|
queryTimeout?: number;
|
|
12
8
|
}
|
|
13
9
|
|
|
10
|
+
/** Location of the PostgreSQL database the connection pool is opened against. */
export interface DatabaseConfig {
  /** Database server host name. */
  host?: string;
  /** Name of the database to connect to. */
  name?: string;
}
|
|
14
|
+
|
|
15
|
+
/** Options accepted by `PegasusConnection`; every field is optional. */
export interface PegasusConfig {
  /** Environment name used to pick the environment defaults. */
  environment?: string;
  /** AWS region. */
  region?: string;
  /** Name or ARN of the secret holding the database credentials. */
  secretName?: string;
  /** OpenSearch endpoint URL. */
  openSearchEndpoint?: string;
  /** OpenSearch index that search queries are sent to. */
  openSearchIndex?: string;
  /** Database host and name. */
  database?: DatabaseConfig;
  /** PostgreSQL pool sizing and timeouts. */
  postgres?: PostgresConfig;
}
|
|
24
|
+
|
|
14
25
|
export interface Chemical {
|
|
15
26
|
chemical_id?: string;
|
|
16
27
|
source_id: string;
|
|
@@ -72,6 +83,7 @@ export declare class PegasusConnection {
|
|
|
72
83
|
disconnect(): Promise<void>;
|
|
73
84
|
getPostgresClient(): any;
|
|
74
85
|
getOpenSearchClient(): any;
|
|
86
|
+
getOpenSearchIndex(): string;
|
|
75
87
|
testConnection(): Promise<ConnectionStatus>;
|
|
76
88
|
query(sql: string, params?: any[]): Promise<any>;
|
|
77
89
|
getClient(): Promise<any>;
|
|
@@ -102,8 +114,8 @@ export declare class ChemicalsService {
|
|
|
102
114
|
getSynonymCount(synonymTerm: string): Promise<number>;
|
|
103
115
|
convertIdentifier(fromIdentifier: string, toIdentifierType: string): Promise<any>;
|
|
104
116
|
convertIdentifiersBatch(fromIdentifiers: string[], toIdentifierType: string): Promise<any[]>;
|
|
105
|
-
searchByName(searchTerm: string, limit
|
|
106
|
-
searchBySynonym(synonymTerm: string, limit
|
|
117
|
+
searchByName(searchTerm: string, limit?: number): Promise<SearchResults>;
|
|
118
|
+
searchBySynonym(synonymTerm: string, limit?: number): Promise<SearchResults>;
|
|
107
119
|
findChemicalsWithoutDocuments(collectionName: string, searchTerm: string, pageSize: number): Promise<Chemical[]>;
|
|
108
120
|
countChemicalsWithoutDocuments(collectionName: string): Promise<number>;
|
|
109
121
|
}
|
|
@@ -133,15 +145,40 @@ export declare class DocumentsService {
|
|
|
133
145
|
processDocument(documentPath: string, documentData: any): Promise<Document>;
|
|
134
146
|
}
|
|
135
147
|
|
|
148
|
+
/** Tuning options for `SearchService.searchChemicals`. */
export interface SearchOptions {
  /** Maximum number of results (default: 10). */
  limit?: number;
  /** Boost for exact CAS matches (default: 50). */
  casExact?: number;
  /** Boost for CAS prefix matches (default: 10). */
  casPrefix?: number;
  /** Boost for exact name matches (default: 40). */
  nameExact?: number;
  /** Boost for name prefix matches (default: 8). */
  namePrefix?: number;
  /** Boost for exact identifier matches (default: 30). */
  identifierExact?: number;
  /** Boost for identifier prefix matches (default: 5). */
  identifierPrefix?: number;
  /** Boost for exact synonym matches (default: 100). */
  synonymExact?: number;
  /** Boost for synonym prefix matches (default: 3). */
  synonymPrefix?: number;
}
|
|
159
|
+
|
|
160
|
+
/** One hit returned by the OpenSearch-backed chemical searches. */
export interface ChemicalSearchResult {
  /** Value of the hit's `postgres_id` field. */
  id: number;
  /** Value of the hit's `chemical_name` field. */
  name: string;
  /** Value of the hit's `cas_numbers` field (empty array when absent). */
  cas: string[];
  /** Value of the hit's `identifier_values` field (empty array when absent). */
  identifiers: string[];
  /** Value of the hit's `synonyms` field (empty array when absent). */
  synonyms: string[];
  /** Relevance score reported by OpenSearch for the hit. */
  score: number;
}
|
|
168
|
+
|
|
169
|
+
/** Envelope returned by the chemical search methods. */
export interface SearchResults {
  /** Matching chemicals; empty when nothing matched or the query was empty. */
  results: ChemicalSearchResult[];
}
|
|
172
|
+
|
|
136
173
|
export declare class SearchService {
|
|
137
174
|
constructor(connection: PegasusConnection);
|
|
138
|
-
searchChemicals(
|
|
139
|
-
searchStartsWith(searchTerm: string, limit
|
|
140
|
-
searchContains(searchTerm: string, limit
|
|
141
|
-
searchExact(searchTerm: string, limit
|
|
142
|
-
searchByCAS(casNumber: string, searchType
|
|
143
|
-
searchByIdentifier(identifierValue: string, searchType
|
|
144
|
-
searchBySynonym(synonymTerm: string, searchType
|
|
175
|
+
searchChemicals(query: string, options?: SearchOptions): Promise<SearchResults>;
|
|
176
|
+
searchStartsWith(searchTerm: string, limit?: number): Promise<SearchResults>;
|
|
177
|
+
searchContains(searchTerm: string, limit?: number): Promise<SearchResults>;
|
|
178
|
+
searchExact(searchTerm: string, limit?: number): Promise<SearchResults>;
|
|
179
|
+
searchByCAS(casNumber: string, searchType?: string): Promise<SearchResults>;
|
|
180
|
+
searchByIdentifier(identifierValue: string, searchType?: string): Promise<SearchResults>;
|
|
181
|
+
searchBySynonym(synonymTerm: string, searchType?: string): Promise<SearchResults>;
|
|
145
182
|
advancedSearch(queryBuilder: any): Promise<any[]>;
|
|
146
183
|
searchWithFilters(searchTerm: string, filters: any, limit: number): Promise<any[]>;
|
|
147
184
|
searchByCollection(collectionName: string, searchTerm: string, limit: number): Promise<any[]>;
|
package/lib/chemicals.js
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
|
+
const { logError } = require('@toxplanet/tphelper/logging');
|
|
2
|
+
const { getDrizzle, schema } = require('./db');
|
|
3
|
+
const { eq } = require('drizzle-orm');
|
|
4
|
+
|
|
1
5
|
class ChemicalsService {
|
|
2
|
-
constructor(connection) {
|
|
6
|
+
constructor(connection) {
|
|
7
|
+
this.connection = connection;
|
|
8
|
+
this.db = null;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
getDb() {
|
|
12
|
+
if (!this.db) {
|
|
13
|
+
this.db = getDrizzle(this.connection.pgPool);
|
|
14
|
+
}
|
|
15
|
+
return this.db;
|
|
16
|
+
}
|
|
3
17
|
|
|
4
18
|
async bulkIndexFielded(documents) {}
|
|
5
19
|
|
|
@@ -7,11 +21,50 @@ class ChemicalsService {
|
|
|
7
21
|
|
|
8
22
|
async bulkIndexSubstances(substances) {}
|
|
9
23
|
|
|
10
|
-
async createChemical(chemical) {
|
|
24
|
+
async createChemical(chemical) {
|
|
25
|
+
try {
|
|
26
|
+
const db = this.getDb();
|
|
27
|
+
|
|
28
|
+
const [result] = await db
|
|
29
|
+
.insert(schema.chemicals)
|
|
30
|
+
.values({
|
|
31
|
+
sourceId: chemical.source_id,
|
|
32
|
+
chemicalName: chemical.chemical_name,
|
|
33
|
+
chemicalMeta: chemical.chemical_meta,
|
|
34
|
+
chemicalIdentifiers: chemical.chemical_identifiers,
|
|
35
|
+
chemicalSynonyms: chemical.chemical_synonyms,
|
|
36
|
+
chemicalCategories: chemical.chemical_categories,
|
|
37
|
+
createdAt: chemical.created_at || new Date(),
|
|
38
|
+
updatedAt: chemical.updated_at || new Date(),
|
|
39
|
+
...(chemical.imported_at && { importedAt: chemical.imported_at }),
|
|
40
|
+
...(chemical.chemical_id && { chemicalId: chemical.chemical_id })
|
|
41
|
+
})
|
|
42
|
+
.returning();
|
|
43
|
+
|
|
44
|
+
return result;
|
|
45
|
+
} catch (error) {
|
|
46
|
+
logError('pegasus-sdk', 'ChemicalsService', 'createChemical', error);
|
|
47
|
+
throw error;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
11
50
|
|
|
12
51
|
async updateChemical(chemicalId, updates) {}
|
|
13
52
|
|
|
14
|
-
async deleteChemical(chemicalId) {
|
|
53
|
+
async deleteChemical(chemicalId) {
|
|
54
|
+
try {
|
|
55
|
+
const db = this.getDb();
|
|
56
|
+
|
|
57
|
+
const [deleted] = await db
|
|
58
|
+
.delete(schema.chemicals)
|
|
59
|
+
.where(eq(schema.chemicals.chemicalId, chemicalId))
|
|
60
|
+
.returning();
|
|
61
|
+
|
|
62
|
+
return deleted || null;
|
|
63
|
+
} catch (error) {
|
|
64
|
+
logError('pegasus-sdk', 'ChemicalsService', 'deleteChemical', error);
|
|
65
|
+
throw error;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
15
68
|
|
|
16
69
|
async deleteBySourceId(sourceId) {}
|
|
17
70
|
|
|
@@ -21,7 +74,22 @@ class ChemicalsService {
|
|
|
21
74
|
|
|
22
75
|
async bulkUpdateProperty(filter, propertyPath, newValue) {}
|
|
23
76
|
|
|
24
|
-
async getChemicalById(chemicalId) {
|
|
77
|
+
async getChemicalById(chemicalId) {
|
|
78
|
+
try {
|
|
79
|
+
const db = this.getDb();
|
|
80
|
+
|
|
81
|
+
const [result] = await db
|
|
82
|
+
.select()
|
|
83
|
+
.from(schema.chemicals)
|
|
84
|
+
.where(eq(schema.chemicals.chemicalId, chemicalId))
|
|
85
|
+
.limit(1);
|
|
86
|
+
|
|
87
|
+
return result || null;
|
|
88
|
+
} catch (error) {
|
|
89
|
+
logError('pegasus-sdk', 'ChemicalsService', 'getChemicalById', error);
|
|
90
|
+
throw error;
|
|
91
|
+
}
|
|
92
|
+
}
|
|
25
93
|
|
|
26
94
|
async getChemicalBySourceId(sourceId) {}
|
|
27
95
|
|
|
@@ -45,9 +113,113 @@ class ChemicalsService {
|
|
|
45
113
|
|
|
46
114
|
async convertIdentifiersBatch(fromIdentifiers, toIdentifierType) {}
|
|
47
115
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
116
|
+
/**
|
|
117
|
+
* Search for chemicals by name using OpenSearch
|
|
118
|
+
* @param {string} searchTerm - Name to search for
|
|
119
|
+
* @param {number} limit - Maximum number of results (default: 10)
|
|
120
|
+
* @returns {Promise<Object>} Search results
|
|
121
|
+
*/
|
|
122
|
+
async searchByName(searchTerm, limit = 10) {
|
|
123
|
+
if (!searchTerm) {
|
|
124
|
+
return { results: [] };
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
try {
|
|
128
|
+
const opensearchClient = this.connection.getOpenSearchClient();
|
|
129
|
+
const indexName = this.connection.getOpenSearchIndex();
|
|
130
|
+
|
|
131
|
+
const response = await opensearchClient.search({
|
|
132
|
+
index: indexName,
|
|
133
|
+
body: {
|
|
134
|
+
size: limit,
|
|
135
|
+
query: {
|
|
136
|
+
bool: {
|
|
137
|
+
should: [
|
|
138
|
+
// Prioritize exact name matches
|
|
139
|
+
{ term: { 'chemical_name.keyword': { value: searchTerm, boost: 100, case_insensitive: true } } },
|
|
140
|
+
// Then prefix matches
|
|
141
|
+
{ prefix: { 'chemical_name.keyword': { value: searchTerm, boost: 50, case_insensitive: true } } },
|
|
142
|
+
// Include synonym matches as secondary
|
|
143
|
+
{ term: { 'synonyms.keyword': { value: searchTerm, boost: 30, case_insensitive: true } } },
|
|
144
|
+
{ prefix: { 'synonyms.keyword': { value: searchTerm, boost: 10, case_insensitive: true } } }
|
|
145
|
+
],
|
|
146
|
+
minimum_should_match: 1
|
|
147
|
+
}
|
|
148
|
+
},
|
|
149
|
+
_source: ['postgres_id', 'chemical_name', 'cas_numbers', 'identifier_values', 'synonyms']
|
|
150
|
+
}
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
const hits = response.body?.hits?.hits || [];
|
|
154
|
+
const results = hits.map((hit) => ({
|
|
155
|
+
id: hit._source.postgres_id,
|
|
156
|
+
name: hit._source.chemical_name,
|
|
157
|
+
cas: hit._source.cas_numbers || [],
|
|
158
|
+
identifiers: hit._source.identifier_values || [],
|
|
159
|
+
synonyms: hit._source.synonyms || [],
|
|
160
|
+
score: hit._score
|
|
161
|
+
}));
|
|
162
|
+
|
|
163
|
+
return { results };
|
|
164
|
+
} catch (error) {
|
|
165
|
+
logError('pegasus-sdk', 'ChemicalsService', 'searchByName', error);
|
|
166
|
+
throw error;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
/**
|
|
171
|
+
* Search for chemicals by synonym using OpenSearch
|
|
172
|
+
* @param {string} synonymTerm - Synonym to search for
|
|
173
|
+
* @param {number} limit - Maximum number of results (default: 10)
|
|
174
|
+
* @returns {Promise<Object>} Search results
|
|
175
|
+
*/
|
|
176
|
+
async searchBySynonym(synonymTerm, limit = 10) {
|
|
177
|
+
if (!synonymTerm) {
|
|
178
|
+
return { results: [] };
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
try {
|
|
182
|
+
const opensearchClient = this.connection.getOpenSearchClient();
|
|
183
|
+
const indexName = this.connection.getOpenSearchIndex();
|
|
184
|
+
|
|
185
|
+
const response = await opensearchClient.search({
|
|
186
|
+
index: indexName,
|
|
187
|
+
body: {
|
|
188
|
+
size: limit,
|
|
189
|
+
query: {
|
|
190
|
+
bool: {
|
|
191
|
+
should: [
|
|
192
|
+
// Prioritize exact synonym matches
|
|
193
|
+
{ term: { 'synonyms.keyword': { value: synonymTerm, boost: 100, case_insensitive: true } } },
|
|
194
|
+
// Then prefix matches
|
|
195
|
+
{ prefix: { 'synonyms.keyword': { value: synonymTerm, boost: 50, case_insensitive: true } } },
|
|
196
|
+
// Include name matches as secondary
|
|
197
|
+
{ term: { 'chemical_name.keyword': { value: synonymTerm, boost: 30, case_insensitive: true } } },
|
|
198
|
+
{ prefix: { 'chemical_name.keyword': { value: synonymTerm, boost: 10, case_insensitive: true } } }
|
|
199
|
+
],
|
|
200
|
+
minimum_should_match: 1
|
|
201
|
+
}
|
|
202
|
+
},
|
|
203
|
+
_source: ['postgres_id', 'chemical_name', 'cas_numbers', 'identifier_values', 'synonyms']
|
|
204
|
+
}
|
|
205
|
+
});
|
|
206
|
+
|
|
207
|
+
const hits = response.body?.hits?.hits || [];
|
|
208
|
+
const results = hits.map((hit) => ({
|
|
209
|
+
id: hit._source.postgres_id,
|
|
210
|
+
name: hit._source.chemical_name,
|
|
211
|
+
cas: hit._source.cas_numbers || [],
|
|
212
|
+
identifiers: hit._source.identifier_values || [],
|
|
213
|
+
synonyms: hit._source.synonyms || [],
|
|
214
|
+
score: hit._score
|
|
215
|
+
}));
|
|
216
|
+
|
|
217
|
+
return { results };
|
|
218
|
+
} catch (error) {
|
|
219
|
+
logError('pegasus-sdk', 'ChemicalsService', 'searchBySynonym', error);
|
|
220
|
+
throw error;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
51
223
|
|
|
52
224
|
async findChemicalsWithoutDocuments(collectionName, searchTerm, pageSize) {}
|
|
53
225
|
|
package/lib/connection.js
CHANGED
|
@@ -2,15 +2,22 @@ const { Pool } = require('pg');
|
|
|
2
2
|
const { Client } = require('@opensearch-project/opensearch');
|
|
3
3
|
const { SecretsManagerClient, GetSecretValueCommand } = require('@aws-sdk/client-secrets-manager');
|
|
4
4
|
const { AwsSigv4Signer } = require('@opensearch-project/opensearch/aws');
|
|
5
|
-
const {
|
|
5
|
+
const { fromNodeProviderChain } = require('@aws-sdk/credential-providers');
|
|
6
|
+
const { loadConfig } = require('../config');
|
|
7
|
+
const { logInfo, logError } = require('@toxplanet/tphelper/logging');
|
|
6
8
|
|
|
7
9
|
class PegasusConnection {
|
|
8
10
|
constructor(config = {}) {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
this.
|
|
12
|
-
this.
|
|
13
|
-
this.
|
|
11
|
+
const envConfig = loadConfig(config.environment);
|
|
12
|
+
|
|
13
|
+
this.config = { ...envConfig, ...config };
|
|
14
|
+
this.environment = this.config.environment;
|
|
15
|
+
this.region = this.config.region;
|
|
16
|
+
this.secretName = this.config.secretName;
|
|
17
|
+
this.openSearchEndpoint = this.config.openSearchEndpoint;
|
|
18
|
+
this.openSearchIndex = this.config.openSearchIndex;
|
|
19
|
+
this.databaseHost = this.config.database?.host;
|
|
20
|
+
this.databaseName = this.config.database?.name;
|
|
14
21
|
|
|
15
22
|
this.pgPool = null;
|
|
16
23
|
this.osClient = null;
|
|
@@ -47,35 +54,33 @@ class PegasusConnection {
|
|
|
47
54
|
const secret = await this.getSecret();
|
|
48
55
|
|
|
49
56
|
const poolConfig = {
|
|
50
|
-
host:
|
|
51
|
-
port:
|
|
52
|
-
database:
|
|
53
|
-
user: secret.username
|
|
57
|
+
host: this.databaseHost,
|
|
58
|
+
port: 5432,
|
|
59
|
+
database: this.databaseName,
|
|
60
|
+
user: secret.username,
|
|
54
61
|
password: secret.password,
|
|
55
|
-
max: this.config.maxConnections
|
|
56
|
-
min: this.config.minConnections
|
|
57
|
-
idleTimeoutMillis: this.config.idleTimeoutMillis
|
|
58
|
-
connectionTimeoutMillis: this.config.connectionTimeoutMillis
|
|
62
|
+
max: this.config.postgres.maxConnections,
|
|
63
|
+
min: this.config.postgres.minConnections,
|
|
64
|
+
idleTimeoutMillis: this.config.postgres.idleTimeoutMillis,
|
|
65
|
+
connectionTimeoutMillis: this.config.postgres.connectionTimeoutMillis,
|
|
59
66
|
allowExitOnIdle: true,
|
|
60
|
-
ssl:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
statement_timeout: this.config.statementTimeout || 30000,
|
|
64
|
-
query_timeout: this.config.queryTimeout || 30000
|
|
67
|
+
ssl: this.config.postgres.ssl,
|
|
68
|
+
statement_timeout: this.config.postgres.statementTimeout,
|
|
69
|
+
query_timeout: this.config.postgres.queryTimeout
|
|
65
70
|
};
|
|
66
71
|
|
|
67
72
|
this.pgPool = new Pool(poolConfig);
|
|
68
73
|
|
|
69
74
|
this.pgPool.on('error', (err) => {
|
|
70
|
-
|
|
75
|
+
logError('pegasus-sdk', 'PegasusConnection', 'pgPool.error', err);
|
|
71
76
|
});
|
|
72
77
|
|
|
73
78
|
this.pgPool.on('connect', () => {
|
|
74
|
-
|
|
79
|
+
logInfo('pegasus-sdk', 'PostgreSQL client connected');
|
|
75
80
|
});
|
|
76
81
|
|
|
77
82
|
this.pgPool.on('remove', () => {
|
|
78
|
-
|
|
83
|
+
logInfo('pegasus-sdk', 'PostgreSQL client removed from pool');
|
|
79
84
|
});
|
|
80
85
|
|
|
81
86
|
if (this.openSearchEndpoint) {
|
|
@@ -84,7 +89,7 @@ class PegasusConnection {
|
|
|
84
89
|
region: this.region,
|
|
85
90
|
service: 'aoss',
|
|
86
91
|
getCredentials: () => {
|
|
87
|
-
const credentialsProvider =
|
|
92
|
+
const credentialsProvider = fromNodeProviderChain();
|
|
88
93
|
return credentialsProvider();
|
|
89
94
|
}
|
|
90
95
|
}),
|
|
@@ -124,6 +129,10 @@ class PegasusConnection {
|
|
|
124
129
|
return this.osClient;
|
|
125
130
|
}
|
|
126
131
|
|
|
132
|
+
getOpenSearchIndex() {
|
|
133
|
+
return this.openSearchIndex || 'chemicals';
|
|
134
|
+
}
|
|
135
|
+
|
|
127
136
|
async testConnection() {
|
|
128
137
|
try {
|
|
129
138
|
if (this.pgPool) {
|
|
@@ -143,11 +152,19 @@ class PegasusConnection {
|
|
|
143
152
|
let osStatus = null;
|
|
144
153
|
if (this.osClient) {
|
|
145
154
|
try {
|
|
146
|
-
const
|
|
155
|
+
const indexName = this.getOpenSearchIndex();
|
|
156
|
+
const testSearch = await this.osClient.search({
|
|
157
|
+
index: indexName,
|
|
158
|
+
body: {
|
|
159
|
+
size: 1,
|
|
160
|
+
query: {
|
|
161
|
+
match: { chemical_name: 'benzene' }
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
});
|
|
147
165
|
osStatus = {
|
|
148
166
|
connected: true,
|
|
149
|
-
|
|
150
|
-
cluster: osInfo.body.cluster_name
|
|
167
|
+
resultsFound: testSearch.body.hits.total.value || 0
|
|
151
168
|
};
|
|
152
169
|
} catch (osError) {
|
|
153
170
|
osStatus = {
|
package/lib/db/index.js
ADDED
package/lib/db/schema.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
const { pgTable, uuid, text, jsonb, timestamp, index, uniqueIndex } = require('drizzle-orm/pg-core');
|
|
2
|
+
const { sql } = require('drizzle-orm');
|
|
3
|
+
|
|
4
|
+
// Column definitions for the `chemicals` table.
const chemicalColumns = {
  chemicalId: uuid('chemical_id').defaultRandom().primaryKey(),
  sourceId: text('source_id').notNull().unique(),
  chemicalName: text('chemical_name').notNull(),
  chemicalMeta: jsonb('chemical_meta'),
  chemicalIdentifiers: jsonb('chemical_identifiers'),
  chemicalSynonyms: text('chemical_synonyms').array(),
  chemicalCategories: text('chemical_categories').array(),
  createdAt: timestamp('created_at', { withTimezone: true }).notNull(),
  updatedAt: timestamp('updated_at', { withTimezone: true }).notNull(),
  importedAt: timestamp('imported_at', { withTimezone: true }).defaultNow()
};

// Index definitions for the `chemicals` table.
// NOTE(review): the last two index names end in `_gin`, but no index method is
// specified here — confirm whether a GIN index was intended.
const chemicalIndexes = (table) => ({
  sourceIdIdx: uniqueIndex('idx_chemicals_source_id').on(table.sourceId),
  nameIdx: index('idx_chemicals_name').on(table.chemicalName),
  createdAtIdx: index('idx_chemicals_created_at').on(table.createdAt),
  updatedAtIdx: index('idx_chemicals_updated_at').on(table.updatedAt),
  identifiersGinIdx: index('idx_chemicals_identifiers_gin').on(table.chemicalIdentifiers),
  synonymsGinIdx: index('idx_chemicals_synonyms_gin').on(table.chemicalSynonyms)
});

const chemicals = pgTable('chemicals', chemicalColumns, chemicalIndexes);

module.exports = { chemicals };
|
package/lib/search.js
CHANGED
|
@@ -1,19 +1,293 @@
|
|
|
1
|
+
const { logInfo, logError } = require('@toxplanet/tphelper/logging');
|
|
2
|
+
|
|
3
|
+
/**
 * Expand a CAS-like query into the alternative spellings worth searching for.
 *
 * A query counts as CAS-like when it consists only of digits and the
 * separators "-" or "/" and contains at least one separator. Anything else
 * (including non-string input) is returned unchanged as a one-element array.
 *
 * CAS format: XXXXXXX-XX-X (registry-sequence-check)
 * - First part: 2-7 digits (registry number)
 * - Second part: ALWAYS 2 digits (zero-padded)
 * - Third part: ALWAYS 1 digit (check digit)
 *
 * @param {string} query - Raw search text
 * @returns {Array} Unique variations, the original query first
 */
function getCasNumberVariations(query) {
  // String methods are used below; hand anything else back untouched
  // instead of throwing on e.g. a numeric query.
  if (typeof query !== 'string') {
    return [query];
  }

  // Must be digits and separators only, with at least one separator.
  if (!/^[\d\-\/]+$/.test(query)) {
    return [query];
  }
  if (!query.includes('-') && !query.includes('/')) {
    return [query];
  }

  const variations = new Set([query]);
  const parts = query.split(/[-\/]/);

  // Zero-pad a sequence segment to 2 digits, but leave an empty segment empty:
  // padding "" would fabricate a "00" sequence the user never typed.
  const padSequence = (segment) => (segment === '' ? segment : segment.padStart(2, '0'));

  if (parts.length === 3) {
    const [part1, part2, part3] = parts;

    // Reversed input looks like sequence/check/registry (e.g. "6/4/7440"):
    // two short parts followed by a long one. Otherwise assume the standard
    // registry-sequence-check order (e.g. "7440-06-4").
    const isLikelyReversed = part1.length <= 2 && part2.length <= 2 && part3.length >= 3;
    const [registry, sequence, check] = isLikelyReversed
      ? [part3, padSequence(part1), part2]
      : [part1, padSequence(part2), part3];

    variations.add(`${registry}-${sequence}-${check}`);
    variations.add(`${registry}/${sequence}/${check}`);
  }

  if (parts.length === 2 || parts.length === 3) {
    // Also try the parts exactly as typed with each separator.
    variations.add(parts.join('-'));
    variations.add(parts.join('/'));
  }

  return Array.from(variations);
}
|
|
74
|
+
|
|
1
75
|
class SearchService {
|
|
2
|
-
constructor(connection) {
|
|
76
|
+
constructor(connection) {
|
|
77
|
+
this.connection = connection;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Search chemicals using OpenSearch with configurable boost parameters
|
|
82
|
+
* @param {string} query - Search query string
|
|
83
|
+
* @param {Object} options - Search options
|
|
84
|
+
* @param {number} options.limit - Maximum number of results (default: 10)
|
|
85
|
+
* @param {number} options.casExact - Boost for exact CAS matches (default: 50)
|
|
86
|
+
* @param {number} options.casPrefix - Boost for CAS prefix matches (default: 10)
|
|
87
|
+
* @param {number} options.nameExact - Boost for exact name matches (default: 40)
|
|
88
|
+
* @param {number} options.namePrefix - Boost for name prefix matches (default: 8)
|
|
89
|
+
* @param {number} options.identifierExact - Boost for exact identifier matches (default: 30)
|
|
90
|
+
* @param {number} options.identifierPrefix - Boost for identifier prefix matches (default: 5)
|
|
91
|
+
* @param {number} options.synonymExact - Boost for exact synonym matches (default: 100)
|
|
92
|
+
* @param {number} options.synonymPrefix - Boost for synonym prefix matches (default: 3)
|
|
93
|
+
* @returns {Promise<Object>} Search results with chemicals array
|
|
94
|
+
*/
|
|
95
|
+
async searchChemicals(query, options = {}) {
|
|
96
|
+
if (!query) {
|
|
97
|
+
return { results: [] };
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// Extract options with defaults
|
|
101
|
+
const limit = options.limit || 10;
|
|
102
|
+
const casExact = options.casExact !== undefined ? options.casExact : 50;
|
|
103
|
+
const casPrefix = options.casPrefix !== undefined ? options.casPrefix : 10;
|
|
104
|
+
const nameExact = options.nameExact !== undefined ? options.nameExact : 40;
|
|
105
|
+
const namePrefix = options.namePrefix !== undefined ? options.namePrefix : 8;
|
|
106
|
+
const identifierExact = options.identifierExact !== undefined ? options.identifierExact : 30;
|
|
107
|
+
const identifierPrefix = options.identifierPrefix !== undefined ? options.identifierPrefix : 5;
|
|
108
|
+
const synonymExact = options.synonymExact !== undefined ? options.synonymExact : 100;
|
|
109
|
+
const synonymPrefix = options.synonymPrefix !== undefined ? options.synonymPrefix : 3;
|
|
110
|
+
|
|
111
|
+
try {
|
|
112
|
+
const opensearchClient = this.connection.getOpenSearchClient();
|
|
113
|
+
|
|
114
|
+
// Get CAS number variations (if applicable)
|
|
115
|
+
const queryVariations = getCasNumberVariations(query);
|
|
116
|
+
|
|
117
|
+
// Log if we're trying multiple variations
|
|
118
|
+
if (queryVariations.length > 1) {
|
|
119
|
+
logInfo('pegasus-sdk', `CAS format detection: trying ${queryVariations.length} variations for "${query}": ${JSON.stringify(queryVariations)}`);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// Build should clauses for all query variations
|
|
123
|
+
const shouldClauses = [];
|
|
124
|
+
|
|
125
|
+
for (const queryVariation of queryVariations) {
|
|
126
|
+
// Exact matches (configurable priority)
|
|
127
|
+
shouldClauses.push(
|
|
128
|
+
{ term: { 'cas_numbers': { value: queryVariation, boost: casExact } } },
|
|
129
|
+
{ term: { 'chemical_name.keyword': { value: queryVariation, boost: nameExact, case_insensitive: true } } },
|
|
130
|
+
{ term: { 'identifier_values': { value: queryVariation, boost: identifierExact } } },
|
|
131
|
+
{ term: { 'synonyms.keyword': { value: queryVariation, boost: synonymExact, case_insensitive: true } } },
|
|
132
|
+
// Prefix matches (configurable priority)
|
|
133
|
+
{ prefix: { 'cas_numbers': { value: queryVariation, boost: casPrefix } } },
|
|
134
|
+
{ prefix: { 'chemical_name.keyword': { value: queryVariation, boost: namePrefix, case_insensitive: true } } },
|
|
135
|
+
{ prefix: { 'identifier_values': { value: queryVariation, boost: identifierPrefix } } },
|
|
136
|
+
{ prefix: { 'synonyms.keyword': { value: queryVariation, boost: synonymPrefix, case_insensitive: true } } }
|
|
137
|
+
);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
const indexName = this.connection.getOpenSearchIndex();
|
|
141
|
+
|
|
142
|
+
const response = await opensearchClient.search({
|
|
143
|
+
index: indexName,
|
|
144
|
+
body: {
|
|
145
|
+
size: limit,
|
|
146
|
+
query: {
|
|
147
|
+
bool: {
|
|
148
|
+
should: shouldClauses,
|
|
149
|
+
minimum_should_match: 1
|
|
150
|
+
}
|
|
151
|
+
},
|
|
152
|
+
_source: ['postgres_id', 'chemical_name', 'cas_numbers', 'identifier_values', 'synonyms']
|
|
153
|
+
}
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
const hits = response.body?.hits?.hits || [];
|
|
157
|
+
const results = hits.map((hit) => ({
|
|
158
|
+
id: hit._source.postgres_id,
|
|
159
|
+
name: hit._source.chemical_name,
|
|
160
|
+
cas: hit._source.cas_numbers || [],
|
|
161
|
+
identifiers: hit._source.identifier_values || [],
|
|
162
|
+
synonyms: hit._source.synonyms || [],
|
|
163
|
+
score: hit._score
|
|
164
|
+
}));
|
|
3
165
|
|
|
4
|
-
|
|
166
|
+
return { results };
|
|
167
|
+
} catch (error) {
|
|
168
|
+
logError('pegasus-sdk', 'SearchService', 'searchChemicals', error);
|
|
169
|
+
throw error;
|
|
170
|
+
}
|
|
171
|
+
}
|
|
5
172
|
|
|
6
|
-
|
|
173
|
+
/**
|
|
174
|
+
* Search for chemicals with prefix matching priority
|
|
175
|
+
* @param {string} searchTerm - Search term
|
|
176
|
+
* @param {number} limit - Maximum number of results (default: 10)
|
|
177
|
+
* @returns {Promise<Object>} Search results
|
|
178
|
+
*/
|
|
179
|
+
async searchStartsWith(searchTerm, limit = 10) {
|
|
180
|
+
return this.searchChemicals(searchTerm, {
|
|
181
|
+
limit,
|
|
182
|
+
// Prioritize prefix matches over exact matches
|
|
183
|
+
casPrefix: 50,
|
|
184
|
+
casExact: 20,
|
|
185
|
+
namePrefix: 40,
|
|
186
|
+
nameExact: 15,
|
|
187
|
+
identifierPrefix: 30,
|
|
188
|
+
identifierExact: 10,
|
|
189
|
+
synonymPrefix: 35,
|
|
190
|
+
synonymExact: 10
|
|
191
|
+
});
|
|
192
|
+
}
|
|
7
193
|
|
|
8
|
-
|
|
194
|
+
/**
|
|
195
|
+
* Search for chemicals (alias for general search)
|
|
196
|
+
* @param {string} searchTerm - Search term
|
|
197
|
+
* @param {number} limit - Maximum number of results (default: 10)
|
|
198
|
+
* @returns {Promise<Object>} Search results
|
|
199
|
+
*/
|
|
200
|
+
async searchContains(searchTerm, limit = 10) {
|
|
201
|
+
// Use default balanced weights for contains search
|
|
202
|
+
return this.searchChemicals(searchTerm, { limit });
|
|
203
|
+
}
|
|
9
204
|
|
|
10
|
-
|
|
205
|
+
/**
|
|
206
|
+
* Search for chemicals with exact matching priority
|
|
207
|
+
* @param {string} searchTerm - Search term
|
|
208
|
+
* @param {number} limit - Maximum number of results (default: 10)
|
|
209
|
+
* @returns {Promise<Object>} Search results
|
|
210
|
+
*/
|
|
211
|
+
async searchExact(searchTerm, limit = 10) {
|
|
212
|
+
return this.searchChemicals(searchTerm, {
|
|
213
|
+
limit,
|
|
214
|
+
// Prioritize exact matches, minimize prefix matches
|
|
215
|
+
casExact: 100,
|
|
216
|
+
casPrefix: 1,
|
|
217
|
+
nameExact: 80,
|
|
218
|
+
namePrefix: 1,
|
|
219
|
+
identifierExact: 60,
|
|
220
|
+
identifierPrefix: 1,
|
|
221
|
+
synonymExact: 150,
|
|
222
|
+
synonymPrefix: 1
|
|
223
|
+
});
|
|
224
|
+
}
|
|
11
225
|
|
|
12
|
-
|
|
226
|
+
/**
|
|
227
|
+
* Search for chemicals by CAS number
|
|
228
|
+
* @param {string} casNumber - CAS number to search for
|
|
229
|
+
* @param {string} searchType - Search type: 'exact' or 'prefix' (default: 'exact')
|
|
230
|
+
* @returns {Promise<Object>} Search results
|
|
231
|
+
*/
|
|
232
|
+
async searchByCAS(casNumber, searchType = 'exact') {
|
|
233
|
+
const isExact = searchType === 'exact';
|
|
234
|
+
return this.searchChemicals(casNumber, {
|
|
235
|
+
limit: 10,
|
|
236
|
+
// Heavily prioritize CAS field
|
|
237
|
+
casExact: isExact ? 200 : 50,
|
|
238
|
+
casPrefix: isExact ? 10 : 100,
|
|
239
|
+
nameExact: 5,
|
|
240
|
+
namePrefix: 1,
|
|
241
|
+
identifierExact: 5,
|
|
242
|
+
identifierPrefix: 1,
|
|
243
|
+
synonymExact: 5,
|
|
244
|
+
synonymPrefix: 1
|
|
245
|
+
});
|
|
246
|
+
}
|
|
13
247
|
|
|
14
|
-
|
|
248
|
+
/**
|
|
249
|
+
* Search for chemicals by identifier value
|
|
250
|
+
* @param {string} identifierValue - Identifier value to search for
|
|
251
|
+
* @param {string} searchType - Search type: 'exact' or 'prefix' (default: 'exact')
|
|
252
|
+
* @returns {Promise<Object>} Search results
|
|
253
|
+
*/
|
|
254
|
+
async searchByIdentifier(identifierValue, searchType = 'exact') {
|
|
255
|
+
const isExact = searchType === 'exact';
|
|
256
|
+
return this.searchChemicals(identifierValue, {
|
|
257
|
+
limit: 10,
|
|
258
|
+
// Heavily prioritize identifier field
|
|
259
|
+
identifierExact: isExact ? 200 : 50,
|
|
260
|
+
identifierPrefix: isExact ? 10 : 100,
|
|
261
|
+
casExact: 10,
|
|
262
|
+
casPrefix: 5,
|
|
263
|
+
nameExact: 5,
|
|
264
|
+
namePrefix: 1,
|
|
265
|
+
synonymExact: 5,
|
|
266
|
+
synonymPrefix: 1
|
|
267
|
+
});
|
|
268
|
+
}
|
|
15
269
|
|
|
16
|
-
|
|
270
|
+
/**
|
|
271
|
+
* Search for chemicals by synonym
|
|
272
|
+
* @param {string} synonymTerm - Synonym term to search for
|
|
273
|
+
* @param {string} searchType - Search type: 'exact' or 'prefix' (default: 'exact')
|
|
274
|
+
* @returns {Promise<Object>} Search results
|
|
275
|
+
*/
|
|
276
|
+
async searchBySynonym(synonymTerm, searchType = 'exact') {
|
|
277
|
+
const isExact = searchType === 'exact';
|
|
278
|
+
return this.searchChemicals(synonymTerm, {
|
|
279
|
+
limit: 10,
|
|
280
|
+
// Heavily prioritize synonym field
|
|
281
|
+
synonymExact: isExact ? 200 : 50,
|
|
282
|
+
synonymPrefix: isExact ? 10 : 100,
|
|
283
|
+
nameExact: 20,
|
|
284
|
+
namePrefix: 5,
|
|
285
|
+
casExact: 10,
|
|
286
|
+
casPrefix: 5,
|
|
287
|
+
identifierExact: 5,
|
|
288
|
+
identifierPrefix: 1
|
|
289
|
+
});
|
|
290
|
+
}
|
|
17
291
|
|
|
18
292
|
/**
 * Placeholder for a query-builder based search.
 *
 * The body is empty: `queryBuilder` is ignored and the returned promise
 * resolves to undefined.
 *
 * @param {*} queryBuilder - Currently unused
 * @returns {Promise<undefined>}
 */
async advancedSearch(queryBuilder) {}
|
|
19
293
|
|
package/package.json
CHANGED
|
@@ -1,21 +1,23 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@toxplanet/pegasus-sdk",
|
|
3
|
-
"version": "1.0.0",
|
|
3
|
+
"version": "1.0.1",
|
|
4
4
|
"description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
|
7
|
+
"test:search": "node tests/search.js",
|
|
8
|
+
"test:chemicals": "node tests/chemicals.js"
|
|
7
9
|
},
|
|
8
10
|
"keywords": [
|
|
9
11
|
],
|
|
10
12
|
"author": "Chemical Research Development Team",
|
|
11
13
|
"license": "MIT",
|
|
12
14
|
"dependencies": {
|
|
15
|
+
"@toxplanet/tphelper": "1.2.8",
|
|
13
16
|
"pg": "^8.11.3",
|
|
17
|
+
"drizzle-orm": "^0.30.0",
|
|
14
18
|
"@opensearch-project/opensearch": "^2.5.0",
|
|
15
|
-
"@aws-sdk/client-opensearch-serverless": "^3.490.0",
|
|
16
19
|
"@aws-sdk/client-secrets-manager": "^3.490.0",
|
|
17
|
-
"@aws-sdk/credential-providers": "^3.490.0",
|
|
18
|
-
"aws4fetch": "^1.0.18"
|
|
20
|
+
"@aws-sdk/credential-providers": "^3.490.0"
|
|
19
21
|
},
|
|
20
22
|
"engines": {
|
|
21
23
|
"node": ">=18.0.0"
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
const PegasusSDK = require('../index');
|
|
2
|
+
const { logInfo, logError } = require('@toxplanet/tphelper/logging');
|
|
3
|
+
|
|
4
|
+
// Status markers used in test log lines. The `log` helper compares against
// PASS and FAIL to update its counters; WARN is informational only.
const ICONS = {
  PASS: '[OK]',
  FAIL: '[!!]',
  WARN: '[?]',
};
|
|
9
|
+
|
|
10
|
+
/**
 * End-to-end smoke test for the chemicals service: create a record, read it
 * back, look up a missing ID, delete the record and confirm the deletion.
 *
 * Progress is reported through logInfo/logError. The process exit code is set
 * to 1 when an exception aborts the run or when any check is logged as FAIL.
 * `process.exitCode` is used instead of `process.exit()` so that the `finally`
 * block can still disconnect the SDK before the process ends.
 *
 * @returns {Promise<void>}
 */
async function runChemicalTests() {
  const sdk = new PegasusSDK();
  let testsPassed = 0;
  let testsFailed = 0;

  // The only place where the pass/fail counters are updated.
  const log = (test, status, details = '') => {
    logInfo('pegasus-sdk-tests', `${test}: ${status} ${details}`);
    if (status === ICONS.PASS) testsPassed++;
    else if (status === ICONS.FAIL) testsFailed++;
  };

  const print = (message) => {
    logInfo('pegasus-sdk-tests', message);
  };

  // Prints a section title framed by two 60-character rules.
  const banner = (title) => {
    print('\n' + '='.repeat(60));
    print(title);
    print('='.repeat(60));
  };

  // Remembered so the catch block can remove the record if a later step throws.
  let createdChemicalId = null;

  try {
    await sdk.connect();
    log('Connection', ICONS.PASS);

    const testChemical = {
      source_id: `test-chemical-${Date.now()}`,
      chemical_name: 'Test Chemical (Benzene)',
      chemical_meta: {
        test: true,
        description: 'This is a test chemical record'
      },
      chemical_identifiers: [
        { type: 'cas', value: '71-43-2' },
        { type: 'pubchem_cid', value: 'CID241' }
      ],
      chemical_synonyms: ['Test Synonym 1', 'Test Synonym 2'],
      chemical_categories: ['test', 'aromatic'],
      created_at: new Date(),
      updated_at: new Date()
    };

    banner('TEST 1: Create Chemical');

    const created = await sdk.chemicals.createChemical(testChemical);
    // Guarded access: a null/undefined result must be reported as a failure,
    // not surface as a TypeError.
    createdChemicalId = created?.chemicalId ?? null;

    if (!createdChemicalId) {
      // log() already counts the failure; every later test needs the ID, so abort.
      log('Create Chemical', ICONS.FAIL, 'No ID returned');
      throw new Error('createChemical returned no chemicalId');
    }

    log('Create Chemical', ICONS.PASS, `(ID: ${created.chemicalId.substring(0, 8)}...)`);
    print(`  Source ID: ${created.sourceId}`);
    print(`  Name: ${created.chemicalName}`);
    print(`  Identifiers: ${created.chemicalIdentifiers.length} found`);
    print(`  Synonyms: ${created.chemicalSynonyms.length} found`);

    banner('TEST 2: Get Chemical By ID');

    const retrieved = await sdk.chemicals.getChemicalById(createdChemicalId);

    if (retrieved && retrieved.chemicalId === createdChemicalId) {
      log('Get Chemical By ID', ICONS.PASS);
      print(`  Retrieved Name: ${retrieved.chemicalName}`);
      print(`  Source ID: ${retrieved.sourceId}`);

      if (retrieved.chemicalName === testChemical.chemical_name) {
        log('Name Match', ICONS.PASS);
      } else {
        log('Name Match', ICONS.FAIL, `Expected: ${testChemical.chemical_name}, Got: ${retrieved.chemicalName}`);
      }

      if (retrieved.sourceId === testChemical.source_id) {
        log('Source ID Match', ICONS.PASS);
      } else {
        log('Source ID Match', ICONS.FAIL);
      }

      if (Array.isArray(retrieved.chemicalIdentifiers) && retrieved.chemicalIdentifiers.length === 2) {
        log('Identifiers Match', ICONS.PASS, `(${retrieved.chemicalIdentifiers.length} identifiers)`);
      } else {
        log('Identifiers Match', ICONS.FAIL);
      }

      if (Array.isArray(retrieved.chemicalSynonyms) && retrieved.chemicalSynonyms.length === 2) {
        log('Synonyms Match', ICONS.PASS, `(${retrieved.chemicalSynonyms.length} synonyms)`);
      } else {
        log('Synonyms Match', ICONS.FAIL);
      }
    } else {
      log('Get Chemical By ID', ICONS.FAIL, 'Chemical not found or ID mismatch');
    }

    banner('TEST 3: Get Non-Existent Chemical');

    const nonExistent = await sdk.chemicals.getChemicalById('00000000-0000-0000-0000-000000000000');

    if (nonExistent === null) {
      log('Get Non-Existent', ICONS.PASS, '(Correctly returned null)');
    } else {
      log('Get Non-Existent', ICONS.FAIL, 'Should return null for non-existent ID');
    }

    banner('TEST 4: Delete Chemical');

    const deleted = await sdk.chemicals.deleteChemical(createdChemicalId);

    if (deleted && deleted.chemicalId === createdChemicalId) {
      log('Delete Chemical', ICONS.PASS, `(Deleted ID: ${deleted.chemicalId.substring(0, 8)}...)`);
    } else {
      log('Delete Chemical', ICONS.FAIL, 'Delete did not return expected result');
    }

    banner('TEST 5: Verify Deletion');

    const shouldBeGone = await sdk.chemicals.getChemicalById(createdChemicalId);

    if (shouldBeGone === null) {
      log('Verify Deletion', ICONS.PASS, '(Chemical no longer exists)');
    } else {
      log('Verify Deletion', ICONS.FAIL, 'Chemical still exists after deletion');
    }

    print('\n' + '='.repeat(60));
    print(`${ICONS.PASS} Tests Passed: ${testsPassed}`);
    if (testsFailed > 0) print(`${ICONS.FAIL} Tests Failed: ${testsFailed}`);
    print('='.repeat(60));

    // Checks that were logged as FAIL must also fail the process.
    if (testsFailed > 0) process.exitCode = 1;
  } catch (error) {
    logError('pegasus-sdk-tests', 'chemicals-tests', 'runChemicalTests', error);
    print(`\n${ICONS.FAIL} Test Failed: ${error.message}`);

    if (createdChemicalId) {
      print('\nCleaning up test chemical...');
      try {
        await sdk.chemicals.deleteChemical(createdChemicalId);
        print('Cleanup successful');
      } catch (cleanupError) {
        logError('pegasus-sdk-tests', 'chemicals-tests', 'cleanup', cleanupError);
        print('Cleanup failed (chemical may need manual deletion)');
      }
    }

    // Not process.exit(1): that would end the process before `finally` disconnects.
    process.exitCode = 1;
  } finally {
    await sdk.disconnect();
  }
}
|
|
164
|
+
|
|
165
|
+
// Start the suite. A rejection that escapes runChemicalTests (for example from
// the disconnect in its finally block) is logged and turned into a failing
// exit code instead of being left as an unhandled rejection.
runChemicalTests().catch((error) => {
  logError('pegasus-sdk-tests', 'chemicals-tests', 'main', error);
  process.exitCode = 1;
});
|
package/tests/search.js
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
const PegasusSDK = require('../index');
|
|
2
|
+
const { logInfo, logError } = require('@toxplanet/tphelper/logging');
|
|
3
|
+
|
|
4
|
+
// Status markers used in test log lines. The `log` helper compares against
// PASS and FAIL to update its counters; WARN is informational only.
const ICONS = {
  PASS: '[OK]',
  FAIL: '[!!]',
  WARN: '[?]',
};
|
|
9
|
+
|
|
10
|
+
/**
 * Smoke test for the search API: connects, runs a health check, then calls
 * each SearchService method and the ChemicalsService search helpers,
 * validating only the shape of what comes back.
 *
 * Any thrown error is logged and marks the process as failed through
 * `process.exitCode`, so the `finally` block can still disconnect the SDK
 * (a `process.exit()` call here would end the process first).
 *
 * @returns {Promise<void>}
 */
async function runTests() {
  const sdk = new PegasusSDK();
  let testsPassed = 0;
  let testsFailed = 0;

  // The only place where the pass/fail counters are updated.
  const log = (test, status, details = '') => {
    logInfo('pegasus-sdk-tests', `${test}: ${status} ${details}`);
    if (status === ICONS.PASS) testsPassed++;
    else if (status === ICONS.FAIL) testsFailed++;
  };

  const print = (message) => {
    logInfo('pegasus-sdk-tests', message);
  };

  try {
    await sdk.connect();
    log('Connection', ICONS.PASS);

    // A missing or disconnected backend is reported as WARN, not as a failure.
    const health = await sdk.healthCheck();
    const pgStatus = health.postgres.connected ? ICONS.PASS : ICONS.FAIL;
    const osStatus = health.opensearch?.connected ? ICONS.PASS : ICONS.WARN;
    log('Health Check', health.postgres.connected && health.opensearch?.connected ? ICONS.PASS : ICONS.WARN, `(PG:${pgStatus} OS:${osStatus})`);

    if (!sdk.search?.searchChemicals) throw new Error('Search service not found');
    log('Search Service', ICONS.PASS);

    const emptyResult = await sdk.search.searchChemicals('');
    if (emptyResult.results.length !== 0) throw new Error('Empty query should return no results');
    log('Empty Query', ICONS.PASS);

    const basicResult = await sdk.search.searchChemicals('test', { limit: 5 });
    if (!Array.isArray(basicResult.results)) throw new Error('Invalid result structure');
    log('Basic Search', ICONS.PASS, `(${basicResult.results.length} results)`);

    const customResult = await sdk.search.searchChemicals('carbon', {
      limit: 3,
      casExact: 100,
      nameExact: 50,
      synonymExact: 75
    });
    log('Custom Boost', ICONS.PASS, `(${customResult.results.length} results)`);

    const casResult = await sdk.search.searchChemicals('7440-06-4', { limit: 3 });
    log('CAS Search', ICONS.PASS, `(${casResult.results.length} results)`);

    const reversedCasResult = await sdk.search.searchChemicals('06/4/7440', { limit: 3 });
    log('CAS Reversed Format', ICONS.PASS, `(${reversedCasResult.results.length} results)`);

    // The structure check needs at least one hit; otherwise it is only a warning.
    if (basicResult.results.length > 0) {
      const result = basicResult.results[0];
      const requiredFields = ['id', 'name', 'cas', 'identifiers', 'synonyms', 'score'];
      const missingFields = requiredFields.filter(field => !(field in result));
      if (missingFields.length > 0) throw new Error(`Missing fields: ${missingFields.join(', ')}`);
      if (!Array.isArray(result.cas) || !Array.isArray(result.identifiers) || !Array.isArray(result.synonyms)) {
        throw new Error('Array fields invalid');
      }
      if (typeof result.score !== 'number') throw new Error('Score not a number');
      log('Result Structure', ICONS.PASS);
    } else {
      log('Result Structure', ICONS.WARN, '(no results to validate)');
    }

    const startsWithResult = await sdk.search.searchStartsWith('test', 3);
    const containsResult = await sdk.search.searchContains('test', 3);
    const exactResult = await sdk.search.searchExact('test', 3);
    if (!startsWithResult.results || !containsResult.results || !exactResult.results) {
      throw new Error('Forwarding methods failed');
    }
    log('SearchService Methods', ICONS.PASS, `(starts:${startsWithResult.results.length} contains:${containsResult.results.length} exact:${exactResult.results.length})`);

    const casExactResult = await sdk.search.searchByCAS('7440-06-4', 'exact');
    const casPrefixResult = await sdk.search.searchByCAS('7440', 'prefix');
    log('CAS Methods', ICONS.PASS, `(exact:${casExactResult.results.length} prefix:${casPrefixResult.results.length})`);

    const identifierResult = await sdk.search.searchByIdentifier('CCCO', 'exact');
    log('Identifier Method', ICONS.PASS, `(${identifierResult.results.length} results)`);

    const synonymExactResult = await sdk.search.searchBySynonym('grain alcohol', 'exact');
    const synonymPrefixResult = await sdk.search.searchBySynonym('alco', 'prefix');
    log('Synonym Methods', ICONS.PASS, `(exact:${synonymExactResult.results.length} prefix:${synonymPrefixResult.results.length})`);

    const nameSearchResult = await sdk.chemicals.searchByName('platinum', 3);
    const synonymSearchResult = await sdk.chemicals.searchBySynonym('alcohol', 3);
    if (!nameSearchResult.results || !synonymSearchResult.results) {
      throw new Error('ChemicalsService methods failed');
    }
    log('ChemicalsService Methods', ICONS.PASS, `(name:${nameSearchResult.results.length} synonym:${synonymSearchResult.results.length})`);

    const highLimitResult = await sdk.search.searchChemicals('carbon', { limit: 20 });
    log('High Limit Search', ICONS.PASS, `(${highLimitResult.results.length} results)`);

    print(`\n${'='.repeat(60)}`);
    print('FINAL TEST: Benzene Search Results');
    print('='.repeat(60));
    const benzeneResults = await sdk.search.searchChemicals('benzene', { limit: 3 });
    print(`Found ${benzeneResults.results.length} results for "benzene":\n`);
    benzeneResults.results.forEach((result, i) => {
      print(`${i + 1}. ${result.name}`);
      print(`   CAS: ${result.cas.join(', ') || 'N/A'}`);
      print(`   Score: ${result.score.toFixed(2)}`);
      if (result.synonyms.length > 0) {
        print(`   Synonyms: ${result.synonyms.slice(0, 3).join(', ')}${result.synonyms.length > 3 ? '...' : ''}`);
      }
      print('');
    });

    print('='.repeat(60));
    print(`${ICONS.PASS} Tests Passed: ${testsPassed}`);
    if (testsFailed > 0) print(`${ICONS.FAIL} Tests Failed: ${testsFailed}`);
    print('='.repeat(60));

    // Keep the exit code in step with the printed failure count.
    if (testsFailed > 0) process.exitCode = 1;
  } catch (error) {
    logError('pegasus-sdk-tests', 'search-tests', 'runTests', error);
    print(`\n${ICONS.FAIL} Test Failed: ${error.message}`);
    // Not process.exit(1): that would end the process before `finally` disconnects.
    process.exitCode = 1;
  } finally {
    await sdk.disconnect();
  }
}
|
|
130
|
+
|
|
131
|
+
// Run the suite only when this file is executed directly (e.g. `node tests/search.js`);
// when it is require()d, the caller decides when to invoke the exported function.
if (module === require.main) {
  runTests().catch((err) => {
    // Anything that escapes runTests is logged and fails the process.
    logError('pegasus-sdk-tests', 'search-tests', 'main', err);
    process.exit(1);
  });
}

// Exported so other scripts can run the same suite programmatically.
module.exports = runTests;
|