@toxplanet/pegasus-sdk 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/env.example +3 -0
- package/index.d.ts +215 -0
- package/index.js +37 -0
- package/lib/chemicals.js +57 -0
- package/lib/connection.js +206 -0
- package/lib/documents.js +47 -0
- package/lib/search.js +33 -0
- package/lib/sync.js +41 -0
- package/lib/utils.js +47 -0
- package/package.json +23 -0
package/env.example
ADDED
package/index.d.ts
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
/**
 * Options accepted by the SDK and connection constructors.
 *
 * Every field is optional; lib/connection.js substitutes an environment
 * variable or a built-in default for anything that is omitted.
 */
export interface PegasusConfig {
  /** Falls back to `NODE_ENV`, then `'development'`. Used in the default secret name. */
  environment?: string;
  /** AWS region; falls back to `AWS_REGION`, then `'us-east-1'`. */
  region?: string;
  /** Secrets Manager secret id; defaults to `pegasus/<environment>/database`. */
  secretName?: string;
  /** Falls back to `OPENSEARCH_ENDPOINT`. No OpenSearch client is created when unset. */
  openSearchEndpoint?: string;

  /** Passed to the PostgreSQL pool as `max` (default 2). */
  maxConnections?: number;
  /** Passed to the PostgreSQL pool as `min` (default 0). */
  minConnections?: number;
  /** Passed to the PostgreSQL pool as `idleTimeoutMillis` (default 30000). */
  idleTimeoutMillis?: number;
  /** Passed to the PostgreSQL pool as `connectionTimeoutMillis` (default 5000). */
  connectionTimeoutMillis?: number;
  /** Passed to the PostgreSQL pool as `statement_timeout` (default 30000). */
  statementTimeout?: number;
  /** Passed to the PostgreSQL pool as `query_timeout` (default 30000). */
  queryTimeout?: number;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Chemical record shape used in the ChemicalsService and UtilsService
 * signatures. `chemical_id` and the two timestamps are the only optional
 * fields.
 */
export interface Chemical {
  chemical_id?: string;
  source_id: string;
  chemical_name: string;
  /** Typed identifier entries; see {@link Identifier}. */
  chemical_identifiers: Identifier[];
  chemical_synonyms: string[];
  chemical_categories: string[];
  /** Free-form metadata bag; no schema is declared for it. */
  chemical_meta: Record<string, any>;
  created_at?: Date;
  updated_at?: Date;
}

/** One `type` / `value` pair attached to a {@link Chemical}. */
export interface Identifier {
  type: string;
  value: string;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Document record shape used in the DocumentsService signatures.
 * Only `document_path` is required.
 */
export interface Document {
  document_id?: string;
  document_path: string;
  processed_at?: Date;
  cas_count?: number;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Outbox row shape returned by `SyncService.getFailedEntries`.
 * `processed_at` and `last_error` are optional; everything else is required.
 */
export interface OutboxEntry {
  outbox_id: number;
  chemical_id: string;
  /** The operation recorded for the row. */
  operation: 'INSERT' | 'UPDATE' | 'DELETE';
  created_at: Date;
  processed_at?: Date;
  retry_count: number;
  last_error?: string;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Result of `PegasusConnection.testConnection()`.
 *
 * On a successful PostgreSQL probe, `postgres` carries the server time,
 * server version and pool counters. On failure it carries only
 * `connected: false` and `error`.
 */
export interface ConnectionStatus {
  postgres: {
    connected: boolean;
    /** Server-side `NOW()` from the probe query. */
    timestamp?: Date;
    /** Server-side `version()` from the probe query. */
    version?: string;
    /** Pool `totalCount` at probe time. */
    poolSize?: number;
    /** Pool `idleCount` at probe time. */
    idleConnections?: number;
    /** Pool `waitingCount` at probe time. */
    waitingRequests?: number;
    error?: string;
  };
  /** `null` when no OpenSearch client exists or the PostgreSQL probe failed. */
  opensearch: {
    connected: boolean;
    version?: string;
    cluster?: string;
    error?: string;
  } | null;
  environment: string;
  region: string;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Connection wrapper implemented in lib/connection.js: a PostgreSQL pool whose
 * credentials come from AWS Secrets Manager, plus an optional OpenSearch
 * client.
 */
export declare class PegasusConnection {
  constructor(config?: PegasusConfig);

  /** Resolves to the parsed secret JSON; the value is cached after the first fetch. */
  getSecret(): Promise<any>;

  /** Creates the pool (and the OpenSearch client when an endpoint is set). No-op if already connected. */
  connect(): Promise<void>;

  /** Ends the pool and clears the cached secret. No-op if not connected. */
  disconnect(): Promise<void>;

  /** Returns the PostgreSQL pool; throws if `connect()` has not created one. */
  getPostgresClient(): any;

  /** Returns the OpenSearch client; throws if none was created. */
  getOpenSearchClient(): any;

  /** Probes the connections; failures are reported in the resolved value. */
  testConnection(): Promise<ConnectionStatus>;

  /** Runs `sql` through the pool's `query`. */
  query(sql: string, params?: any[]): Promise<any>;

  /** Resolves to a client checked out with the pool's `connect()`. */
  getClient(): Promise<any>;

  /** Runs `callback` between `BEGIN` and `COMMIT`; issues `ROLLBACK` if it throws. */
  transaction(callback: (client: any) => Promise<any>): Promise<any>;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Declared API surface of the chemicals service.
 *
 * NOTE(review): lib/chemicals.js in this package version ships every method
 * with an empty body, so the return types below describe the intended
 * contract rather than observable behaviour.
 */
export declare class ChemicalsService {
  constructor(connection: PegasusConnection);

  // bulkIndex*
  bulkIndexFielded(documents: any[]): Promise<void>;
  bulkIndexFulltext(documents: any[]): Promise<void>;
  bulkIndexSubstances(substances: any[]): Promise<void>;

  // create / update / delete
  createChemical(chemical: Chemical): Promise<Chemical>;
  updateChemical(chemicalId: string, updates: Partial<Chemical>): Promise<Chemical>;
  deleteChemical(chemicalId: string): Promise<void>;
  deleteBySourceId(sourceId: string): Promise<void>;
  deleteCollection(collectionName: string): Promise<number>;
  updateCollectionProperty(collectionName: string, propertyPath: string, newValue: any): Promise<number>;
  bulkUpdateProperty(filter: any, propertyPath: string, newValue: any): Promise<number>;

  // get*
  getChemicalById(chemicalId: string): Promise<Chemical>;
  getChemicalBySourceId(sourceId: string): Promise<Chemical>;
  getChemicalsByCAS(casNumber: string): Promise<Chemical[]>;
  getChemicalsByIdentifier(identifierType: string, identifierValue: string): Promise<Chemical[]>;

  // count*
  countAll(): Promise<number>;
  countByCollection(collectionName: string): Promise<number>;
  countByIdentifier(identifierValue: string): Promise<number>;
  countByCAS(casNumber: string): Promise<number>;
  getTotalSynonymCount(): Promise<number>;
  getSynonymCount(synonymTerm: string): Promise<number>;

  // convert*
  convertIdentifier(fromIdentifier: string, toIdentifierType: string): Promise<any>;
  convertIdentifiersBatch(fromIdentifiers: string[], toIdentifierType: string): Promise<any[]>;

  // search / find
  searchByName(searchTerm: string, limit: number): Promise<Chemical[]>;
  searchBySynonym(synonymTerm: string, limit: number): Promise<Chemical[]>;
  findChemicalsWithoutDocuments(collectionName: string, searchTerm: string, pageSize: number): Promise<Chemical[]>;
  countChemicalsWithoutDocuments(collectionName: string): Promise<number>;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Declared API surface of the documents service.
 *
 * NOTE(review): lib/documents.js in this package version ships every method
 * with an empty body, so the return types below describe the intended
 * contract rather than observable behaviour.
 */
export declare class DocumentsService {
  constructor(connection: PegasusConnection);

  // create / update / delete
  createDocument(documentPath: string, casNumbers: string[]): Promise<Document>;
  bulkCreateDocuments(documents: any[]): Promise<void>;
  updateDocument(documentId: string, updates: Partial<Document>): Promise<Document>;
  deleteDocument(documentId: string): Promise<void>;
  deleteDocumentByPath(documentPath: string): Promise<void>;

  // get*
  getDocumentById(documentId: string): Promise<Document>;
  getDocumentByPath(documentPath: string): Promise<Document>;
  getDocumentsByCAS(casNumber: string): Promise<Document[]>;
  getCASByDocument(documentId: string): Promise<string[]>;
  getCASByDocumentPath(documentPath: string): Promise<string[]>;

  // CAS membership
  addCASToDocument(documentId: string, casNumber: string): Promise<void>;
  addCASToDocumentBatch(documentId: string, casNumbers: string[]): Promise<void>;
  removeCASFromDocument(documentId: string, casNumber: string): Promise<void>;
  findDocumentsWithMultipleCAS(casNumbers: string[], requireAll: boolean): Promise<Document[]>;

  // count*
  countDocuments(): Promise<number>;
  countDocumentsByCAS(casNumber: string): Promise<number>;
  countUniqueCAS(): Promise<number>;
  getTopCASByDocumentCount(limit: number): Promise<any[]>;

  // extraction / processing
  extractTextFromPDF(pdfBuffer: Buffer): Promise<string>;
  extractCASFromText(text: string): Promise<string[]>;
  processDocument(documentPath: string, documentData: any): Promise<Document>;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Declared API surface of the search service.
 *
 * NOTE(review): lib/search.js in this package version ships every method with
 * an empty body, so the return types below describe the intended contract
 * rather than observable behaviour.
 */
export declare class SearchService {
  constructor(connection: PegasusConnection);

  // term searches
  searchChemicals(searchTerm: string, searchType: string, limit: number, offset: number): Promise<any[]>;
  searchStartsWith(searchTerm: string, limit: number): Promise<any[]>;
  searchContains(searchTerm: string, limit: number): Promise<any[]>;
  searchExact(searchTerm: string, limit: number): Promise<any[]>;

  // searchBy*
  searchByCAS(casNumber: string, searchType: string): Promise<any[]>;
  searchByIdentifier(identifierValue: string, searchType: string): Promise<any[]>;
  searchBySynonym(synonymTerm: string, searchType: string): Promise<any[]>;

  // composite searches
  advancedSearch(queryBuilder: any): Promise<any[]>;
  searchWithFilters(searchTerm: string, filters: any, limit: number): Promise<any[]>;
  searchByCollection(collectionName: string, searchTerm: string, limit: number): Promise<any[]>;

  // aggregate*
  aggregateByCategory(): Promise<any>;
  aggregateByIdentifierType(): Promise<any>;

  // suggestions / similarity
  getSearchSuggestions(partialTerm: string, limit: number): Promise<string[]>;
  findSimilarChemicals(chemicalId: string, limit: number): Promise<any[]>;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Declared API surface of the sync service.
 *
 * NOTE(review): lib/sync.js in this package version ships every method with
 * an empty body, so the return types below describe the intended contract
 * rather than observable behaviour.
 */
export declare class SyncService {
  constructor(connection: PegasusConnection);

  // sync*
  syncBatch(batchSize: number): Promise<number>;
  syncAll(): Promise<number>;
  syncContinuous(intervalMs: number): Promise<void>;
  stopContinuousSync(): Promise<void>;

  // pending work
  getPendingCount(): Promise<number>;
  getOldestPending(): Promise<Date>;
  getSyncLag(): Promise<number>;

  // failures / retries
  getFailedEntries(minRetryCount: number): Promise<OutboxEntry[]>;
  retryFailed(outboxId: number): Promise<void>;
  retryAllFailed(): Promise<number>;

  // outbox maintenance
  markAsProcessed(outboxId: number): Promise<void>;
  deleteProcessedOlderThan(days: number): Promise<number>;
  cleanupOutbox(daysToKeep: number): Promise<number>;

  // statistics / verification
  getSyncStats(timeWindowMinutes: number): Promise<any>;
  getSyncThroughput(): Promise<number>;
  verifySync(chemicalId: string): Promise<boolean>;
  forceResync(chemicalId: string): Promise<void>;
  getOutboxHealth(): Promise<any>;
}
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
/**
 * Declared API surface of the utilities service.
 *
 * NOTE(review): lib/utils.js in this package version ships every method with
 * an empty body, so the return types below describe the intended contract
 * rather than observable behaviour. Only the first three methods are async.
 */
export declare class UtilsService {
  constructor(connection: PegasusConnection);

  // async execution helpers
  executeBatch(operations: any[], batchSize: number, concurrency: number): Promise<void>;
  withTransaction(callback: () => Promise<any>): Promise<any>;
  withRetry(operation: () => Promise<any>, maxRetries: number, backoffMs: number): Promise<any>;

  // validate*
  validateChemical(chemical: Chemical): boolean;
  validateDocument(document: Document): boolean;
  validateIdentifier(identifier: Identifier): boolean;
  validateCAS(casNumber: string): boolean;

  // transform*
  transformForOpenSearch(chemical: Chemical): any;
  transformFromElasticsearch(esDocument: any): Chemical;
  transformFromDynamoDB(dynamoItem: any): Chemical;

  // build*
  buildOpenSearchQuery(searchTerm: string, searchType: string): any;
  buildPostgresFilter(filters: any): any;

  // parse / extract / sanitize
  parseChemicalIdentifiers(identifiers: any): Identifier[];
  parseSynonyms(synonyms: any): string[];
  extractCASFromText(text: string): string[];
  sanitizeSearchTerm(term: string): string;

  // misc
  generateSourceId(chemical: Chemical): string;
  calculateChecksum(data: any): string;
  formatError(error: Error): any;
  logOperation(operation: string, duration: number, metadata: any): void;
  getTimestamp(): Date;
}
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
/**
 * Top-level SDK facade: one shared {@link PegasusConnection} plus one instance
 * of each service constructed on top of it (see index.js).
 */
export default class PegasusSDK {
  connection: PegasusConnection;
  chemicals: ChemicalsService;
  documents: DocumentsService;
  search: SearchService;
  sync: SyncService;
  utils: UtilsService;

  /**
   * @param config Optional. index.js forwards it unchanged to
   *   `PegasusConnection`, whose constructor defaults a missing value to `{}`,
   *   so `new PegasusSDK()` is valid at runtime.
   */
  constructor(config?: PegasusConfig);

  /** Delegates to `connection.connect()`. */
  connect(): Promise<void>;

  /** Delegates to `connection.disconnect()`. */
  disconnect(): Promise<void>;

  /**
   * Delegates to `connection.testConnection()`, so the resolved value has the
   * same shape.
   */
  healthCheck(): Promise<ConnectionStatus>;
}
|
package/index.js
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
const PegasusConnection = require('./lib/connection');
|
|
2
|
+
const ChemicalsService = require('./lib/chemicals');
|
|
3
|
+
const DocumentsService = require('./lib/documents');
|
|
4
|
+
const SearchService = require('./lib/search');
|
|
5
|
+
const SyncService = require('./lib/sync');
|
|
6
|
+
const UtilsService = require('./lib/utils');
|
|
7
|
+
|
|
8
|
+
/**
 * SDK facade: builds a single PegasusConnection and hands that same instance
 * to every service object.
 */
class PegasusSDK {
  /**
   * @param {object} [config] Forwarded unchanged to PegasusConnection.
   */
  constructor(config) {
    const sharedConnection = new PegasusConnection(config);

    this.connection = sharedConnection;
    this.chemicals = new ChemicalsService(sharedConnection);
    this.documents = new DocumentsService(sharedConnection);
    this.search = new SearchService(sharedConnection);
    this.sync = new SyncService(sharedConnection);
    this.utils = new UtilsService(sharedConnection);
  }

  /** Opens the underlying connections. */
  async connect() {
    const { connection } = this;
    return connection.connect();
  }

  /** Closes the underlying connections. */
  async disconnect() {
    const { connection } = this;
    return connection.disconnect();
  }

  /** Resolves to the connection's `testConnection()` report. */
  async healthCheck() {
    const { connection } = this;
    return connection.testConnection();
  }
}
|
|
30
|
+
|
|
31
|
+
// The SDK class itself is the module's export, so `require(...)` returns it.
module.exports = PegasusSDK;

// index.d.ts declares the SDK as `export default`. Without an explicit
// `default` property, a default import compiled without esModuleInterop
// resolves to `undefined` at runtime.
module.exports.default = PegasusSDK;

// Named exports for consumers that want the individual building blocks.
module.exports.PegasusConnection = PegasusConnection;
module.exports.ChemicalsService = ChemicalsService;
module.exports.DocumentsService = DocumentsService;
module.exports.SearchService = SearchService;
module.exports.SyncService = SyncService;
module.exports.UtilsService = UtilsService;
|
package/lib/chemicals.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
 * Stub implementation of the chemicals API declared in index.d.ts.
 *
 * Every method below has an empty body: it performs no work and resolves to
 * `undefined`, regardless of the return type declared in index.d.ts.
 */
class ChemicalsService {
  /**
   * @param {object} connection Connection wrapper shared by the SDK. It is
   *   kept on the instance (the original constructor discarded it) so that
   *   method implementations have a way to reach the database.
   */
  constructor(connection) {
    this.connection = connection;
  }

  // bulkIndex*
  async bulkIndexFielded(documents) {}
  async bulkIndexFulltext(documents) {}
  async bulkIndexSubstances(substances) {}

  // create / update / delete
  async createChemical(chemical) {}
  async updateChemical(chemicalId, updates) {}
  async deleteChemical(chemicalId) {}
  async deleteBySourceId(sourceId) {}
  async deleteCollection(collectionName) {}
  async updateCollectionProperty(collectionName, propertyPath, newValue) {}
  async bulkUpdateProperty(filter, propertyPath, newValue) {}

  // get*
  async getChemicalById(chemicalId) {}
  async getChemicalBySourceId(sourceId) {}
  async getChemicalsByCAS(casNumber) {}
  async getChemicalsByIdentifier(identifierType, identifierValue) {}

  // count*
  async countAll() {}
  async countByCollection(collectionName) {}
  async countByIdentifier(identifierValue) {}
  async countByCAS(casNumber) {}
  async getTotalSynonymCount() {}
  async getSynonymCount(synonymTerm) {}

  // convert*
  async convertIdentifier(fromIdentifier, toIdentifierType) {}
  async convertIdentifiersBatch(fromIdentifiers, toIdentifierType) {}

  // search / find
  async searchByName(searchTerm, limit) {}
  async searchBySynonym(synonymTerm, limit) {}
  async findChemicalsWithoutDocuments(collectionName, searchTerm, pageSize) {}
  async countChemicalsWithoutDocuments(collectionName) {}
}
|
|
56
|
+
|
|
57
|
+
module.exports = ChemicalsService;
|
package/lib/connection.js
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
const { Pool } = require('pg');
|
|
2
|
+
const { Client } = require('@opensearch-project/opensearch');
|
|
3
|
+
const { SecretsManagerClient, GetSecretValueCommand } = require('@aws-sdk/client-secrets-manager');
|
|
4
|
+
const { AwsSigv4Signer } = require('@opensearch-project/opensearch/aws');
|
|
5
|
+
const { defaultProvider } = require('@aws-sdk/credential-providers');
|
|
6
|
+
|
|
7
|
+
/**
 * Owns the SDK's external connections: a PostgreSQL pool whose credentials are
 * read from AWS Secrets Manager and, when an endpoint is configured, a
 * SigV4-signed OpenSearch client.
 *
 * NOTE(review): `defaultProvider` is destructured from
 * '@aws-sdk/credential-providers' at the top of this file. Confirm that the
 * package really exports that name; if it does not, the first signed
 * OpenSearch request will fail inside `getCredentials`.
 */
class PegasusConnection {
  /**
   * @param {object} [config] Optional overrides. Anything omitted falls back
   *   to an environment variable or a built-in default.
   */
  constructor(config = {}) {
    this.config = config;
    this.environment = config.environment || process.env.NODE_ENV || 'development';
    this.region = config.region || process.env.AWS_REGION || 'us-east-1';
    this.secretName = config.secretName || `pegasus/${this.environment}/database`;
    this.openSearchEndpoint = config.openSearchEndpoint || process.env.OPENSEARCH_ENDPOINT;

    this.pgPool = null;
    this.osClient = null;
    this.secretsClient = null;
    this.cachedSecret = null;
    this.isConnected = false;
  }

  /**
   * Fetches the database secret and parses it as JSON. The parsed value is
   * cached until disconnect() clears it.
   *
   * @returns {Promise<object>} The parsed secret.
   * @throws {Error} When the secret has no `SecretString`.
   */
  async getSecret() {
    if (this.cachedSecret) {
      return this.cachedSecret;
    }

    if (!this.secretsClient) {
      this.secretsClient = new SecretsManagerClient({ region: this.region });
    }

    const command = new GetSecretValueCommand({ SecretId: this.secretName });
    const response = await this.secretsClient.send(command);

    if (response.SecretString) {
      this.cachedSecret = JSON.parse(response.SecretString);
      return this.cachedSecret;
    }

    throw new Error(`Secret ${this.secretName} does not contain SecretString`);
  }

  /**
   * Creates the PostgreSQL pool and, if an endpoint is configured, the
   * OpenSearch client. Does nothing when already connected.
   *
   * @returns {Promise<void>}
   */
  async connect() {
    if (this.isConnected) {
      return;
    }

    const secret = await this.getSecret();
    const { config } = this;

    const poolConfig = {
      host: secret.host || secret.endpoint,
      port: secret.port || 5432,
      database: secret.database || secret.dbname || 'chemicals',
      user: secret.username || secret.user,
      password: secret.password,
      max: config.maxConnections || 2,
      min: config.minConnections || 0,
      // `??` instead of `||` for the timeouts: an explicit 0 is a deliberate
      // caller choice (node-postgres treats it as "disabled" / "no timeout")
      // and must reach the pool instead of being replaced by the default.
      idleTimeoutMillis: config.idleTimeoutMillis ?? 30000,
      connectionTimeoutMillis: config.connectionTimeoutMillis ?? 5000,
      allowExitOnIdle: true,
      ssl: {
        rejectUnauthorized: true
      },
      statement_timeout: config.statementTimeout ?? 30000,
      query_timeout: config.queryTimeout ?? 30000
    };

    this.pgPool = new Pool(poolConfig);

    this.pgPool.on('error', (err) => {
      console.error('Unexpected PostgreSQL pool error:', err);
    });

    this.pgPool.on('connect', () => {
      console.log('PostgreSQL client connected');
    });

    this.pgPool.on('remove', () => {
      console.log('PostgreSQL client removed from pool');
    });

    if (this.openSearchEndpoint) {
      // Built lazily on the first signing call and then reused, so the
      // provider is not re-created for every request.
      let credentialsProvider = null;

      this.osClient = new Client({
        ...AwsSigv4Signer({
          region: this.region,
          service: 'aoss',
          getCredentials: () => {
            if (!credentialsProvider) {
              credentialsProvider = defaultProvider();
            }
            return credentialsProvider();
          }
        }),
        node: this.openSearchEndpoint
      });
    }

    this.isConnected = true;
  }

  /**
   * Ends the pool and resets all connection state. Does nothing when not
   * connected. A rejection from `pool.end()` still propagates, but only after
   * the state has been reset.
   *
   * @returns {Promise<void>}
   */
  async disconnect() {
    if (!this.isConnected) {
      return;
    }

    try {
      if (this.pgPool) {
        await this.pgPool.end();
      }
    } finally {
      // Reset even when end() rejects; otherwise the instance keeps pointing
      // at a dead pool and every later disconnect() ends it a second time.
      this.pgPool = null;
      this.osClient = null;
      this.isConnected = false;
      this.cachedSecret = null;
    }
  }

  /**
   * @returns {object} The PostgreSQL pool.
   * @throws {Error} When no pool exists yet.
   */
  getPostgresClient() {
    if (!this.pgPool) {
      throw new Error('PostgreSQL connection not established. Call connect() first.');
    }
    return this.pgPool;
  }

  /**
   * @returns {object} The OpenSearch client.
   * @throws {Error} When no client exists.
   */
  getOpenSearchClient() {
    if (!this.osClient) {
      throw new Error('OpenSearch connection not established. Call connect() first or provide openSearchEndpoint.');
    }
    return this.osClient;
  }

  /**
   * Probes PostgreSQL and, when a client exists, OpenSearch.
   *
   * Never rejects: any failure on the PostgreSQL side is reported as
   * `postgres.connected === false` with `opensearch` set to `null`.
   *
   * @returns {Promise<object>} Status report (see ConnectionStatus in index.d.ts).
   */
  async testConnection() {
    try {
      if (!this.pgPool) {
        throw new Error('No active connections');
      }

      const client = await this.pgPool.connect();
      let result;
      try {
        result = await client.query('SELECT NOW() as current_time, version() as pg_version');
      } finally {
        // Return the client even when the probe query throws, so a failed
        // health check does not leak a pooled connection.
        client.release();
      }

      const pgStatus = {
        connected: true,
        timestamp: result.rows[0].current_time,
        version: result.rows[0].pg_version,
        poolSize: this.pgPool.totalCount,
        idleConnections: this.pgPool.idleCount,
        waitingRequests: this.pgPool.waitingCount
      };

      let osStatus = null;
      if (this.osClient) {
        try {
          const osInfo = await this.osClient.info();
          osStatus = {
            connected: true,
            version: osInfo.body.version.number,
            cluster: osInfo.body.cluster_name
          };
        } catch (osError) {
          // An OpenSearch failure is reported on its own and does not
          // invalidate the PostgreSQL result.
          osStatus = {
            connected: false,
            error: osError.message
          };
        }
      }

      return {
        postgres: pgStatus,
        opensearch: osStatus,
        environment: this.environment,
        region: this.region
      };
    } catch (error) {
      return {
        postgres: { connected: false, error: error.message },
        opensearch: null,
        environment: this.environment,
        region: this.region
      };
    }
  }

  /**
   * Runs a single statement through the pool.
   *
   * @param {string} sql
   * @param {Array} [params]
   * @returns {Promise<object>} Whatever the pool's `query` resolves to.
   */
  async query(sql, params) {
    const pool = this.getPostgresClient();
    return pool.query(sql, params);
  }

  /**
   * @returns {Promise<object>} A client checked out with the pool's `connect()`.
   */
  async getClient() {
    const pool = this.getPostgresClient();
    return pool.connect();
  }

  /**
   * Runs `callback(client)` between BEGIN and COMMIT on one pooled client.
   * If anything throws, a ROLLBACK is attempted and the original error is
   * rethrown.
   *
   * @param {function(object): Promise<*>} callback
   * @returns {Promise<*>} The callback's resolved value.
   */
  async transaction(callback) {
    const client = await this.getClient();
    let releaseError;

    try {
      await client.query('BEGIN');
      const result = await callback(client);
      await client.query('COMMIT');
      return result;
    } catch (error) {
      try {
        await client.query('ROLLBACK');
      } catch (rollbackError) {
        // A failed ROLLBACK must not replace the error that caused it. The
        // client is in an unknown state, so hand the error to release() and
        // let the pool discard the client instead of reusing it.
        console.error('Failed to roll back PostgreSQL transaction:', rollbackError);
        releaseError = rollbackError;
      }
      throw error;
    } finally {
      client.release(releaseError);
    }
  }
}
|
|
205
|
+
|
|
206
|
+
module.exports = PegasusConnection;
|
package/lib/documents.js
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Stub implementation of the documents API declared in index.d.ts.
 *
 * Every method below has an empty body: it performs no work and resolves to
 * `undefined`, regardless of the return type declared in index.d.ts.
 */
class DocumentsService {
  /**
   * @param {object} connection Connection wrapper shared by the SDK. It is
   *   kept on the instance (the original constructor discarded it) so that
   *   method implementations have a way to reach the database.
   */
  constructor(connection) {
    this.connection = connection;
  }

  // create / update / delete
  async createDocument(documentPath, casNumbers) {}
  async bulkCreateDocuments(documents) {}
  async updateDocument(documentId, updates) {}
  async deleteDocument(documentId) {}
  async deleteDocumentByPath(documentPath) {}

  // get*
  async getDocumentById(documentId) {}
  async getDocumentByPath(documentPath) {}
  async getDocumentsByCAS(casNumber) {}
  async getCASByDocument(documentId) {}
  async getCASByDocumentPath(documentPath) {}

  // CAS membership
  async addCASToDocument(documentId, casNumber) {}
  async addCASToDocumentBatch(documentId, casNumbers) {}
  async removeCASFromDocument(documentId, casNumber) {}
  async findDocumentsWithMultipleCAS(casNumbers, requireAll) {}

  // count*
  async countDocuments() {}
  async countDocumentsByCAS(casNumber) {}
  async countUniqueCAS() {}
  async getTopCASByDocumentCount(limit) {}

  // extraction / processing
  async extractTextFromPDF(pdfBuffer) {}
  async extractCASFromText(text) {}
  async processDocument(documentPath, documentData) {}
}
|
|
46
|
+
|
|
47
|
+
module.exports = DocumentsService;
|
package/lib/search.js
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
 * Stub implementation of the search API declared in index.d.ts.
 *
 * Every method below has an empty body: it performs no work and resolves to
 * `undefined`, regardless of the return type declared in index.d.ts.
 */
class SearchService {
  /**
   * @param {object} connection Connection wrapper shared by the SDK. It is
   *   kept on the instance (the original constructor discarded it) so that
   *   method implementations have a way to reach the search backend.
   */
  constructor(connection) {
    this.connection = connection;
  }

  // term searches
  async searchChemicals(searchTerm, searchType, limit, offset) {}
  async searchStartsWith(searchTerm, limit) {}
  async searchContains(searchTerm, limit) {}
  async searchExact(searchTerm, limit) {}

  // searchBy*
  async searchByCAS(casNumber, searchType) {}
  async searchByIdentifier(identifierValue, searchType) {}
  async searchBySynonym(synonymTerm, searchType) {}

  // composite searches
  async advancedSearch(queryBuilder) {}
  async searchWithFilters(searchTerm, filters, limit) {}
  async searchByCollection(collectionName, searchTerm, limit) {}

  // aggregate*
  async aggregateByCategory() {}
  async aggregateByIdentifierType() {}

  // suggestions / similarity
  async getSearchSuggestions(partialTerm, limit) {}
  async findSimilarChemicals(chemicalId, limit) {}
}
|
|
32
|
+
|
|
33
|
+
module.exports = SearchService;
|
package/lib/sync.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
 * Stub implementation of the sync API declared in index.d.ts.
 *
 * Every method below has an empty body: it performs no work and resolves to
 * `undefined`, regardless of the return type declared in index.d.ts.
 */
class SyncService {
  /**
   * @param {object} connection Connection wrapper shared by the SDK. It is
   *   kept on the instance (the original constructor discarded it) so that
   *   method implementations have a way to reach the backends.
   */
  constructor(connection) {
    this.connection = connection;
  }

  // sync*
  async syncBatch(batchSize) {}
  async syncAll() {}
  async syncContinuous(intervalMs) {}
  async stopContinuousSync() {}

  // pending work
  async getPendingCount() {}
  async getOldestPending() {}
  async getSyncLag() {}

  // failures / retries
  async getFailedEntries(minRetryCount) {}
  async retryFailed(outboxId) {}
  async retryAllFailed() {}

  // outbox maintenance
  async markAsProcessed(outboxId) {}
  async deleteProcessedOlderThan(days) {}
  async cleanupOutbox(daysToKeep) {}

  // statistics / verification
  async getSyncStats(timeWindowMinutes) {}
  async getSyncThroughput() {}
  async verifySync(chemicalId) {}
  async forceResync(chemicalId) {}
  async getOutboxHealth() {}
}
|
|
40
|
+
|
|
41
|
+
module.exports = SyncService;
|
package/lib/utils.js
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Stub implementation of the utilities API declared in index.d.ts.
 *
 * Every method below has an empty body. The three async helpers resolve to
 * `undefined`; the synchronous methods return `undefined`, regardless of the
 * return type declared in index.d.ts.
 */
class UtilsService {
  /**
   * @param {object} connection Connection wrapper shared by the SDK. It is
   *   kept on the instance (the original constructor discarded it) so that
   *   method implementations have a way to reach the backends.
   */
  constructor(connection) {
    this.connection = connection;
  }

  // async execution helpers
  async executeBatch(operations, batchSize, concurrency) {}
  async withTransaction(callback) {}
  async withRetry(operation, maxRetries, backoffMs) {}

  // validate*
  validateChemical(chemical) {}
  validateDocument(document) {}
  validateIdentifier(identifier) {}
  validateCAS(casNumber) {}

  // transform*
  transformForOpenSearch(chemical) {}
  transformFromElasticsearch(esDocument) {}
  transformFromDynamoDB(dynamoItem) {}

  // build*
  buildOpenSearchQuery(searchTerm, searchType) {}
  buildPostgresFilter(filters) {}

  // parse / extract / sanitize
  parseChemicalIdentifiers(identifiers) {}
  parseSynonyms(synonyms) {}
  extractCASFromText(text) {}
  sanitizeSearchTerm(term) {}

  // misc
  generateSourceId(chemical) {}
  calculateChecksum(data) {}
  formatError(error) {}
  logOperation(operation, duration, metadata) {}
  getTimestamp() {}
}
|
|
46
|
+
|
|
47
|
+
module.exports = UtilsService;
|
package/package.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@toxplanet/pegasus-sdk",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"scripts": {
|
|
7
|
+
},
|
|
8
|
+
"keywords": [
|
|
9
|
+
],
|
|
10
|
+
"author": "Chemical Research Development Team",
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"dependencies": {
|
|
13
|
+
"pg": "^8.11.3",
|
|
14
|
+
"@opensearch-project/opensearch": "^2.5.0",
|
|
15
|
+
"@aws-sdk/client-opensearch-serverless": "^3.490.0",
|
|
16
|
+
"@aws-sdk/client-secrets-manager": "^3.490.0",
|
|
17
|
+
"@aws-sdk/credential-providers": "^3.490.0",
|
|
18
|
+
"aws4fetch": "^1.0.18"
|
|
19
|
+
},
|
|
20
|
+
"engines": {
|
|
21
|
+
"node": ">=18.0.0"
|
|
22
|
+
}
|
|
23
|
+
}
|