@toxplanet/pegasus-sdk 1.1.8 → 1.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/chemicals.js +69 -18
- package/lib/elasticsearch.js +19 -0
- package/package.json +1 -1
package/lib/chemicals.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const { logError } = require('@toxplanet/tphelper/logging');
|
|
1
|
+
const { logError, logInfo } = require('@toxplanet/tphelper/logging');
|
|
2
2
|
const { getDrizzle, schema } = require('./db');
|
|
3
3
|
const { eq, sql, and, inArray, arrayContains } = require('drizzle-orm');
|
|
4
4
|
|
|
@@ -28,7 +28,10 @@ class ChemicalsService {
|
|
|
28
28
|
|
|
29
29
|
async bulkIndexFielded(documents) {
|
|
30
30
|
try {
|
|
31
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Starting bulk index with ${documents?.length || 0} documents`);
|
|
32
|
+
|
|
31
33
|
if (!documents || documents.length === 0) {
|
|
34
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] No documents provided, returning empty result`);
|
|
32
35
|
return { indexed: 0, errors: [], results: [] };
|
|
33
36
|
}
|
|
34
37
|
|
|
@@ -36,8 +39,12 @@ class ChemicalsService {
|
|
|
36
39
|
const results = [];
|
|
37
40
|
const errors = [];
|
|
38
41
|
|
|
42
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Database connection established`);
|
|
43
|
+
|
|
39
44
|
for (let i = 0; i < documents.length; i++) {
|
|
40
45
|
const doc = documents[i];
|
|
46
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Processing document ${i}: source_id=${doc.source_id}, chemical_name=${doc.chemical_name}`);
|
|
47
|
+
|
|
41
48
|
try {
|
|
42
49
|
const chemical = {
|
|
43
50
|
sourceId: doc.source_id || doc._id,
|
|
@@ -52,6 +59,8 @@ class ChemicalsService {
|
|
|
52
59
|
...(doc.chemical_id && { chemicalId: doc.chemical_id })
|
|
53
60
|
};
|
|
54
61
|
|
|
62
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Prepared chemical object: sourceId=${chemical.sourceId}, chemicalName=${chemical.chemicalName}`);
|
|
63
|
+
|
|
55
64
|
const [result] = await db
|
|
56
65
|
.insert(schema.chemicals)
|
|
57
66
|
.values(chemical)
|
|
@@ -68,14 +77,19 @@ class ChemicalsService {
|
|
|
68
77
|
})
|
|
69
78
|
.returning();
|
|
70
79
|
|
|
80
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully: ${result?.chemicalId || 'no ID returned'}`);
|
|
71
81
|
results.push({ index: i, success: true, result });
|
|
72
82
|
} catch (err) {
|
|
83
|
+
logError('pegasus-sdk', 'bulkIndexFielded', `Error indexing document ${i}`, err);
|
|
73
84
|
results.push({ index: i, success: false, error: err.message });
|
|
74
85
|
errors.push({ document: doc, error: err.message });
|
|
75
86
|
}
|
|
76
87
|
}
|
|
77
88
|
|
|
78
|
-
|
|
89
|
+
const successCount = results.filter(r => r.success).length;
|
|
90
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Bulk index complete: ${successCount}/${documents.length} succeeded, ${errors.length} errors`);
|
|
91
|
+
|
|
92
|
+
return { indexed: successCount, errors, results };
|
|
79
93
|
} catch (error) {
|
|
80
94
|
logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexFielded', error);
|
|
81
95
|
throw error;
|
|
@@ -645,16 +659,31 @@ class ChemicalsService {
|
|
|
645
659
|
|
|
646
660
|
bulk: async (params) => {
|
|
647
661
|
const operations = params.body || params.operations;
|
|
662
|
+
|
|
663
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Starting bulk operation with ${operations?.length || 0} total operations`);
|
|
664
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Params index: ${params.index}`);
|
|
665
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Operations array type: ${Array.isArray(operations) ? 'array' : typeof operations}`);
|
|
666
|
+
|
|
648
667
|
const cdiDocuments = [];
|
|
668
|
+
let cdiOpCount = 0;
|
|
669
|
+
let otherOpCount = 0;
|
|
649
670
|
|
|
650
671
|
for (let i = 0; i < operations.length; i++) {
|
|
651
672
|
const op = operations[i];
|
|
673
|
+
const isIndexOp = !!(op.index || op.create);
|
|
674
|
+
const indexName = op.index?._index || op.create?._index || op.delete?._index || op.update?._index;
|
|
675
|
+
|
|
676
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Op[${i}]: action=${Object.keys(op)[0] || 'unknown'}, index=${indexName}`);
|
|
677
|
+
|
|
652
678
|
if ((op.index || op.create) &&
|
|
653
679
|
(op.index?._index === 'chemical_data_index' || op.create?._index === 'chemical_data_index')) {
|
|
654
680
|
const doc = operations[i + 1];
|
|
655
681
|
const sourceId = op.index?._id || op.create?._id;
|
|
682
|
+
|
|
683
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Found CDI entry: sourceId=${sourceId}, hasDoc=${!!doc}`);
|
|
684
|
+
|
|
656
685
|
if (doc && sourceId) {
|
|
657
|
-
|
|
686
|
+
const cdiDoc = {
|
|
658
687
|
source_id: sourceId,
|
|
659
688
|
chemical_name: doc.chemical_primary_name || (doc.chemical_names && doc.chemical_names[0]) || null,
|
|
660
689
|
chemical_meta: doc.chemical_meta || {},
|
|
@@ -663,31 +692,53 @@ class ChemicalsService {
|
|
|
663
692
|
chemical_categories: doc.chemical_categories || [],
|
|
664
693
|
created_at: doc.chemical_created_at,
|
|
665
694
|
updated_at: doc.chemical_updated_at
|
|
666
|
-
}
|
|
695
|
+
};
|
|
696
|
+
cdiDocuments.push(cdiDoc);
|
|
697
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Extracted CDI doc: ${JSON.stringify({ source_id: cdiDoc.source_id, chemical_name: cdiDoc.chemical_name })}`);
|
|
667
698
|
i++;
|
|
699
|
+
cdiOpCount++;
|
|
700
|
+
} else {
|
|
701
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] CDI entry incomplete: sourceId=${sourceId}, doc=${!!doc}`);
|
|
668
702
|
}
|
|
703
|
+
} else {
|
|
704
|
+
otherOpCount++;
|
|
669
705
|
}
|
|
670
706
|
}
|
|
671
707
|
|
|
708
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Scan complete: ${cdiOpCount} CDI docs found, ${otherOpCount} other operations skipped`);
|
|
709
|
+
|
|
672
710
|
if (cdiDocuments.length === 0) {
|
|
711
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] No CDI documents to index, returning empty no-op response`);
|
|
673
712
|
return { took: 0, errors: false, items: [] };
|
|
674
713
|
}
|
|
675
714
|
|
|
676
|
-
|
|
715
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Calling bulkIndexFielded with ${cdiDocuments.length} CDI documents`);
|
|
677
716
|
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
717
|
+
try {
|
|
718
|
+
const result = await this.bulkIndexFielded(cdiDocuments);
|
|
719
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] bulkIndexFielded returned: indexed=${result.indexed}, errors=${result.errors.length}`);
|
|
720
|
+
|
|
721
|
+
if (result.errors.length > 0) {
|
|
722
|
+
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Errors during bulk indexing', result.errors);
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
return {
|
|
726
|
+
took: 1,
|
|
727
|
+
errors: result.errors.length > 0,
|
|
728
|
+
items: result.results.map((res, idx) => ({
|
|
729
|
+
index: {
|
|
730
|
+
_index: 'chemical_data_index',
|
|
731
|
+
_id: cdiDocuments[idx].source_id,
|
|
732
|
+
status: res.success ? 200 : 400,
|
|
733
|
+
result: res.success ? 'created' : 'error',
|
|
734
|
+
...(res.success ? {} : { error: { type: 'mapper_parsing_exception', reason: res.error } })
|
|
735
|
+
}
|
|
736
|
+
}))
|
|
737
|
+
};
|
|
738
|
+
} catch (error) {
|
|
739
|
+
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Fatal error during bulk indexing', error);
|
|
740
|
+
throw error;
|
|
741
|
+
}
|
|
691
742
|
},
|
|
692
743
|
|
|
693
744
|
get: async (params) => {
|
package/lib/elasticsearch.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
const { logInfo, logError } = require('@toxplanet/tphelper/logging');
|
|
2
|
+
|
|
1
3
|
class ElasticsearchService {
|
|
2
4
|
constructor(connection) {
|
|
3
5
|
this.connection = connection;
|
|
@@ -65,25 +67,42 @@ class ElasticsearchService {
|
|
|
65
67
|
|
|
66
68
|
async bulk(params) {
|
|
67
69
|
const operations = params.body || params.operations;
|
|
70
|
+
|
|
71
|
+
logInfo('pegasus-sdk', `[ElasticsearchService.bulk] Starting bulk routing with ${operations?.length || 0} operations`);
|
|
72
|
+
logInfo('pegasus-sdk', `[ElasticsearchService.bulk] params.index=${params.index}`);
|
|
73
|
+
|
|
68
74
|
if (!Array.isArray(operations) || operations.length === 0) {
|
|
69
75
|
throw new Error('Bulk operations must be a non-empty array');
|
|
70
76
|
}
|
|
71
77
|
|
|
72
78
|
let indexName = params.index;
|
|
73
79
|
let cdiIndexName = null;
|
|
80
|
+
const foundIndices = [];
|
|
81
|
+
|
|
74
82
|
for (const op of operations) {
|
|
75
83
|
const name = op?.index?._index || op?.index?.index ||
|
|
76
84
|
op?.create?._index || op?.create?.index ||
|
|
77
85
|
op?.delete?._index || op?.delete?.index ||
|
|
78
86
|
op?.update?._index || op?.update?.index;
|
|
79
87
|
if (!name) continue;
|
|
88
|
+
|
|
89
|
+
if (!foundIndices.includes(name)) foundIndices.push(name);
|
|
90
|
+
|
|
80
91
|
if (name === 'chemical_data_index') { cdiIndexName = name; break; }
|
|
81
92
|
if (!indexName) indexName = name;
|
|
82
93
|
}
|
|
94
|
+
|
|
95
|
+
logInfo('pegasus-sdk', `[ElasticsearchService.bulk] Found indices in operations: ${foundIndices.join(', ')}`);
|
|
96
|
+
logInfo('pegasus-sdk', `[ElasticsearchService.bulk] cdiIndexName=${cdiIndexName}, fallback indexName=${indexName}`);
|
|
97
|
+
|
|
83
98
|
indexName = cdiIndexName || indexName;
|
|
84
99
|
if (!indexName) throw new Error('Could not determine index from bulk operations');
|
|
85
100
|
|
|
101
|
+
logInfo('pegasus-sdk', `[ElasticsearchService.bulk] Routing to index: ${indexName}`);
|
|
102
|
+
|
|
86
103
|
const handler = this.getRouteHandler(indexName);
|
|
104
|
+
logInfo('pegasus-sdk', `[ElasticsearchService.bulk] Handler found: ${!!handler}, has bulk method: ${!!(handler && handler.bulk)}`);
|
|
105
|
+
|
|
87
106
|
if (handler && handler.bulk) return await handler.bulk(params);
|
|
88
107
|
throw new Error(`No handler registered for index: ${indexName}`);
|
|
89
108
|
}
|
package/package.json
CHANGED