@toxplanet/pegasus-sdk 1.1.8 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/chemicals.js +69 -17
- package/lib/elasticsearch.js +18 -0
- package/package.json +1 -1
package/lib/chemicals.js
CHANGED
|
@@ -28,7 +28,10 @@ class ChemicalsService {
|
|
|
28
28
|
|
|
29
29
|
async bulkIndexFielded(documents) {
|
|
30
30
|
try {
|
|
31
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Starting bulk index with ${documents?.length || 0} documents`);
|
|
32
|
+
|
|
31
33
|
if (!documents || documents.length === 0) {
|
|
34
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] No documents provided, returning empty result`);
|
|
32
35
|
return { indexed: 0, errors: [], results: [] };
|
|
33
36
|
}
|
|
34
37
|
|
|
@@ -36,8 +39,12 @@ class ChemicalsService {
|
|
|
36
39
|
const results = [];
|
|
37
40
|
const errors = [];
|
|
38
41
|
|
|
42
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Database connection established`);
|
|
43
|
+
|
|
39
44
|
for (let i = 0; i < documents.length; i++) {
|
|
40
45
|
const doc = documents[i];
|
|
46
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Processing document ${i}: source_id=${doc.source_id}, chemical_name=${doc.chemical_name}`);
|
|
47
|
+
|
|
41
48
|
try {
|
|
42
49
|
const chemical = {
|
|
43
50
|
sourceId: doc.source_id || doc._id,
|
|
@@ -52,6 +59,8 @@ class ChemicalsService {
|
|
|
52
59
|
...(doc.chemical_id && { chemicalId: doc.chemical_id })
|
|
53
60
|
};
|
|
54
61
|
|
|
62
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Prepared chemical object: sourceId=${chemical.sourceId}, chemicalName=${chemical.chemicalName}`);
|
|
63
|
+
|
|
55
64
|
const [result] = await db
|
|
56
65
|
.insert(schema.chemicals)
|
|
57
66
|
.values(chemical)
|
|
@@ -68,14 +77,19 @@ class ChemicalsService {
|
|
|
68
77
|
})
|
|
69
78
|
.returning();
|
|
70
79
|
|
|
80
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully: ${result?.chemicalId || 'no ID returned'}`);
|
|
71
81
|
results.push({ index: i, success: true, result });
|
|
72
82
|
} catch (err) {
|
|
83
|
+
logError('pegasus-sdk', 'bulkIndexFielded', `Error indexing document ${i}`, err);
|
|
73
84
|
results.push({ index: i, success: false, error: err.message });
|
|
74
85
|
errors.push({ document: doc, error: err.message });
|
|
75
86
|
}
|
|
76
87
|
}
|
|
77
88
|
|
|
78
|
-
|
|
89
|
+
const successCount = results.filter(r => r.success).length;
|
|
90
|
+
logInfo('pegasus-sdk', `[bulkIndexFielded] Bulk index complete: ${successCount}/${documents.length} succeeded, ${errors.length} errors`);
|
|
91
|
+
|
|
92
|
+
return { indexed: successCount, errors, results };
|
|
79
93
|
} catch (error) {
|
|
80
94
|
logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexFielded', error);
|
|
81
95
|
throw error;
|
|
@@ -644,17 +658,33 @@ class ChemicalsService {
|
|
|
644
658
|
},
|
|
645
659
|
|
|
646
660
|
bulk: async (params) => {
|
|
661
|
+
const { logInfo, logError } = require('@toxplanet/tphelper/logging');
|
|
647
662
|
const operations = params.body || params.operations;
|
|
663
|
+
|
|
664
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Starting bulk operation with ${operations?.length || 0} total operations`);
|
|
665
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Params index: ${params.index}`);
|
|
666
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Operations array type: ${Array.isArray(operations) ? 'array' : typeof operations}`);
|
|
667
|
+
|
|
648
668
|
const cdiDocuments = [];
|
|
669
|
+
let cdiOpCount = 0;
|
|
670
|
+
let otherOpCount = 0;
|
|
649
671
|
|
|
650
672
|
for (let i = 0; i < operations.length; i++) {
|
|
651
673
|
const op = operations[i];
|
|
674
|
+
const isIndexOp = !!(op.index || op.create);
|
|
675
|
+
const indexName = op.index?._index || op.create?._index || op.delete?._index || op.update?._index;
|
|
676
|
+
|
|
677
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Op[${i}]: action=${Object.keys(op)[0] || 'unknown'}, index=${indexName}`);
|
|
678
|
+
|
|
652
679
|
if ((op.index || op.create) &&
|
|
653
680
|
(op.index?._index === 'chemical_data_index' || op.create?._index === 'chemical_data_index')) {
|
|
654
681
|
const doc = operations[i + 1];
|
|
655
682
|
const sourceId = op.index?._id || op.create?._id;
|
|
683
|
+
|
|
684
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Found CDI entry: sourceId=${sourceId}, hasDoc=${!!doc}`);
|
|
685
|
+
|
|
656
686
|
if (doc && sourceId) {
|
|
657
|
-
|
|
687
|
+
const cdiDoc = {
|
|
658
688
|
source_id: sourceId,
|
|
659
689
|
chemical_name: doc.chemical_primary_name || (doc.chemical_names && doc.chemical_names[0]) || null,
|
|
660
690
|
chemical_meta: doc.chemical_meta || {},
|
|
@@ -663,31 +693,53 @@ class ChemicalsService {
|
|
|
663
693
|
chemical_categories: doc.chemical_categories || [],
|
|
664
694
|
created_at: doc.chemical_created_at,
|
|
665
695
|
updated_at: doc.chemical_updated_at
|
|
666
|
-
}
|
|
696
|
+
};
|
|
697
|
+
cdiDocuments.push(cdiDoc);
|
|
698
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Extracted CDI doc: ${JSON.stringify({ source_id: cdiDoc.source_id, chemical_name: cdiDoc.chemical_name })}`);
|
|
667
699
|
i++;
|
|
700
|
+
cdiOpCount++;
|
|
701
|
+
} else {
|
|
702
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] CDI entry incomplete: sourceId=${sourceId}, doc=${!!doc}`);
|
|
668
703
|
}
|
|
704
|
+
} else {
|
|
705
|
+
otherOpCount++;
|
|
669
706
|
}
|
|
670
707
|
}
|
|
671
708
|
|
|
709
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Scan complete: ${cdiOpCount} CDI docs found, ${otherOpCount} other operations skipped`);
|
|
710
|
+
|
|
672
711
|
if (cdiDocuments.length === 0) {
|
|
712
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] No CDI documents to index, returning empty no-op response`);
|
|
673
713
|
return { took: 0, errors: false, items: [] };
|
|
674
714
|
}
|
|
675
715
|
|
|
676
|
-
|
|
716
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] Calling bulkIndexFielded with ${cdiDocuments.length} CDI documents`);
|
|
677
717
|
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
718
|
+
try {
|
|
719
|
+
const result = await this.bulkIndexFielded(cdiDocuments);
|
|
720
|
+
logInfo('pegasus-sdk', `[ChemicalsService.bulk] bulkIndexFielded returned: indexed=${result.indexed}, errors=${result.errors.length}`);
|
|
721
|
+
|
|
722
|
+
if (result.errors.length > 0) {
|
|
723
|
+
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Errors during bulk indexing', result.errors);
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
return {
|
|
727
|
+
took: 1,
|
|
728
|
+
errors: result.errors.length > 0,
|
|
729
|
+
items: result.results.map((res, idx) => ({
|
|
730
|
+
index: {
|
|
731
|
+
_index: 'chemical_data_index',
|
|
732
|
+
_id: cdiDocuments[idx].source_id,
|
|
733
|
+
status: res.success ? 200 : 400,
|
|
734
|
+
result: res.success ? 'created' : 'error',
|
|
735
|
+
...(res.success ? {} : { error: { type: 'mapper_parsing_exception', reason: res.error } })
|
|
736
|
+
}
|
|
737
|
+
}))
|
|
738
|
+
};
|
|
739
|
+
} catch (error) {
|
|
740
|
+
logError('pegasus-sdk', 'ChemicalsService.bulk', 'Fatal error during bulk indexing', error);
|
|
741
|
+
throw error;
|
|
742
|
+
}
|
|
691
743
|
},
|
|
692
744
|
|
|
693
745
|
get: async (params) => {
|
package/lib/elasticsearch.js
CHANGED
|
@@ -64,26 +64,44 @@ class ElasticsearchService {
|
|
|
64
64
|
}
|
|
65
65
|
|
|
66
66
|
async bulk(params) {
|
|
67
|
+
const { logInfo, logError } = require('@toxplanet/tphelper/logging');
|
|
67
68
|
const operations = params.body || params.operations;
|
|
69
|
+
|
|
70
|
+
logInfo('pegasus-sdk', `[ElasticsearchService.bulk] Starting bulk routing with ${operations?.length || 0} operations`);
|
|
71
|
+
logInfo('pegasus-sdk', `[ElasticsearchService.bulk] params.index=${params.index}`);
|
|
72
|
+
|
|
68
73
|
if (!Array.isArray(operations) || operations.length === 0) {
|
|
69
74
|
throw new Error('Bulk operations must be a non-empty array');
|
|
70
75
|
}
|
|
71
76
|
|
|
72
77
|
let indexName = params.index;
|
|
73
78
|
let cdiIndexName = null;
|
|
79
|
+
const foundIndices = [];
|
|
80
|
+
|
|
74
81
|
for (const op of operations) {
|
|
75
82
|
const name = op?.index?._index || op?.index?.index ||
|
|
76
83
|
op?.create?._index || op?.create?.index ||
|
|
77
84
|
op?.delete?._index || op?.delete?.index ||
|
|
78
85
|
op?.update?._index || op?.update?.index;
|
|
79
86
|
if (!name) continue;
|
|
87
|
+
|
|
88
|
+
if (!foundIndices.includes(name)) foundIndices.push(name);
|
|
89
|
+
|
|
80
90
|
if (name === 'chemical_data_index') { cdiIndexName = name; break; }
|
|
81
91
|
if (!indexName) indexName = name;
|
|
82
92
|
}
|
|
93
|
+
|
|
94
|
+
logInfo('pegasus-sdk', `[ElasticsearchService.bulk] Found indices in operations: ${foundIndices.join(', ')}`);
|
|
95
|
+
logInfo('pegasus-sdk', `[ElasticsearchService.bulk] cdiIndexName=${cdiIndexName}, fallback indexName=${indexName}`);
|
|
96
|
+
|
|
83
97
|
indexName = cdiIndexName || indexName;
|
|
84
98
|
if (!indexName) throw new Error('Could not determine index from bulk operations');
|
|
85
99
|
|
|
100
|
+
logInfo('pegasus-sdk', `[ElasticsearchService.bulk] Routing to index: ${indexName}`);
|
|
101
|
+
|
|
86
102
|
const handler = this.getRouteHandler(indexName);
|
|
103
|
+
logInfo('pegasus-sdk', `[ElasticsearchService.bulk] Handler found: ${!!handler}, has bulk method: ${!!(handler && handler.bulk)}`);
|
|
104
|
+
|
|
87
105
|
if (handler && handler.bulk) return await handler.bulk(params);
|
|
88
106
|
throw new Error(`No handler registered for index: ${indexName}`);
|
|
89
107
|
}
|
package/package.json
CHANGED