@toxplanet/pegasus-sdk 1.1.7 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,7 +20,7 @@ module.exports = {
20
20
  }
21
21
  },
22
22
  indexRoutes: {
23
- chemicals: ['*'],
23
+ chemicals: ['chemicals*'],
24
24
  documents: ['documents*'],
25
25
  search: [/^(chemicals|substances|search)/]
26
26
  }
@@ -19,9 +19,9 @@ module.exports = {
19
19
  rejectUnauthorized: false
20
20
  }
21
21
  },
22
- indexRoutes: {
23
- chemicals: ['chemical_index*', 'synonym_lookup_index'],
24
- documents: ['document_nones_index*'],
25
- search: [/^(chemical_index|document_nones_index|search)/]
26
- }
22
+ indexRoutes: {
23
+ chemicals: ['chemicals*'],
24
+ documents: ['documents*'],
25
+ search: [/^(chemicals|substances|search)/]
26
+ }
27
27
  };
@@ -20,7 +20,7 @@ module.exports = {
20
20
  }
21
21
  },
22
22
  indexRoutes: {
23
- chemicals: ['*'],
23
+ chemicals: ['chemicals*'],
24
24
  documents: ['documents*'],
25
25
  search: [/^(chemicals|substances|search)/]
26
26
  }
package/lib/chemicals.js CHANGED
@@ -28,7 +28,10 @@ class ChemicalsService {
28
28
 
29
29
  async bulkIndexFielded(documents) {
30
30
  try {
31
+ logInfo('pegasus-sdk', `[bulkIndexFielded] Starting bulk index with ${documents?.length || 0} documents`);
32
+
31
33
  if (!documents || documents.length === 0) {
34
+ logInfo('pegasus-sdk', `[bulkIndexFielded] No documents provided, returning empty result`);
32
35
  return { indexed: 0, errors: [], results: [] };
33
36
  }
34
37
 
@@ -36,8 +39,12 @@ class ChemicalsService {
36
39
  const results = [];
37
40
  const errors = [];
38
41
 
42
+ logInfo('pegasus-sdk', `[bulkIndexFielded] Database connection established`);
43
+
39
44
  for (let i = 0; i < documents.length; i++) {
40
45
  const doc = documents[i];
46
+ logInfo('pegasus-sdk', `[bulkIndexFielded] Processing document ${i}: source_id=${doc.source_id}, chemical_name=${doc.chemical_name}`);
47
+
41
48
  try {
42
49
  const chemical = {
43
50
  sourceId: doc.source_id || doc._id,
@@ -52,6 +59,8 @@ class ChemicalsService {
52
59
  ...(doc.chemical_id && { chemicalId: doc.chemical_id })
53
60
  };
54
61
 
62
+ logInfo('pegasus-sdk', `[bulkIndexFielded] Prepared chemical object: sourceId=${chemical.sourceId}, chemicalName=${chemical.chemicalName}`);
63
+
55
64
  const [result] = await db
56
65
  .insert(schema.chemicals)
57
66
  .values(chemical)
@@ -68,14 +77,19 @@ class ChemicalsService {
68
77
  })
69
78
  .returning();
70
79
 
80
+ logInfo('pegasus-sdk', `[bulkIndexFielded] Document ${i} indexed successfully: ${result?.chemicalId || 'no ID returned'}`);
71
81
  results.push({ index: i, success: true, result });
72
82
  } catch (err) {
83
+ logError('pegasus-sdk', 'bulkIndexFielded', `Error indexing document ${i}`, err);
73
84
  results.push({ index: i, success: false, error: err.message });
74
85
  errors.push({ document: doc, error: err.message });
75
86
  }
76
87
  }
77
88
 
78
- return { indexed: results.filter(r => r.success).length, errors, results };
89
+ const successCount = results.filter(r => r.success).length;
90
+ logInfo('pegasus-sdk', `[bulkIndexFielded] Bulk index complete: ${successCount}/${documents.length} succeeded, ${errors.length} errors`);
91
+
92
+ return { indexed: successCount, errors, results };
79
93
  } catch (error) {
80
94
  logError('pegasus-sdk', 'ChemicalsService', 'bulkIndexFielded', error);
81
95
  throw error;
@@ -628,121 +642,209 @@ class ChemicalsService {
628
642
  }
629
643
  }
630
644
 
631
- registerElasticsearchHandlers(elasticsearchService) {
632
- const indexPatterns = this.connection.config.indexRoutes?.chemicals || ['chemicals*'];
633
-
634
- indexPatterns.forEach(pattern => {
635
- elasticsearchService.registerIndexRoute(pattern, {
636
- index: async (params) => {
637
- const chemical = params.body;
638
- const result = await this.createChemical(chemical);
639
-
640
- return {
641
- _index: params.index,
642
- _id: result.chemicalId,
643
- _version: 1,
644
- result: 'created',
645
- _source: result
646
- };
647
- },
645
+ _buildEsHandlers() {
646
+ return {
647
+ index: async (params) => {
648
+ const chemical = params.body;
649
+ const result = await this.createChemical(chemical);
648
650
 
649
- bulk: async (params) => {
650
- const operations = params.body || params.operations;
651
- const documents = [];
652
-
653
- for (let i = 0; i < operations.length; i += 2) {
654
- const action = operations[i];
655
- const document = operations[i + 1];
656
-
657
- if (action.index || action.create) {
658
- documents.push(document);
659
- }
660
- }
651
+ return {
652
+ _index: params.index,
653
+ _id: result.chemicalId,
654
+ _version: 1,
655
+ result: 'created',
656
+ _source: result
657
+ };
658
+ },
659
+
660
+ bulk: async (params) => {
661
+ const { logInfo, logError } = require('@toxplanet/tphelper/logging');
662
+ const operations = params.body || params.operations;
663
+
664
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] Starting bulk operation with ${operations?.length || 0} total operations`);
665
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] Params index: ${params.index}`);
666
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] Operations array type: ${Array.isArray(operations) ? 'array' : typeof operations}`);
667
+
668
+ const cdiDocuments = [];
669
+ let cdiOpCount = 0;
670
+ let otherOpCount = 0;
671
+
672
+ for (let i = 0; i < operations.length; i++) {
673
+ const op = operations[i];
674
+ const isIndexOp = !!(op.index || op.create);
675
+ const indexName = op.index?._index || op.create?._index || op.delete?._index || op.update?._index;
661
676
 
662
- const result = await this.bulkIndexFielded(documents);
677
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] Op[${i}]: action=${Object.keys(op)[0] || 'unknown'}, index=${indexName}`);
663
678
 
664
- const items = result.results.map((res, idx) => {
665
- if (res.success) {
666
- return {
667
- index: {
668
- _index: params.index || 'chemicals',
669
- _id: documents[idx].source_id || documents[idx]._id,
670
- status: 201,
671
- result: 'created'
672
- }
679
+ if ((op.index || op.create) &&
680
+ (op.index?._index === 'chemical_data_index' || op.create?._index === 'chemical_data_index')) {
681
+ const doc = operations[i + 1];
682
+ const sourceId = op.index?._id || op.create?._id;
683
+
684
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] Found CDI entry: sourceId=${sourceId}, hasDoc=${!!doc}`);
685
+
686
+ if (doc && sourceId) {
687
+ const cdiDoc = {
688
+ source_id: sourceId,
689
+ chemical_name: doc.chemical_primary_name || (doc.chemical_names && doc.chemical_names[0]) || null,
690
+ chemical_meta: doc.chemical_meta || {},
691
+ chemical_identifiers: doc.chemical_identifiers || {},
692
+ chemical_synonyms: doc.chemical_synonyms || [],
693
+ chemical_categories: doc.chemical_categories || [],
694
+ created_at: doc.chemical_created_at,
695
+ updated_at: doc.chemical_updated_at
673
696
  };
697
+ cdiDocuments.push(cdiDoc);
698
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] Extracted CDI doc: ${JSON.stringify({ source_id: cdiDoc.source_id, chemical_name: cdiDoc.chemical_name })}`);
699
+ i++;
700
+ cdiOpCount++;
674
701
  } else {
675
- return {
676
- index: {
677
- _index: params.index || 'chemicals',
678
- _id: documents[idx].source_id || documents[idx]._id,
679
- status: 400,
680
- error: {
681
- type: 'mapper_parsing_exception',
682
- reason: res.error
683
- }
684
- }
685
- };
702
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] CDI entry incomplete: sourceId=${sourceId}, doc=${!!doc}`);
686
703
  }
687
- });
688
-
689
- return {
690
- took: 1,
691
- errors: result.errors.length > 0,
692
- items
693
- };
694
- },
695
-
696
- get: async (params) => {
697
- const result = await this.getChemicalById(params.id);
704
+ } else {
705
+ otherOpCount++;
706
+ }
707
+ }
708
+
709
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] Scan complete: ${cdiOpCount} CDI docs found, ${otherOpCount} other operations skipped`);
710
+
711
+ if (cdiDocuments.length === 0) {
712
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] No CDI documents to index, returning empty no-op response`);
713
+ return { took: 0, errors: false, items: [] };
714
+ }
715
+
716
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] Calling bulkIndexFielded with ${cdiDocuments.length} CDI documents`);
717
+
718
+ try {
719
+ const result = await this.bulkIndexFielded(cdiDocuments);
720
+ logInfo('pegasus-sdk', `[ChemicalsService.bulk] bulkIndexFielded returned: indexed=${result.indexed}, errors=${result.errors.length}`);
698
721
 
699
- if (!result) {
700
- return {
701
- _index: params.index,
702
- _id: params.id,
703
- found: false
704
- };
722
+ if (result.errors.length > 0) {
723
+ logError('pegasus-sdk', 'ChemicalsService.bulk', 'Errors during bulk indexing', result.errors);
705
724
  }
706
725
 
707
726
  return {
708
- _index: params.index,
709
- _id: result.chemicalId,
710
- _version: 1,
711
- found: true,
712
- _source: result
727
+ took: 1,
728
+ errors: result.errors.length > 0,
729
+ items: result.results.map((res, idx) => ({
730
+ index: {
731
+ _index: 'chemical_data_index',
732
+ _id: cdiDocuments[idx].source_id,
733
+ status: res.success ? 200 : 400,
734
+ result: res.success ? 'created' : 'error',
735
+ ...(res.success ? {} : { error: { type: 'mapper_parsing_exception', reason: res.error } })
736
+ }
737
+ }))
713
738
  };
714
- },
739
+ } catch (error) {
740
+ logError('pegasus-sdk', 'ChemicalsService.bulk', 'Fatal error during bulk indexing', error);
741
+ throw error;
742
+ }
743
+ },
744
+
745
+ get: async (params) => {
746
+ const result = await this.getChemicalBySourceId(params.id);
715
747
 
716
- update: async (params) => {
717
- const result = await this.updateChemical(params.id, params.body);
718
-
748
+ if (!result) {
719
749
  return {
720
750
  _index: params.index,
721
751
  _id: params.id,
722
- _version: 2,
723
- result: result ? 'updated' : 'noop',
724
- _source: result
752
+ found: false
725
753
  };
726
- },
754
+ }
755
+
756
+ return {
757
+ _index: params.index,
758
+ _id: params.id,
759
+ _version: 1,
760
+ found: true,
761
+ _source: result
762
+ };
763
+ },
764
+
765
+ update: async (params) => {
766
+ const result = await this.updateChemical(params.id, params.body);
767
+
768
+ return {
769
+ _index: params.index,
770
+ _id: params.id,
771
+ _version: 2,
772
+ result: result ? 'updated' : 'noop',
773
+ _source: result
774
+ };
775
+ },
776
+
777
+ delete: async (params) => {
778
+ if (params.index === 'synonym_lookup_index') {
779
+ return { _index: params.index, _id: params.id, result: 'not_found' };
780
+ }
781
+ const result = await this.deleteBySourceId(params.id);
782
+
783
+ return {
784
+ _index: params.index,
785
+ _id: params.id,
786
+ result: result ? 'deleted' : 'not_found'
787
+ };
788
+ },
789
+
790
+ deleteByQuery: async (params) => {
791
+ const sourceId = params.body?.query?.term?.chemical_set_identifier
792
+ || params.body?.query?.term?.source_id;
793
+ if (!sourceId) {
794
+ return { deleted: 0, failures: [] };
795
+ }
796
+ const result = await this.deleteBySourceId(sourceId);
797
+ return {
798
+ deleted: result ? 1 : 0,
799
+ failures: []
800
+ };
801
+ },
802
+
803
+ search: async (params) => {
804
+ let searchTerm = '';
805
+ let limit = params.body?.size || 10;
727
806
 
728
- delete: async (params) => {
729
- const result = await this.deleteChemical(params.id);
807
+ if (params.index === 'synonym_lookup_index') {
808
+ const query = params.body?.query;
809
+ searchTerm = query?.match?.chemical_name ||
810
+ query?.term?.chemical_name ||
811
+ query?.query_string?.query || '';
812
+ const searchResults = await this.searchBySynonym(searchTerm, limit);
730
813
 
731
814
  return {
732
- _index: params.index,
733
- _id: params.id,
734
- _version: 1,
735
- result: result ? 'deleted' : 'not_found'
815
+ took: 1,
816
+ timed_out: false,
817
+ _shards: {
818
+ total: 1,
819
+ successful: 1,
820
+ skipped: 0,
821
+ failed: 0
822
+ },
823
+ hits: {
824
+ total: {
825
+ value: searchResults.results.length,
826
+ relation: 'eq'
827
+ },
828
+ max_score: searchResults.results[0]?.score || 0,
829
+ hits: searchResults.results.map(result => ({
830
+ _index: params.index,
831
+ _id: result.id,
832
+ _score: result.score,
833
+ _source: {
834
+ postgres_id: result.id,
835
+ chemical_name: result.name,
836
+ cas_numbers: result.cas,
837
+ identifier_values: result.identifiers,
838
+ synonyms: result.synonyms
839
+ }
840
+ }))
841
+ }
736
842
  };
737
- },
738
-
739
- search: async (params) => {
843
+ } else {
740
844
  const query = params.body?.query;
741
- const searchTerm = query?.match?.chemical_name ||
742
- query?.term?.chemical_name ||
743
- query?.query_string?.query || '';
744
- const limit = params.body?.size || 10;
745
-
845
+ searchTerm = query?.match?.chemical_name ||
846
+ query?.term?.chemical_name ||
847
+ query?.query_string?.query || '';
746
848
  const searchResults = await this.searchByName(searchTerm, limit);
747
849
 
748
850
  return {
@@ -774,12 +876,25 @@ class ChemicalsService {
774
876
  }))
775
877
  }
776
878
  };
777
- },
778
-
779
- count: async (params) => {
780
- return await this.countAll();
781
879
  }
782
- });
880
+ },
881
+
882
+ count: async (params) => {
883
+ if (params.index === 'synonym_lookup_index') {
884
+ return await this.getTotalSynonymCount();
885
+ }
886
+ return await this.countAll();
887
+ }
888
+ };
889
+ }
890
+
891
+ registerElasticsearchHandlers(elasticsearchService) {
892
+ const configurablePatterns = this.connection.config.indexRoutes?.chemicals || ['chemicals*'];
893
+ const legacyPatterns = ['synonym_lookup_index', 'chemical_data_index', 'chemical_converter_index'];
894
+ const allPatterns = [...new Set([...configurablePatterns, ...legacyPatterns])];
895
+ const handlers = this._buildEsHandlers();
896
+ allPatterns.forEach(pattern => {
897
+ elasticsearchService.registerIndexRoute(pattern, handlers);
783
898
  });
784
899
  }
785
900
  }
package/lib/db/index.js CHANGED
@@ -1,18 +1,18 @@
1
- const { drizzle } = require('drizzle-orm/node-postgres');
2
- const { logInfo } = require('@toxplanet/tphelper/logging');
3
- const schema = require('./schema');
4
-
5
- const logger = {
6
- logQuery(query, params) {
7
- logInfo('pegasus-sdk', `[SQL] ${query}${params?.length ? ` -- params: ${JSON.stringify(params)}` : ''}`);
8
- }
9
- };
10
-
11
- function getDrizzle(pgPool) {
12
- return drizzle(pgPool, { schema, logger });
13
- }
14
-
15
- module.exports = {
16
- getDrizzle,
17
- schema
18
- };
1
+ const { drizzle } = require('drizzle-orm/node-postgres');
2
+ const { logInfo } = require('@toxplanet/tphelper/logging');
3
+ const schema = require('./schema');
4
+
5
+ const logger = {
6
+ logQuery(query, params) {
7
+ logInfo('pegasus-sdk', `[SQL] ${query}${params?.length ? ` -- params: ${JSON.stringify(params)}` : ''}`);
8
+ }
9
+ };
10
+
11
+ function getDrizzle(pgPool) {
12
+ return drizzle(pgPool, { schema, logger });
13
+ }
14
+
15
+ module.exports = {
16
+ getDrizzle,
17
+ schema
18
+ };
@@ -64,22 +64,45 @@ class ElasticsearchService {
64
64
  }
65
65
 
66
66
  async bulk(params) {
67
+ const { logInfo, logError } = require('@toxplanet/tphelper/logging');
67
68
  const operations = params.body || params.operations;
69
+
70
+ logInfo('pegasus-sdk', `[ElasticsearchService.bulk] Starting bulk routing with ${operations?.length || 0} operations`);
71
+ logInfo('pegasus-sdk', `[ElasticsearchService.bulk] params.index=${params.index}`);
72
+
68
73
  if (!Array.isArray(operations) || operations.length === 0) {
69
74
  throw new Error('Bulk operations must be a non-empty array');
70
75
  }
71
76
 
72
- const firstOp = operations[0];
73
- const indexName = firstOp?.index?._index || firstOp?.index?.index || firstOp?.create?._index || firstOp?.create?.index || params.index;
77
+ let indexName = params.index;
78
+ let cdiIndexName = null;
79
+ const foundIndices = [];
74
80
 
75
- if (!indexName) {
76
- throw new Error('Could not determine index from bulk operations');
81
+ for (const op of operations) {
82
+ const name = op?.index?._index || op?.index?.index ||
83
+ op?.create?._index || op?.create?.index ||
84
+ op?.delete?._index || op?.delete?.index ||
85
+ op?.update?._index || op?.update?.index;
86
+ if (!name) continue;
87
+
88
+ if (!foundIndices.includes(name)) foundIndices.push(name);
89
+
90
+ if (name === 'chemical_data_index') { cdiIndexName = name; break; }
91
+ if (!indexName) indexName = name;
77
92
  }
93
+
94
+ logInfo('pegasus-sdk', `[ElasticsearchService.bulk] Found indices in operations: ${foundIndices.join(', ')}`);
95
+ logInfo('pegasus-sdk', `[ElasticsearchService.bulk] cdiIndexName=${cdiIndexName}, fallback indexName=${indexName}`);
96
+
97
+ indexName = cdiIndexName || indexName;
98
+ if (!indexName) throw new Error('Could not determine index from bulk operations');
78
99
 
100
+ logInfo('pegasus-sdk', `[ElasticsearchService.bulk] Routing to index: ${indexName}`);
101
+
79
102
  const handler = this.getRouteHandler(indexName);
80
- if (handler && handler.bulk) {
81
- return await handler.bulk(params);
82
- }
103
+ logInfo('pegasus-sdk', `[ElasticsearchService.bulk] Handler found: ${!!handler}, has bulk method: ${!!(handler && handler.bulk)}`);
104
+
105
+ if (handler && handler.bulk) return await handler.bulk(params);
83
106
  throw new Error(`No handler registered for index: ${indexName}`);
84
107
  }
85
108
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@toxplanet/pegasus-sdk",
3
- "version": "1.1.7",
3
+ "version": "1.1.9",
4
4
  "description": "SDK for migrating chemical data to Pegasus PostgreSQL + OpenSearch architecture with Elasticsearch client compatibility",
5
5
  "main": "index.js",
6
6
  "type": "commonjs",