ddi-fw 0.0.217__tar.gz → 0.0.218__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/core.py +1 -0
  4. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/base.py +22 -7
  5. ddi_fw-0.0.218/src/ddi_fw/datasets/mdf_sa_ddi/base.py +260 -0
  6. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/ml/__init__.py +2 -1
  7. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/ml/ml_helper.py +26 -30
  8. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/ml/model_wrapper.py +0 -1
  9. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/ml/tensorflow_wrapper.py +165 -89
  10. ddi_fw-0.0.218/src/ddi_fw/ml/tracking_service.py +194 -0
  11. ddi_fw-0.0.217/src/ddi_fw/pipeline/multi_pipeline_v2.py → ddi_fw-0.0.218/src/ddi_fw/pipeline/multi_pipeline.py +8 -11
  12. ddi_fw-0.0.218/src/ddi_fw/pipeline/pipeline.py +148 -0
  13. ddi_fw-0.0.218/src/ddi_fw/utils/utils.py +117 -0
  14. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  15. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw.egg-info/SOURCES.txt +2 -1
  16. ddi_fw-0.0.217/src/ddi_fw/datasets/mdf_sa_ddi/base.py +0 -164
  17. ddi_fw-0.0.217/src/ddi_fw/pipeline/pipeline.py +0 -206
  18. ddi_fw-0.0.217/src/ddi_fw/utils/utils.py +0 -117
  19. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/README.md +0 -0
  20. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/setup.cfg +0 -0
  21. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/__init__.py +0 -0
  22. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/dataset_splitter.py +0 -0
  23. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/db_utils.py +0 -0
  24. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
  25. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/debug.log +0 -0
  26. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
  27. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
  28. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
  29. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
  30. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
  31. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
  32. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
  33. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
  34. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
  35. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
  36. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
  37. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
  38. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
  39. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
  40. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
  41. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
  42. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
  43. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
  44. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
  45. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
  46. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
  47. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
  48. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
  49. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
  50. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
  51. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/base.py +0 -0
  52. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/data/event.db +0 -0
  53. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/indexes/test_indexes.txt +0 -0
  54. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_0.txt +0 -0
  55. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_1.txt +0 -0
  56. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_2.txt +0 -0
  57. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_3.txt +0 -0
  58. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_4.txt +0 -0
  59. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_indexes.txt +0 -0
  60. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_0.txt +0 -0
  61. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_1.txt +0 -0
  62. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt +0 -0
  63. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt +0 -0
  64. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt +0 -0
  65. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
  66. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
  67. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
  68. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
  69. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
  70. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
  71. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
  72. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
  73. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
  74. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
  75. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
  76. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
  77. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
  78. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
  79. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
  80. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
  81. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/datasets/setup_._py +0 -0
  82. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/drugbank/__init__.py +0 -0
  83. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
  84. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
  85. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
  86. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
  87. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/drugbank/event_extractor.py +0 -0
  88. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/langchain/__init__.py +0 -0
  89. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/langchain/embeddings.py +0 -0
  90. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
  91. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/langchain/storage.py +0 -0
  92. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/ml/evaluation_helper.py +0 -0
  93. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
  94. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/ner/__init__.py +0 -0
  95. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  96. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/ner/ner.py +0 -0
  97. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/pipeline/__init__.py +0 -0
  98. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
  99. /ddi_fw-0.0.217/src/ddi_fw/pipeline/multi_pipeline.py → /ddi_fw-0.0.218/src/ddi_fw/pipeline/multi_pipeline_org.py +0 -0
  100. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
  101. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/utils/__init__.py +0 -0
  102. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/utils/categorical_data_encoding_checker.py +0 -0
  103. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/utils/enums.py +0 -0
  104. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/utils/json_helper.py +0 -0
  105. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/utils/kaggle.py +0 -0
  106. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/utils/numpy_utils.py +0 -0
  107. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/utils/package_helper.py +0 -0
  108. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  109. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/utils/zip_helper.py +0 -0
  110. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/vectorization/__init__.py +0 -0
  111. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/vectorization/feature_vector_generation.py +0 -0
  112. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw/vectorization/idf_helper.py +0 -0
  113. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  114. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw.egg-info/requires.txt +0 -0
  115. {ddi_fw-0.0.217 → ddi_fw-0.0.218}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.217
3
+ Version: 0.0.218
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "ddi_fw"
9
- version = "0.0.217"
9
+ version = "0.0.218"
10
10
  description = "Do not use :)"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -73,6 +73,7 @@ class BaseDataset(BaseModel, abc.ABC):
73
73
  train_idx_arr: Optional[List[np.ndarray]] = None
74
74
  val_idx_arr: Optional[List[np.ndarray]] = None
75
75
  columns: List[str] = []
76
+ additional_config: Optional[Dict[str, Any]] = None
76
77
 
77
78
  class Config:
78
79
  arbitrary_types_allowed = True
@@ -9,6 +9,8 @@ from abc import ABC, abstractmethod
9
9
  from sklearn.preprocessing import LabelBinarizer
10
10
  import logging
11
11
 
12
+ from ddi_fw.ner.ner import CTakesNER
13
+
12
14
 
13
15
  try:
14
16
  from ddi_fw.vectorization import IDF
@@ -63,6 +65,18 @@ class DDIMDLDataset(BaseDataset,TextDatasetMixin):
63
65
 
64
66
  super().__init__(**kwargs)
65
67
 
68
+ # self.additional_config = kwargs.get('dataset_additional_config', {})
69
+ if self.additional_config:
70
+ ner = self.additional_config.get('ner', {})
71
+ self.ner_data_file = ner.get('data_file', None)
72
+ self.ner_threshold = ner.get('thresholds', None)
73
+ # if self.ner_threshold:
74
+ # for k, v in self.ner_threshold.items():
75
+ # kwargs[k] = v
76
+
77
+ self.ner_df = CTakesNER(df=None).load(
78
+ filename=self.ner_data_file) if self.ner_data_file else None
79
+
66
80
  columns = kwargs['columns']
67
81
  if columns:
68
82
  chemical_property_columns = []
@@ -155,13 +169,14 @@ class DDIMDLDataset(BaseDataset,TextDatasetMixin):
155
169
 
156
170
  # for key in filtered_ner_df.keys():
157
171
  for key in self.ner_columns:
158
- threshold = 0
159
- if key.startswith('tui'):
160
- threshold = self.tui_threshold
161
- if key.startswith('cui'):
162
- threshold = self.cui_threshold
163
- if key.startswith('entities'):
164
- threshold = self.entities_threshold
172
+ threshold = self.ner_threshold.get(key, 0)
173
+ # threshold = 0
174
+ # if key.startswith('tui'):
175
+ # threshold = self.tui_threshold
176
+ # if key.startswith('cui'):
177
+ # threshold = self.cui_threshold
178
+ # if key.startswith('entities'):
179
+ # threshold = self.entities_threshold
165
180
  combined_df[key] = filtered_ner_df[key]
166
181
  valid_codes = idf_scores_df[idf_scores_df[key]
167
182
  > threshold].index
@@ -0,0 +1,260 @@
1
+ import os
2
+ import pathlib
3
+ from typing import List, Optional, Tuple
4
+ from ddi_fw.datasets.core import BaseDataset, TextDatasetMixin, generate_sim_matrices_new, generate_vectors
5
+ from ddi_fw.datasets.db_utils import create_connection
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pydantic import BaseModel, Field, model_validator, root_validator
9
+ from abc import ABC, abstractmethod
10
+ from sklearn.preprocessing import LabelBinarizer
11
+ import logging
12
+
13
+ from ddi_fw.ner.ner import CTakesNER
14
+ from ddi_fw.utils.zip_helper import ZipHelper
15
+
16
+
17
+ try:
18
+ from ddi_fw.vectorization import IDF
19
+ except ImportError:
20
+ raise ImportError(
21
+ "Failed to import vectorization module. Ensure that the module exists and is correctly installed. ")
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ # Constants for embedding, chemical properties, and NER columns
26
+ LIST_OF_EMBEDDING_COLUMNS = [
27
+ 'all_text', 'description', 'synthesis_reference', 'indication',
28
+ 'pharmacodynamics', 'mechanism_of_action', 'toxicity', 'metabolism',
29
+ 'absorption', 'half_life', 'protein_binding', 'route_of_elimination',
30
+ 'volume_of_distribution', 'clearance'
31
+ ]
32
+
33
+ LIST_OF_CHEMICAL_PROPERTY_COLUMNS = ['enzyme', 'target', 'smile']
34
+ LIST_OF_NER_COLUMNS = ['tui', 'cui', 'entities']
35
+
36
+ HERE = pathlib.Path(__file__).resolve().parent
37
+
38
class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
    """MDF-SA-DDI drug-drug interaction dataset.

    Loads drug records and pairwise interaction events from the bundled
    ``mdf-sa-ddi.zip`` SQLite archive; when the archive is missing, the
    database is rebuilt from the two CSV files shipped next to this module
    and re-zipped (see ``__to_db__``).

    The requested ``columns`` are partitioned into chemical-property,
    embedding and NER feature groups; similarity matrices are later derived
    from the chemical-property and NER columns.
    """

    # NOTE(review): a large commented-out legacy constructor used to live
    # here; it was superseded by the pydantic-style __init__ below.

    dataset_name: str = "MDFSADDIDataset"
    # Folder holding the precomputed train/validation/test index files.
    index_path: str = Field(default_factory=lambda: str(
        pathlib.Path(__file__).resolve().parent.joinpath('indexes')))
    # Populated in __init__ from the SQLite database.
    drugs_df: Optional[pd.DataFrame] = None
    ddis_df: Optional[pd.DataFrame] = None

    chemical_property_columns: list[str] = Field(
        default_factory=lambda: LIST_OF_CHEMICAL_PROPERTY_COLUMNS)
    embedding_columns: list[str] = Field(default_factory=list)
    ner_columns: list[str] = Field(default_factory=list)
    ner_df: pd.DataFrame | None = None
    tui_threshold: float | None = None
    cui_threshold: float | None = None
    entities_threshold: float | None = None

    # @model_validator
    # NOTE(review): the decorator above is commented out, so pydantic never
    # invokes this validator; it is currently dead code.
    def validate_columns(self, values):
        if not set(values['chemical_property_columns']).issubset(LIST_OF_CHEMICAL_PROPERTY_COLUMNS):
            raise ValueError("Invalid chemical property columns")
        if not set(values['ner_columns']).issubset(LIST_OF_NER_COLUMNS):
            raise ValueError("Invalid NER columns")
        return values

    def __init__(self, **kwargs):
        """Initialize the dataset.

        All keyword arguments are forwarded to the pydantic base classes.
        ``kwargs['columns']`` is read directly and must be present; when
        non-empty it selects which feature columns to use.
        """

        super().__init__(**kwargs)

        # Optional NER configuration arrives via the shared
        # ``additional_config`` field declared on BaseDataset.
        # self.additional_config = kwargs.get('dataset_additional_config', {})
        if self.additional_config:
            ner = self.additional_config.get('ner', {})
            self.ner_data_file = ner.get('data_file', None)
            self.ner_threshold = ner.get('thresholds', None)
            # if self.ner_threshold:
            #     for k, v in self.ner_threshold.items():
            #         kwargs[k] = v

            # NOTE(review): placed inside this branch because
            # ``ner_data_file`` is only ever assigned here — confirm against
            # the original (indentation was lost in extraction).
            self.ner_df = CTakesNER(df=None).load(
                filename=self.ner_data_file) if self.ner_data_file else None

        # Partition the requested columns into the three feature groups;
        # anything unknown is rejected outright.
        columns = kwargs['columns']
        if columns:
            chemical_property_columns = []
            embedding_columns = []
            ner_columns = []
            for column in columns:
                if column in LIST_OF_CHEMICAL_PROPERTY_COLUMNS:
                    chemical_property_columns.append(column)
                elif column in LIST_OF_EMBEDDING_COLUMNS:
                    embedding_columns.append(column)
                elif column in LIST_OF_NER_COLUMNS:
                    ner_columns.append(column)
                else:
                    raise Exception(f"{column} is not related this dataset")

            self.chemical_property_columns = chemical_property_columns
            self.embedding_columns = embedding_columns
            self.ner_columns = ner_columns
            self.columns = []  # this list is filled later, in the prep method

        # First use: build the SQLite db from the CSVs and zip it.
        # Subsequent uses: extract the zipped db and open it.
        db_zip_path = HERE.joinpath('mdf-sa-ddi.zip')
        db_path = HERE.joinpath('mdf-sa-ddi.db')
        if not os.path.exists(db_zip_path):
            self.__to_db__(db_path)
        else:
            ZipHelper().extract(
                input_path=str(HERE), output_path=str(HERE))
            conn = create_connection(db_path.absolute().as_posix())
            self.drugs_df = select_all_drugs_as_dataframe(conn)
            self.ddis_df = select_all_events_as_dataframe(conn)
            # kwargs = {'index_path': str(HERE.joinpath('indexes'))}

        self.class_column = 'event_category'

        # Columns that take part in similarity-matrix generation.
        self.__similarity_related_columns__ = []
        self.__similarity_related_columns__.extend(
            self.chemical_property_columns)
        self.__similarity_related_columns__.extend(self.ner_columns)
        logger.info(f'{self.dataset_name} is initialized')

    def __to_db__(self, db_path):
        """Build the SQLite database from the bundled CSVs and zip it.

        Reads the drug and interaction CSV files, mirrors every interaction
        pair (interactions are symmetric), resolves drug names to ids, writes
        the ``drug`` and ``event`` tables, and archives the db file as
        ``mdf-sa-ddi.zip``.
        """
        conn = create_connection(db_path)
        drugs_path = HERE.joinpath('drug_information_del_noDDIxiaoyu50.csv')
        ddis_path = HERE.joinpath('df_extraction_cleanxiaoyu50.csv')
        self.drugs_df = pd.read_csv(drugs_path)
        self.ddis_df = pd.read_csv(ddis_path)
        # Drop the CSV export's stray positional index column.
        self.drugs_df.drop(columns="Unnamed: 0", inplace=True)
        self.ddis_df.drop(columns="Unnamed: 0", inplace=True)

        self.ddis_df.rename(
            columns={"drugA": "name1", "drugB": "name2"}, inplace=True)
        # The class label is the concatenation of mechanism and action.
        self.ddis_df['event_category'] = self.ddis_df['mechanism'] + \
            ' ' + self.ddis_df['action']

        # Interactions are symmetric: mirror every (A, B) pair as (B, A).
        # NOTE(review): this reads id1/id2 before they are recomputed below —
        # assumes the CSV already carries id columns; verify.
        reverse_ddis_df = pd.DataFrame()
        reverse_ddis_df['id1'] = self.ddis_df['id2']
        reverse_ddis_df['name1'] = self.ddis_df['name2']
        reverse_ddis_df['id2'] = self.ddis_df['id1']
        reverse_ddis_df['name2'] = self.ddis_df['name1']
        reverse_ddis_df['event_category'] = self.ddis_df['event_category']

        self.ddis_df = pd.concat(
            [self.ddis_df, reverse_ddis_df], ignore_index=True)

        # Map each drug name to its id for resolving the pair columns.
        drug_name_id_pairs = {}
        for idx, row in self.drugs_df.iterrows():
            drug_name_id_pairs[row['name']] = row['id']

        # id1,id2

        def lambda_fnc1(column):
            return drug_name_id_pairs[column]
        # def lambda_fnc2(row):
        #     x = self.drugs_df[self.drugs_df['name'] == row['name2']]
        #     return x['id']

        self.ddis_df['id1'] = self.ddis_df['name1'].apply(
            lambda_fnc1)  # , axis=1
        self.ddis_df['id2'] = self.ddis_df['name2'].apply(
            lambda_fnc1)  # , axis=1
        if conn:
            self.drugs_df.to_sql('drug', conn, if_exists='replace', index=False)
            self.ddis_df.to_sql('event', conn, if_exists='replace', index=False)
            ZipHelper().zip_single_file(
                file_path=db_path, output_path=HERE, zip_name='mdf-sa-ddi')
221
+
222
+
223
def select_all_drugs(conn):
    """Fetch every row of the ``drug`` table.

    :param conn: an open sqlite3 Connection
    :return: list of (index, id, name, target, enzyme, smile) tuples
    """
    query = '''select "index", id, name, target, enzyme, smile from drug'''
    cursor = conn.cursor()
    cursor.execute(query)
    return cursor.fetchall()
229
+
230
+
231
def select_all_drugs_as_dataframe(conn):
    """Return the ``drug`` table as a DataFrame.

    The multi-valued ``enzyme``, ``target`` and ``smile`` columns are
    pipe-delimited in the database and are split into lists here.

    :param conn: an open sqlite3 Connection
    """
    headers = ['index', 'id', 'name', 'target', 'enzyme', 'smile']
    frame = pd.DataFrame(data=select_all_drugs(conn), columns=headers)
    for multi_valued in ('enzyme', 'target', 'smile'):
        frame[multi_valued] = frame[multi_valued].apply(
            lambda cell: cell.split('|'))
    return frame
239
+
240
+
241
def select_all_events(conn):
    """
    Query all rows in the event table
    :param conn: the Connection object
    :return: list of (index, id1, name1, id2, name2, mechanism, action,
        event_category) tuples
    """
    cursor = conn.cursor()
    cursor.execute('''
select event."index", id1, name1, id2, name2, mechanism, action, event_category from event
''')
    return cursor.fetchall()
254
+
255
+
256
def select_all_events_as_dataframe(conn):
    """Return the ``event`` table as a DataFrame.

    :param conn: an open sqlite3 Connection
    """
    columns = ["index", "id1", "name1", "id2",
               "name2", "mechanism", "action", "event_category"]
    return pd.DataFrame(data=select_all_events(conn), columns=columns)
@@ -2,4 +2,5 @@ from .ml_helper import MultiModalRunner
2
2
  from .model_wrapper import ModelWrapper,Result
3
3
  from .tensorflow_wrapper import TFModelWrapper
4
4
  from .pytorch_wrapper import PTModelWrapper
5
- from .evaluation_helper import evaluate
5
+ from .evaluation_helper import evaluate
6
+ from .tracking_service import TrackingService
@@ -1,23 +1,9 @@
1
- from typing import Callable, Dict, List, Tuple
2
- from matplotlib import pyplot as plt
3
1
  from ddi_fw.ml.model_wrapper import Result
4
2
  from ddi_fw.ml.pytorch_wrapper import PTModelWrapper
5
3
  from ddi_fw.ml.tensorflow_wrapper import TFModelWrapper
6
4
  from ddi_fw.utils.package_helper import get_import
7
- import tensorflow as tf
8
- from tensorflow.python import keras
9
- from tensorflow.python.keras import Model, Sequential
10
- from tensorflow.python.keras.layers import Dense, Dropout, Input, Activation
11
- from tensorflow.python.keras.callbacks import EarlyStopping
12
- from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
13
5
  import numpy as np
14
-
15
- import mlflow
16
- from mlflow.utils.autologging_utils import batch_metrics_logger
17
- import time
18
-
19
- from mlflow.models import infer_signature
20
- from ddi_fw.ml.evaluation_helper import Metrics, evaluate
6
+ from ddi_fw.ml.evaluation_helper import evaluate
21
7
 
22
8
  # import tf2onnx
23
9
  # import onnx
@@ -32,16 +18,16 @@ import ddi_fw.utils as utils
32
18
 
33
19
  class MultiModalRunner:
34
20
  # todo model related parameters to config
35
- def __init__(self, library, multi_modal, default_model, use_mlflow=False):
21
+ def __init__(self, library, multi_modal, default_model, tracking_service):
36
22
  self.library = library
37
23
  self.multi_modal = multi_modal
38
24
  self.default_model = default_model
39
- self.use_mlflow = use_mlflow
25
+ self.tracking_service = tracking_service
40
26
  self.result = Result()
41
27
 
42
- def _mlflow_(self, func: Callable):
43
- if self.use_mlflow:
44
- func()
28
+ # def _mlflow_(self, func: Callable):
29
+ # if self.use_mlflow:
30
+ # func()
45
31
 
46
32
  def set_data(self, items, train_idx_arr, val_idx_arr, y_test_label):
47
33
  self.items = items
@@ -74,7 +60,7 @@ class MultiModalRunner:
74
60
  kwargs = m.get('params')
75
61
  T = self.__create_model(self.library)
76
62
  single_modal = T(self.date, name, model_type,
77
- use_mlflow=self.use_mlflow, **kwargs)
63
+ tracking_service=self.tracking_service, **kwargs)
78
64
 
79
65
  if input is not None and inputs is not None:
80
66
  raise Exception("input and inputs should not be used together")
@@ -110,7 +96,7 @@ class MultiModalRunner:
110
96
  name = item[0]
111
97
  T = self.__create_model(self.library)
112
98
  single_modal = T(self.date, name, model_type,
113
- use_mlflow=self.use_mlflow, **kwargs)
99
+ tracking_service=self.tracking_service, **kwargs)
114
100
  single_modal.set_data(
115
101
  self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
116
102
 
@@ -130,9 +116,12 @@ class MultiModalRunner:
130
116
  combinations = []
131
117
  for i in range(2, len(l) + 1):
132
118
  combinations.extend(list(itertools.combinations(l, i))) # all
133
- if self.use_mlflow:
134
- with mlflow.start_run(run_name=self.prefix, description="***") as run:
135
- self.__predict(single_results)
119
+
120
+ def _f():
121
+ self.__predict(single_results)
122
+
123
+ if self.tracking_service:
124
+ self.tracking_service.run(run_name=self.prefix, description="***", func = _f , nested_run=False)
136
125
  else:
137
126
  self.__predict(single_results)
138
127
  if combinations:
@@ -143,10 +132,17 @@ class MultiModalRunner:
143
132
  def evaluate_combinations(self, single_results, combinations):
144
133
  for combination in combinations:
145
134
  combination_descriptor = '-'.join(combination)
146
- if self.use_mlflow:
147
- with mlflow.start_run(run_name=combination_descriptor, description="***", nested=True) as combination_run:
135
+ if self.tracking_service:
136
+ def evaluate_combination(artifact_uri=None):
148
137
  self.__evaluate_combinations(
149
- single_results, combination, combination_descriptor, combination_run.info.artifact_uri)
138
+ single_results, combination, combination_descriptor, artifact_uri
139
+ )
140
+
141
+ self.tracking_service.run(run_name=combination_descriptor, description="***", nested_run=True, func=evaluate_combination)
142
+
143
+ # with mlflow.start_run(run_name=combination_descriptor, description="***", nested=True) as combination_run:
144
+ # self.__evaluate_combinations(
145
+ # single_results, combination, combination_descriptor, combination_run.info.artifact_uri)
150
146
  else:
151
147
  self.__evaluate_combinations(
152
148
  single_results, combination, combination_descriptor, None)
@@ -159,8 +155,8 @@ class MultiModalRunner:
159
155
  prediction = utils.to_one_hot_encode(prediction)
160
156
  logs, metrics = evaluate(
161
157
  actual=self.y_test_label, pred=prediction, info=combination_descriptor)
162
- if self.use_mlflow:
163
- mlflow.log_metrics(logs)
158
+ if self.tracking_service:
159
+ self.tracking_service.log_metrics(logs)
164
160
  metrics.format_float()
165
161
  # TODO path bulunamadı hatası aldık
166
162
  if artifact_uri:
@@ -29,7 +29,6 @@ class ModelWrapper:
29
29
  self.train_label = train_label
30
30
  self.test_data = test_data
31
31
  self.test_label = test_label
32
- # https://github.com/mlflow/mlflow/blob/master/examples/tensorflow/train.py
33
32
 
34
33
  def predict(self)-> Any:
35
34
  pass