ddi-fw 0.0.153__py3-none-any.whl → 0.0.155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,11 +14,11 @@ class DatasetSplitter(BaseModel):
14
14
  class Config:
15
15
  arbitrary_types_allowed = True
16
16
 
17
- def split(self, X: pd.DataFrame, y: pd.Series)-> Tuple[
18
- pd.DataFrame, pd.DataFrame, pd.Series, pd.Series, pd.Index, pd.Index, List[np.ndarray], List[np.ndarray]]:
17
+ def split(self, X: pd.DataFrame, y: pd.Series) -> Tuple[
18
+ pd.DataFrame, pd.DataFrame, pd.Series, pd.Series, pd.Index, pd.Index, List[np.ndarray], List[np.ndarray]]:
19
19
  print(
20
20
  f"Splitting dataset into {self.fold_size} folds with shuffle={self.shuffle}...")
21
- #TODO check it
21
+ # TODO check it
22
22
  if len(y.shape) == 1:
23
23
  y = pd.Series(np.expand_dims(y.to_numpy(), axis=1).flatten())
24
24
  stacked = np.vstack(tuple(y.to_numpy()))
@@ -15,6 +15,9 @@ from abc import ABC, abstractmethod
15
15
  from sklearn.preprocessing import LabelBinarizer
16
16
 
17
17
  from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
18
+ import logging
19
+
20
+ logger = logging.getLogger(__name__)
18
21
 
19
22
  # Constants for embedding, chemical properties, and NER columns
20
23
  LIST_OF_EMBEDDING_COLUMNS = [
@@ -57,6 +60,8 @@ class DDIMDLDataset(TextDatasetMixin):
57
60
  return values
58
61
 
59
62
  def __init__(self, **kwargs):
63
+ logger.info(f'{self.dataset_name} is being initialized')
64
+
60
65
  super().__init__(**kwargs)
61
66
  self.class_column = 'event_category'
62
67
  _db_path = HERE.joinpath('data/event.db')
@@ -67,6 +72,7 @@ class DDIMDLDataset(TextDatasetMixin):
67
72
  # TODO with resource
68
73
  self._conn = create_connection(_db_path.absolute().as_posix())
69
74
  self.load_drugs_and_events()
75
+ logger.info(f'{self.dataset_name} is initialized')
70
76
 
71
77
  def load_drugs_and_events(self):
72
78
  self.drugs_df = self.__select_all_drugs_as_dataframe__()
@@ -68,6 +68,7 @@ class MultiPipeline():
68
68
  # Dynamically import the model and dataset classes
69
69
  # model_type = get_import(config.get("model_type"))
70
70
  dataset_type = get_import(config.get("dataset_type"))
71
+ dataset_splitter_type = get_import(config.get("dataset_splitter_type"))
71
72
 
72
73
  combination_type = None
73
74
  kwargs_combination_params=None
@@ -89,6 +90,7 @@ class MultiPipeline():
89
90
  artifact_location=artifact_location,
90
91
  tracking_uri=tracking_uri,
91
92
  dataset_type=dataset_type,
93
+ dataset_splitter_type=dataset_splitter_type,
92
94
  columns=columns,
93
95
  column_embedding_configs=column_embedding_configs,
94
96
  vector_db_persist_directory=vector_db_persist_directory,
@@ -1,4 +1,5 @@
1
1
  from typing import Any, Dict, List, Optional, Type, Union
2
+ from ddi_fw.datasets.dataset_splitter import DatasetSplitter
2
3
  import numpy as np
3
4
  import pandas as pd
4
5
  import chromadb
@@ -23,6 +24,7 @@ class Pipeline(BaseModel):
23
24
  artifact_location: Optional[str] = None
24
25
  tracking_uri: Optional[str] = None
25
26
  dataset_type: Type[BaseDataset]
27
+ dataset_splitter_type: Type[DatasetSplitter] = DatasetSplitter
26
28
  columns: Optional[List[str]] = None
27
29
  embedding_dict: Optional[Dict[str, Any]] = None
28
30
  column_embedding_configs: Optional[Dict] = None
@@ -181,6 +183,8 @@ class Pipeline(BaseModel):
181
183
  # self.ner_df = CTakesNER(df=None).load(
182
184
  # filename=self.ner_data_file) if self.ner_data_file else None
183
185
 
186
+ dataset_splitter = self.dataset_splitter_type()
187
+
184
188
  if issubclass(self.dataset_type, TextDatasetMixin):
185
189
  key, value = next(iter(embedding_dict.items()))
186
190
  embedding_size = value[next(iter(value))][0].shape[0]
@@ -190,6 +194,7 @@ class Pipeline(BaseModel):
190
194
  embedding_dict=embedding_dict,
191
195
  embedding_size=embedding_size,
192
196
  embeddings_pooling_strategy=pooling_strategy,
197
+ dataset_splitter = dataset_splitter,
193
198
  **kwargs)
194
199
  else:
195
200
  dataset = self.dataset_type(**kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.153
3
+ Version: 0.0.155
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -1,12 +1,12 @@
1
1
  ddi_fw/datasets/__init__.py,sha256=yDsRQD_9Ijpm_Rl2wSDwdutG5Q_wca_UBPEvm7nBx04,444
2
2
  ddi_fw/datasets/core.py,sha256=vRMpUsIHIbOKi-6TuUPNu1Ve3ny3cS9hdsydQxFCNvE,8078
3
- ddi_fw/datasets/dataset_splitter.py,sha256=lLIelXv-8rCK0tbwLNgHBHYUO_65HT-_kErAlZhRQVE,1662
3
+ ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
4
4
  ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
5
5
  ddi_fw/datasets/embedding_generator.py,sha256=jiDKwLaPMaQkloxQkuCrhl-A-2OdvocmkSzjWtUnk4g,2255
6
6
  ddi_fw/datasets/feature_vector_generation.py,sha256=gvjpEzkgVV8dp4V8NMMv59u0v-1tNAJ7v83R-keWGoA,4748
7
7
  ddi_fw/datasets/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
8
8
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
9
- ddi_fw/datasets/ddi_mdl/base.py,sha256=sj4WfwNmcTFznVxlND6FYoyqnL7VUrjn0TG24LDYk3w,9353
9
+ ddi_fw/datasets/ddi_mdl/base.py,sha256=hk2DC4Xj-7vYu6m1qTYuBS0s3KP7jBrhAjQJofHP0WU,9537
10
10
  ddi_fw/datasets/ddi_mdl/debug.log,sha256=eWz05j8RFqZuHFDTCF7Rck5w4rvtTanFN21iZsgxO7Y,115
11
11
  ddi_fw/datasets/ddi_mdl/readme.md,sha256=WC6lpmsEKvIISnZqENY7TWtzCQr98HPpE3oRsBl8pIw,625
12
12
  ddi_fw/datasets/ddi_mdl/data/event.db,sha256=cmlSsf9MYjRzqR-mw3cUDnTnfT6FkpOG2yCl2mMwwew,30580736
@@ -86,9 +86,9 @@ ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6
86
86
  ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
87
87
  ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,212
88
88
  ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
89
- ddi_fw/pipeline/multi_pipeline.py,sha256=D_BZ3ciHbVGuuB7m7cEmVQHESruh1gqhA-vxCMfNKj0,5407
89
+ ddi_fw/pipeline/multi_pipeline.py,sha256=NfcH4Ze5U-JRiH3lrxEDWj-VPxYQYtp7tq6bLCImBzs,5550
90
90
  ddi_fw/pipeline/ner_pipeline.py,sha256=q1aKjb54Ra1HzZ7dARvBw6lB37je9R-POEf2h6QT_nU,6018
91
- ddi_fw/pipeline/pipeline.py,sha256=l-T-QnR_cVD590UovmsUlfjivaaZih7j_KMIJvGDAtA,11073
91
+ ddi_fw/pipeline/pipeline.py,sha256=70lYsluAnTWDLTlf6rbecffw3Bl34L1_6ALfLUoSvtY,11324
92
92
  ddi_fw/utils/__init__.py,sha256=77563ikqAtdzjjgRlLp5OAsJBbpLA1Cao8iecGaVUXQ,354
93
93
  ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
94
94
  ddi_fw/utils/json_helper.py,sha256=BVU6wmJgdXPxyqLPu3Ck_9Es5RrP1PDanKvE-OSj1D4,571
@@ -97,7 +97,7 @@ ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5
97
97
  ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
98
98
  ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
99
99
  ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
100
- ddi_fw-0.0.153.dist-info/METADATA,sha256=Lj5ZaKAa6dL9CvHiGIo1KAUtbB8MA40dVlbpHNUcaHA,2082
101
- ddi_fw-0.0.153.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
102
- ddi_fw-0.0.153.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
103
- ddi_fw-0.0.153.dist-info/RECORD,,
100
+ ddi_fw-0.0.155.dist-info/METADATA,sha256=Eko-GWtTpFIvR_X4flADahxqTxPXYQ5Ei2TS8XuTEto,2082
101
+ ddi_fw-0.0.155.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
102
+ ddi_fw-0.0.155.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
103
+ ddi_fw-0.0.155.dist-info/RECORD,,