ddi-fw 0.0.151__py3-none-any.whl → 0.0.153__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
ddi_fw/datasets/core.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import glob
2
- from typing import List, Optional, Type
2
+ from typing import Any, Dict, List, Optional, Type
3
3
  import numpy as np
4
4
  import pandas as pd
5
5
  from pydantic import BaseModel, Field, computed_field
@@ -194,8 +194,8 @@ class BaseDataset(BaseModel):
194
194
 
195
195
 
196
196
  class TextDatasetMixin(BaseDataset):
197
- embedding_size: int
198
- embedding_dict: dict
197
+ embedding_size: Optional[int] = None
198
+ embedding_dict: Dict[str, Any] = Field(default_factory=dict, description="Dictionary for embeddings")
199
199
  embeddings_pooling_strategy: PoolingStrategy | None = None
200
200
 
201
201
  def process_text(self):
@@ -30,6 +30,7 @@ LIST_OF_NER_COLUMNS = ['tui', 'cui', 'entities']
30
30
  HERE = pathlib.Path(__file__).resolve().parent
31
31
 
32
32
  class DDIMDLDataset(TextDatasetMixin):
33
+ dataset_name: str = "DDIMDLDataset"
33
34
  index_path: str = Field(default_factory=lambda: str(
34
35
  pathlib.Path(__file__).resolve().parent.joinpath('indexes')))
35
36
  # drugs_df: pd.DataFrame = Field(default_factory=pd.DataFrame)
@@ -35,20 +35,18 @@ class Pipeline(BaseModel):
35
35
  model: Optional[Any] = None
36
36
  multi_modal: Optional[Any] = None
37
37
  use_mlflow: bool = True
38
- _items: List = []
39
- _train_idx_arr: List | None = []
40
- _val_idx_arr: List | None = []
41
-
38
+ _items:List=[]
39
+ _train_idx_arr:List|None=[]
40
+ _val_idx_arr:List|None=[]
41
+
42
42
  @property
43
43
  def items(self) -> List:
44
44
  return self._items
45
-
46
45
  @property
47
- def train_idx_arr(self) -> List | None:
46
+ def train_idx_arr(self) -> List|None:
48
47
  return self._train_idx_arr
49
-
50
48
  @property
51
- def val_idx_arr(self) -> List | None:
49
+ def val_idx_arr(self) -> List|None:
52
50
  return self._val_idx_arr
53
51
 
54
52
  class Config:
@@ -127,15 +125,12 @@ class Pipeline(BaseModel):
127
125
  print(
128
126
  f"Embeddings of {column} are calculated from {vector_db_collection_name}")
129
127
 
130
- # if metadatas == None or embeddings == None:
131
- if 'embeddings' not in dictionary or 'metadatas' not in dictionary or not dictionary['embeddings'] or not dictionary['metadatas']:
132
- raise ValueError(
133
- "The collection does not contain embeddings or metadatas.")
134
-
135
128
  # Populate the embedding dictionary with embeddings from the vector database
136
129
  metadatas = dictionary["metadatas"]
137
130
  embeddings = dictionary["embeddings"]
138
-
131
+ if metadatas is None or embeddings is None:
132
+ raise ValueError(
133
+ "The collection does not contain embeddings or metadatas.")
139
134
  for metadata, embedding in zip(metadatas, embeddings):
140
135
  embedding_dict[metadata["type"]
141
136
  ][metadata["id"]].append(embedding)
@@ -189,8 +184,7 @@ class Pipeline(BaseModel):
189
184
  if issubclass(self.dataset_type, TextDatasetMixin):
190
185
  key, value = next(iter(embedding_dict.items()))
191
186
  embedding_size = value[next(iter(value))][0].shape[0]
192
- pooling_strategy = self.embedding_pooling_strategy_type(
193
- ) if self.embedding_pooling_strategy_type else None
187
+ pooling_strategy = self.embedding_pooling_strategy_type() if self.embedding_pooling_strategy_type else None
194
188
 
195
189
  dataset = self.dataset_type(
196
190
  embedding_dict=embedding_dict,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.151
3
+ Version: 0.0.153
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -1,12 +1,12 @@
1
1
  ddi_fw/datasets/__init__.py,sha256=yDsRQD_9Ijpm_Rl2wSDwdutG5Q_wca_UBPEvm7nBx04,444
2
- ddi_fw/datasets/core.py,sha256=JA6WJz3VCUfxI85rYE7ZBqC4pnn7L8NSS9-EgjLw710,7968
2
+ ddi_fw/datasets/core.py,sha256=vRMpUsIHIbOKi-6TuUPNu1Ve3ny3cS9hdsydQxFCNvE,8078
3
3
  ddi_fw/datasets/dataset_splitter.py,sha256=lLIelXv-8rCK0tbwLNgHBHYUO_65HT-_kErAlZhRQVE,1662
4
4
  ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
5
5
  ddi_fw/datasets/embedding_generator.py,sha256=jiDKwLaPMaQkloxQkuCrhl-A-2OdvocmkSzjWtUnk4g,2255
6
6
  ddi_fw/datasets/feature_vector_generation.py,sha256=gvjpEzkgVV8dp4V8NMMv59u0v-1tNAJ7v83R-keWGoA,4748
7
7
  ddi_fw/datasets/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
8
8
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
9
- ddi_fw/datasets/ddi_mdl/base.py,sha256=usw3AhBCjdYwZx9MMnyNaUYTEyYXoRSO4fNJJHxnPuk,9312
9
+ ddi_fw/datasets/ddi_mdl/base.py,sha256=sj4WfwNmcTFznVxlND6FYoyqnL7VUrjn0TG24LDYk3w,9353
10
10
  ddi_fw/datasets/ddi_mdl/debug.log,sha256=eWz05j8RFqZuHFDTCF7Rck5w4rvtTanFN21iZsgxO7Y,115
11
11
  ddi_fw/datasets/ddi_mdl/readme.md,sha256=WC6lpmsEKvIISnZqENY7TWtzCQr98HPpE3oRsBl8pIw,625
12
12
  ddi_fw/datasets/ddi_mdl/data/event.db,sha256=cmlSsf9MYjRzqR-mw3cUDnTnfT6FkpOG2yCl2mMwwew,30580736
@@ -88,7 +88,7 @@ ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,2
88
88
  ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
89
89
  ddi_fw/pipeline/multi_pipeline.py,sha256=D_BZ3ciHbVGuuB7m7cEmVQHESruh1gqhA-vxCMfNKj0,5407
90
90
  ddi_fw/pipeline/ner_pipeline.py,sha256=q1aKjb54Ra1HzZ7dARvBw6lB37je9R-POEf2h6QT_nU,6018
91
- ddi_fw/pipeline/pipeline.py,sha256=NPew1lESAiuXUKR4Ob9R4LwRh2Xe1qfnqZDfmuMuC7k,11253
91
+ ddi_fw/pipeline/pipeline.py,sha256=l-T-QnR_cVD590UovmsUlfjivaaZih7j_KMIJvGDAtA,11073
92
92
  ddi_fw/utils/__init__.py,sha256=77563ikqAtdzjjgRlLp5OAsJBbpLA1Cao8iecGaVUXQ,354
93
93
  ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
94
94
  ddi_fw/utils/json_helper.py,sha256=BVU6wmJgdXPxyqLPu3Ck_9Es5RrP1PDanKvE-OSj1D4,571
@@ -97,7 +97,7 @@ ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5
97
97
  ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
98
98
  ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
99
99
  ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
100
- ddi_fw-0.0.151.dist-info/METADATA,sha256=cTz-LpUrPhCU0uKQ2A9oE3lm5uaI3ra3nFHufSoi8hA,2082
101
- ddi_fw-0.0.151.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
102
- ddi_fw-0.0.151.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
103
- ddi_fw-0.0.151.dist-info/RECORD,,
100
+ ddi_fw-0.0.153.dist-info/METADATA,sha256=Lj5ZaKAa6dL9CvHiGIo1KAUtbB8MA40dVlbpHNUcaHA,2082
101
+ ddi_fw-0.0.153.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
102
+ ddi_fw-0.0.153.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
103
+ ddi_fw-0.0.153.dist-info/RECORD,,