ddi-fw 0.0.195__py3-none-any.whl → 0.0.197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddi_fw/datasets/core.py CHANGED
@@ -135,7 +135,7 @@ class BaseDataset(BaseModel):
135
135
  skip deriving them. Otherwise, derive them from the dataframe and indices.
136
136
  """
137
137
  self.prep()
138
-
138
+
139
139
  if isinstance(self, TextDatasetMixin):
140
140
  self.process_text()
141
141
 
@@ -258,7 +258,7 @@ class BaseDataset(BaseModel):
258
258
  # return X_train, X_test, y_train, y_test, folds
259
259
 
260
260
 
261
- class TextDatasetMixin(BaseDataset):
261
+ class TextDatasetMixin(BaseModel):
262
262
  embedding_size: Optional[int] = None
263
263
  embedding_dict: Dict[str, Any] | None = Field(
264
264
  default_factory=dict, description="Dictionary for embeddings")
@@ -267,6 +267,9 @@ class TextDatasetMixin(BaseDataset):
267
267
  vector_db_persist_directory: Optional[str] = None
268
268
  vector_db_collection_name: Optional[str] = None
269
269
 
270
+ class Config:
271
+ arbitrary_types_allowed = True
272
+
270
273
  def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
271
274
  """
272
275
  Fetch embeddings and metadata from a persistent Chroma vector database and update the provided embedding_dict.
@@ -314,14 +317,13 @@ class TextDatasetMixin(BaseDataset):
314
317
  else:
315
318
  raise ValueError(
316
319
  "Persistent directory for the vector DB is not specified.")
317
-
320
+
318
321
  def process_text(self):
319
322
  # key, value = next(iter(embedding_dict.items()))
320
323
  # embedding_size = value[next(iter(value))][0].shape[0]
321
324
  # pooling_strategy = self.embedding_pooling_strategy_type(
322
- # ) if self.embedding_pooling_strategy_type else None
323
-
324
-
325
+ # ) if self.embedding_pooling_strategy_type else None
326
+
325
327
  # 'enzyme','target','pathway','smile','all_text','indication', 'description','mechanism_of_action','pharmacodynamics', 'tui', 'cui', 'entities'
326
328
  # kwargs = {"columns": self.columns}
327
329
  # if self.ner_threshold:
@@ -346,9 +348,10 @@ class TextDatasetMixin(BaseDataset):
346
348
  else:
347
349
  print(
348
350
  f"There is no configuration of Embeddings")
351
+ self.embedding_dict = embedding_dict
349
352
 
350
- else:
351
- embedding_dict = self.embedding_dict
353
+ # else:
354
+ # embedding_dict = self.embedding_dict
352
355
  # TODO make generic
353
356
  # embedding_size = list(embedding_dict['all_text'].values())[
354
357
  # 0][0].shape
@@ -1,6 +1,6 @@
1
1
  import pathlib
2
2
  from typing import List, Optional, Tuple
3
- from ddi_fw.datasets.core import TextDatasetMixin, generate_sim_matrices_new, generate_vectors
3
+ from ddi_fw.datasets.core import BaseDataset, TextDatasetMixin, generate_sim_matrices_new, generate_vectors
4
4
  from ddi_fw.datasets.db_utils import create_connection
5
5
  import numpy as np
6
6
  import pandas as pd
@@ -32,7 +32,7 @@ LIST_OF_NER_COLUMNS = ['tui', 'cui', 'entities']
32
32
  HERE = pathlib.Path(__file__).resolve().parent
33
33
 
34
34
 
35
- class DDIMDLDataset(TextDatasetMixin):
35
+ class DDIMDLDataset(BaseDataset,TextDatasetMixin):
36
36
  dataset_name: str = "DDIMDLDataset"
37
37
  index_path: str = Field(default_factory=lambda: str(
38
38
  pathlib.Path(__file__).resolve().parent.joinpath('indexes')))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.195
3
+ Version: 0.0.197
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -1,9 +1,9 @@
1
1
  ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
2
- ddi_fw/datasets/core.py,sha256=HXU09CTbe3zpdBiUcE2w2Yxx_3yHfY_rqa31oS959jw,15531
2
+ ddi_fw/datasets/core.py,sha256=eLS4TtQN1_1kI0huMt7eTOCz5hY3da9PHhEeiLjWtQg,15605
3
3
  ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
4
4
  ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
5
5
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
6
- ddi_fw/datasets/ddi_mdl/base.py,sha256=Nltf1P6ZMVT9oVsXMtrtpnlSz5hk_sa8FV2TpFYhSgc,10193
6
+ ddi_fw/datasets/ddi_mdl/base.py,sha256=rS8lSGE-SLeoE3GuElJ-TNaRHIGhaZBeOM2UH3JUS4M,10218
7
7
  ddi_fw/datasets/ddi_mdl/debug.log,sha256=eWz05j8RFqZuHFDTCF7Rck5w4rvtTanFN21iZsgxO7Y,115
8
8
  ddi_fw/datasets/ddi_mdl/readme.md,sha256=WC6lpmsEKvIISnZqENY7TWtzCQr98HPpE3oRsBl8pIw,625
9
9
  ddi_fw/datasets/ddi_mdl/data/event.db,sha256=cmlSsf9MYjRzqR-mw3cUDnTnfT6FkpOG2yCl2mMwwew,30580736
@@ -99,7 +99,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
99
99
  ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
100
100
  ddi_fw/vectorization/feature_vector_generation.py,sha256=EBf-XAiwQwr68az91erEYNegfeqssBR29kVgrliIyac,4765
101
101
  ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
102
- ddi_fw-0.0.195.dist-info/METADATA,sha256=hUXOkRgECPRDv1EyePv2mcCJRedgmnj6afwpFfwFVhk,2542
103
- ddi_fw-0.0.195.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
104
- ddi_fw-0.0.195.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
105
- ddi_fw-0.0.195.dist-info/RECORD,,
102
+ ddi_fw-0.0.197.dist-info/METADATA,sha256=pVnij5JFvkPUgjVqvHmFLdI2OKSFRYxt7-vLVXhpldU,2542
103
+ ddi_fw-0.0.197.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
104
+ ddi_fw-0.0.197.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
105
+ ddi_fw-0.0.197.dist-info/RECORD,,