ddi-fw 0.0.196__py3-none-any.whl → 0.0.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/datasets/core.py +7 -5
- {ddi_fw-0.0.196.dist-info → ddi_fw-0.0.197.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.196.dist-info → ddi_fw-0.0.197.dist-info}/RECORD +5 -5
- {ddi_fw-0.0.196.dist-info → ddi_fw-0.0.197.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.196.dist-info → ddi_fw-0.0.197.dist-info}/top_level.txt +0 -0
ddi_fw/datasets/core.py
CHANGED
@@ -135,7 +135,7 @@ class BaseDataset(BaseModel):
|
|
135
135
|
skip deriving them. Otherwise, derive them from the dataframe and indices.
|
136
136
|
"""
|
137
137
|
self.prep()
|
138
|
-
|
138
|
+
|
139
139
|
if isinstance(self, TextDatasetMixin):
|
140
140
|
self.process_text()
|
141
141
|
|
@@ -267,6 +267,9 @@ class TextDatasetMixin(BaseModel):
|
|
267
267
|
vector_db_persist_directory: Optional[str] = None
|
268
268
|
vector_db_collection_name: Optional[str] = None
|
269
269
|
|
270
|
+
class Config:
|
271
|
+
arbitrary_types_allowed = True
|
272
|
+
|
270
273
|
def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
|
271
274
|
"""
|
272
275
|
Fetch embeddings and metadata from a persistent Chroma vector database and update the provided embedding_dict.
|
@@ -314,14 +317,13 @@ class TextDatasetMixin(BaseModel):
|
|
314
317
|
else:
|
315
318
|
raise ValueError(
|
316
319
|
"Persistent directory for the vector DB is not specified.")
|
317
|
-
|
320
|
+
|
318
321
|
def process_text(self):
|
319
322
|
# key, value = next(iter(embedding_dict.items()))
|
320
323
|
# embedding_size = value[next(iter(value))][0].shape[0]
|
321
324
|
# pooling_strategy = self.embedding_pooling_strategy_type(
|
322
|
-
# ) if self.embedding_pooling_strategy_type else None
|
323
|
-
|
324
|
-
|
325
|
+
# ) if self.embedding_pooling_strategy_type else None
|
326
|
+
|
325
327
|
# 'enzyme','target','pathway','smile','all_text','indication', 'description','mechanism_of_action','pharmacodynamics', 'tui', 'cui', 'entities'
|
326
328
|
# kwargs = {"columns": self.columns}
|
327
329
|
# if self.ner_threshold:
|
@@ -1,5 +1,5 @@
|
|
1
1
|
ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
|
2
|
-
ddi_fw/datasets/core.py,sha256=
|
2
|
+
ddi_fw/datasets/core.py,sha256=eLS4TtQN1_1kI0huMt7eTOCz5hY3da9PHhEeiLjWtQg,15605
|
3
3
|
ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
|
4
4
|
ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
|
5
5
|
ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
|
@@ -99,7 +99,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
|
|
99
99
|
ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
|
100
100
|
ddi_fw/vectorization/feature_vector_generation.py,sha256=EBf-XAiwQwr68az91erEYNegfeqssBR29kVgrliIyac,4765
|
101
101
|
ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
|
102
|
-
ddi_fw-0.0.
|
103
|
-
ddi_fw-0.0.
|
104
|
-
ddi_fw-0.0.
|
105
|
-
ddi_fw-0.0.
|
102
|
+
ddi_fw-0.0.197.dist-info/METADATA,sha256=pVnij5JFvkPUgjVqvHmFLdI2OKSFRYxt7-vLVXhpldU,2542
|
103
|
+
ddi_fw-0.0.197.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
104
|
+
ddi_fw-0.0.197.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
105
|
+
ddi_fw-0.0.197.dist-info/RECORD,,
|
File without changes
|
File without changes
|