PyPI - ddi-fw - Versions diffs - 0.0.96__py3-none-any.whl → 0.0.98__py3-none-any.whl - Mend

ddi-fw 0.0.96__py3-none-any.whl → 0.0.98__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

ddi_fw/datasets/core.py CHANGED Viewed

@@ -268,81 +268,39 @@ class BaseDataset(ABC):
 # her bir metin tipi için embedding oluşturursan burayı düzenle
     def prep(self):
-        # if self.embedding_columns:
-        #     zip_helper = ZipHelper()
-        #     zip_helper.extract(str(HERE.joinpath('zips/embeddings')),
-        #                        str(HERE.joinpath('zips/embeddings')))
-       # embedding_dict = dict()
-        # for embedding_column in self.embedding_columns:
-        #     embedding_file = HERE.joinpath(
-        #         f'zips/embeddings/{embedding_column}_embeddings.pkl')
-        #     embedding_values = pd.read_pickle(embedding_file)
-        #     d = embedding_values.apply(
-        #         lambda x: {x.id: x[f'{embedding_column}_embedding']}, axis=1)
-        #     x = {k: v for l in d.values.tolist() for k, v in l.items()}
-        #     embedding_dict[embedding_column] = x
-        # self.ner_df = CTakesNER().load()
         drug_names = self.drugs_df['name'].to_list()
         drug_ids = self.drugs_df['id'].to_list()
-        # self.ddis_df = self.ddis_df[(self.ddis_df['name1'].isin(drug_names)) & (
-        #     self.ddis_df['name2'].isin(drug_names))]
         filtered_df = self.drugs_df
-        filtered_ner_df = self.ner_df[self.ner_df['drugbank_id'].isin(
-            drug_ids)]
-        filtered_ner_df = self.ner_df.copy()
         combined_df = filtered_df.copy()
-        # TODO: eğer kullanılan veri setinde tui, cui veya entity bilgileri yoksa o veri setine bu sütunları eklemek için aşağısı gerekli
-        # idf_calc = IDF(filtered_ner_df, [f for f in filtered_ner_df.keys()])
-        idf_calc = IDF(filtered_ner_df, self.ner_columns)
-        idf_calc.calculate()
-        idf_scores_df = idf_calc.to_dataframe()
-        # for key in filtered_ner_df.keys():
-        for key in self.ner_columns:
-            threshold = 0
-            if key.startswith('tui'):
-                threshold = self.tui_threshold
-            if key.startswith('cui'):
-                threshold = self.cui_threshold
-            if key.startswith('entities'):
-                threshold = self.entities_threshold
-            combined_df[key] = filtered_ner_df[key]
-            valid_codes = idf_scores_df[idf_scores_df[key] > threshold].index
-            # print(f'{key}: valid code size = {len(valid_codes)}')
-            combined_df[key] = combined_df[key].apply(lambda items:
-                                                      [item for item in items if item in valid_codes])
-        # Yukarıdaki koda evrildi
-        # combined_df['tui_description'] = filtered_ner_df['description_tuis']
-        # combined_df['cui_description'] = filtered_ner_df['description_cuis']
-        # combined_df['entities_description'] = filtered_ner_df['description_entities']
-        # tui_idf = IDF(combined_df['tui_description'], self.tui_threshold)
-        # cui_idf = IDF(combined_df['cui_description'], self.cui_threshold)
-        # entities_idf = IDF(
-        #     combined_df['entities_description'], self.entities_threshold)
-        # tui_idf.calculate()
-        # cui_idf.calculate()
-        # entities_idf.calculate()
-        # valid_tui_codes = tui_idf.find_items_over_threshold()
-        # valid_cui_codes = cui_idf.find_items_over_threshold()
-        # valid_entities_codes = entities_idf.find_items_over_threshold()
-        # combined_df['tui_description'] = combined_df['tui_description'].apply(lambda items:
-        #                                                                       [item for item in items if item in valid_tui_codes])
-        # combined_df['cui_description'] = combined_df['cui_description'].apply(lambda items:
-        #                                                                       [item for item in items if item in valid_cui_codes])
-        # combined_df['entities_description'] = combined_df['entities_description'].apply(lambda items:
-        #                                                                                 [item for item in items if item in valid_entities_codes])
+        if self.ner_df:
+            filtered_ner_df = self.ner_df[self.ner_df['drugbank_id'].isin(
+                drug_ids)]
+            filtered_ner_df = self.ner_df.copy()
+            # TODO: eğer kullanılan veri setinde tui, cui veya entity bilgileri yoksa o veri setine bu sütunları eklemek için aşağısı gerekli
+            # idf_calc = IDF(filtered_ner_df, [f for f in filtered_ner_df.keys()])
+            idf_calc = IDF(filtered_ner_df, self.ner_columns)
+            idf_calc.calculate()
+            idf_scores_df = idf_calc.to_dataframe()
+            # for key in filtered_ner_df.keys():
+            for key in self.ner_columns:
+                threshold = 0
+                if key.startswith('tui'):
+                    threshold = self.tui_threshold
+                if key.startswith('cui'):
+                    threshold = self.cui_threshold
+                if key.startswith('entities'):
+                    threshold = self.entities_threshold
+                combined_df[key] = filtered_ner_df[key]
+                valid_codes = idf_scores_df[idf_scores_df[key] > threshold].index
+                # print(f'{key}: valid code size = {len(valid_codes)}')
+                combined_df[key] = combined_df[key].apply(lambda items:
+                                                        [item for item in items if item in valid_codes])
         moved_columns = ['id']
         moved_columns.extend(self.__similarity_related_columns__)
@@ -409,7 +367,6 @@ class BaseDataset(ABC):
                 x_fnc, args=(embeddings_after_pooling,), axis=1)
         self.dataframe = self.ddis_df.copy()
-        self.dataframe['class_as_txt'] = labels
         self.dataframe['class'] = list(classes)
         print(self.dataframe.shape)

ddi_fw/pipeline/pipeline.py CHANGED Viewed

@@ -47,7 +47,7 @@ class Pipeline:
         self.combinations = combinations
         self.model = model
-    def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column):
+    def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
         """
         Fetch embeddings and metadata from a persistent Chroma vector database and update the provided embedding_dict.
@@ -65,9 +65,11 @@ class Pipeline:
             # Fetch the embeddings and metadata
             if column == None:
-                dictionary = collection.get(include=['embeddings', 'metadatas'])
+                dictionary = collection.get(
+                    include=['embeddings', 'metadatas'])
             else:
-                dictionary = collection.get(include=['embeddings', 'metadatas'], where= {"type": {"$eq": f"{column}"}})
+                dictionary = collection.get(include=['embeddings', 'metadatas'], where={
+                                            "type": {"$eq": f"{column}"}})
             # Populate the embedding dictionary with embeddings from the vector database
             for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
                 embedding_dict[metadata["type"]
@@ -77,7 +79,7 @@ class Pipeline:
         else:
             raise ValueError(
                 "Persistent directory for the vector DB is not specified.")
     def build(self):
         # 'enzyme','target','pathway','smile','all_text','indication', 'description','mechanism_of_action','pharmacodynamics', 'tui', 'cui', 'entities'
         kwargs = {"columns": self.columns}
@@ -89,14 +91,16 @@ class Pipeline:
             if self.vector_db_persist_directory:
                 self.__create_or_update_embeddings__(
                     embedding_dict, self.vector_db_persist_directory, self.vector_db_collection_name)
             if self.column_embedding_configs:
                 for item in self.column_embedding_configs:
                     col = item["column"]
                     col_db_dir = item["vector_db_persist_directory"]
                     col_db_collection = item["vector_db_collection_name"]
-                    self.__create_or_update_embeddings__(embedding_dict, col_db_dir, col_db_collection, col)
-                    print(f"Embedings of {col} is calculated from {col_db_collection}")
+                    self.__create_or_update_embeddings__(
+                        embedding_dict, col_db_dir, col_db_collection, col)
+                    print(
+                        f"Embedings of {col} is calculated from {col_db_collection}")
         # if self.embedding_dict == None:
         #     if self.vector_db_persist_directory:

{ddi_fw-0.0.96.dist-info → ddi_fw-0.0.98.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddi_fw
-Version: 0.0.96
+Version: 0.0.98
 Summary: Do not use :)
 Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
 Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -27,25 +27,25 @@ Requires-Dist: python-stopwatch==1.1.11
 Requires-Dist: lxml==5.3.0
 Requires-Dist: matplotlib==3.8.0
 Requires-Dist: mlflow==2.16.1
-Requires-Dist: nltk==3.8.1
-Requires-Dist: numpy==1.26.4
-Requires-Dist: pandas==2.2.2
+Requires-Dist: nltk>=3.8.1
+Requires-Dist: numpy>=1.26.4
+Requires-Dist: pandas>=2.2.0
 Requires-Dist: plotly==5.24.1
 Requires-Dist: rdkit==2023.3.3
 Requires-Dist: scikit-learn==1.5.2
 Requires-Dist: scipy==1.13.1
-Requires-Dist: accelerate==0.33.0
-Requires-Dist: sentence-transformers==3.0.1
-Requires-Dist: transformers==4.42.4
+Requires-Dist: accelerate>=0.33.0
+Requires-Dist: sentence-transformers>=3.0.1
+Requires-Dist: transformers>=4.42.4
 Requires-Dist: stanza==1.9.2
-Requires-Dist: tokenizers==0.19.1
-Requires-Dist: tqdm==4.66.6
+Requires-Dist: tokenizers>=0.19.1
+Requires-Dist: tqdm>=4.66.6
 Requires-Dist: xmlschema==3.4.2
-Requires-Dist: zipp==3.20.2
+Requires-Dist: zipp>=3.20.2
 Requires-Dist: py7zr==0.22.0
-Requires-Dist: openai==1.52.2
-Requires-Dist: langchain==0.3.4
-Requires-Dist: chromadb==0.5.15
+Requires-Dist: openai>=1.52.2
+Requires-Dist: langchain>=0.3.4
+Requires-Dist: chromadb>=0.5.15
 Requires-Dist: langchain-community==0.3.3
 Requires-Dist: datasets==3.0.2
 Requires-Dist: unstructured==0.16.3

{ddi_fw-0.0.96.dist-info → ddi_fw-0.0.98.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 ddi_fw/datasets/__init__.py,sha256=HSwQrqnzrEjIG4gif41pwJ_cST3t2XHGDxqFyuEBRwo,351
-ddi_fw/datasets/core.py,sha256=cL_H7-osGTNG5W8X8LLpIcSJ-GUXoI3LjNwvffmEGzA,19452
+ddi_fw/datasets/core.py,sha256=9RaUPhAYCn4RDeTZpHATtJaqNWsO17bduYyVqxAZWs0,17001
 ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
 ddi_fw/datasets/embedding_generator.py,sha256=Jqrlv88RCu0Lg812KsA12X0cSaZuxbckJ4LNRKNy_qw,2173
 ddi_fw/datasets/feature_vector_generation.py,sha256=EImavcALxkIB0YG_smOzagMNzuWMbK9SaWSKwARx_qU,3254
@@ -72,7 +72,7 @@ ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,2
 ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=qIst7vxHaOAhRv4lgozszwa3b1QE4aIrN74t41Xnvr4,1637
 ddi_fw/pipeline/multi_pipeline.py,sha256=t_Z7d7xRfDnhpQTlqCf7c0isZ5hZlyXavKhC7ePsnJY,5903
 ddi_fw/pipeline/ner_pipeline.py,sha256=wB7hz4YCOv7UAz6bGE6sSpPXXIdoOflOVK5UCc1fO-o,5586
-ddi_fw/pipeline/pipeline.py,sha256=q7jfTt7ryYa3xBscPtxvanB-j5RzWVZUKir0KmAdTKc,8357
+ddi_fw/pipeline/pipeline.py,sha256=VX3GcoTjY7_ehX_rmpa-uh0NyBFoxF24AxbwruEWw4A,8457
 ddi_fw/test/basic_test.py,sha256=fEOGcZm1ObnsDvMiXNmdmz6YCeUrGc8V0DwlSwGhsq8,376
 ddi_fw/test/combination_test.py,sha256=TWNE8sf-DSh1Q9-yRaRBc774Sn1kSMGXLwQhd2_Qynk,324
 ddi_fw/test/compress_json_test.py,sha256=BGny56YqiG-pzhMoDzLKQBQI1E7o3jU0S7VYWtclAx4,1045
@@ -89,7 +89,7 @@ ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
 ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
 ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
 ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
-ddi_fw-0.0.96.dist-info/METADATA,sha256=b2D7e7ub3byUbTwEPdw6FvUqTEK-H_KflNXwqbk4r7s,1966
-ddi_fw-0.0.96.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
-ddi_fw-0.0.96.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
-ddi_fw-0.0.96.dist-info/RECORD,,
+ddi_fw-0.0.98.dist-info/METADATA,sha256=W4ZdrQs8YgQp6aHxr4Py5_lO4zrzKnk1XjDfFhrlsq8,1966
+ddi_fw-0.0.98.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+ddi_fw-0.0.98.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
+ddi_fw-0.0.98.dist-info/RECORD,,

{ddi_fw-0.0.96.dist-info → ddi_fw-0.0.98.dist-info}/WHEEL RENAMED Viewed

File without changes

{ddi_fw-0.0.96.dist-info → ddi_fw-0.0.98.dist-info}/top_level.txt RENAMED Viewed

File without changes

ddi-fw 0.0.96__py3-none-any.whl → 0.0.98__py3-none-any.whl

ddi-fw 0.0.96__py3-none-any.whl → 0.0.98__py3-none-any.whl