PyPI - ddi-fw - Versions diffs - 0.0.54__py3-none-any.whl → 0.0.55__py3-none-any.whl - Mend

ddi-fw 0.0.54__py3-none-any.whl → 0.0.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

ddi_fw/experiments/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from .tensorflow_helper import TFMultiModal, TFSingleModal,Result
 from .evaluation_helper import evaluate, Metrics
-from .pipeline import Experiment
+from .pipeline import Experiment
+from .pipeline_ner import NerParameterSearch

ddi_fw/experiments/pipeline.py CHANGED Viewed

@@ -61,21 +61,21 @@ class Experiment:
         kwargs = {"columns": self.columns}
         for k, v in self.ner_threshold.items():
             kwargs[k] = v
+        if self.vector_db_persist_directory:
+            self.vector_db = chromadb.PersistentClient(
+                path=self.vector_db_persist_directory)
+            self.collection = self.vector_db.get_collection(
+                self.vector_db_collection_name)
+            dictionary = self.collection.get(include=['embeddings', 'metadatas'])
-        self.vector_db = chromadb.PersistentClient(
-            path=self.vector_db_persist_directory)
-        self.collection = self.vector_db.get_collection(
-            self.vector_db_collection_name)
-        dictionary = self.collection.get(include=['embeddings', 'metadatas'])
+            embedding_dict = defaultdict(lambda: defaultdict(list))
-        embedding_dict = defaultdict(lambda: defaultdict(list))
+            for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
+                embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
-        for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
-            embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
+            embedding_size = dictionary['embeddings'].shape[1]
-        embedding_size = dictionary['embeddings'].shape[1]
-        pooling_strategy = self.embedding_pooling_strategy_type()
+            pooling_strategy = self.embedding_pooling_strategy_type()
         self.ner_df = CTakesNER().load(filename=self.ner_data_file)  if self.ner_data_file else None

ddi_fw/experiments/pipeline_ner.py ADDED Viewed

@@ -0,0 +1,109 @@
+from collections import defaultdict
+from enum import Enum
+import numpy as np
+import pandas as pd
+from ddi_fw.datasets.core import BaseDataset
+from ddi_fw.experiments.tensorflow_helper import TFMultiModal
+from ddi_fw.experiments.pipeline import Experiment
+from typing import Dict, List
+from itertools import product
+from ddi_fw.utils.enums import DrugBankTextDataTypes, UMLSCodeTypes
+import mlflow
+from ddi_fw.ner.ner import CTakesNER
+def stack(df_column):
+    return np.stack(df_column.values)
+class NerParameterSearch:
+    def __init__(self,
+                 experiment_name,
+                 experiment_description,
+                 experiment_tags,
+                 tracking_uri,
+                 dataset_type: BaseDataset,
+                 umls_code_types: List[UMLSCodeTypes],
+                 text_types=List[DrugBankTextDataTypes],
+                 min_threshold_dict: Dict[str, float] = defaultdict(float),
+                 max_threshold_dict: Dict[str, float] = defaultdict(float),
+                 increase_step=0.5):
+        self.experiment_name = experiment_name
+        self.experiment_description = experiment_description
+        self.experiment_tags = experiment_tags
+        self.tracking_uri = tracking_uri
+        self.dataset_type = dataset_type
+        self.umls_code_types = umls_code_types
+        self.text_types = text_types
+        self.min_threshold_dict = min_threshold_dict
+        self.max_threshold_dict = max_threshold_dict
+        self.increase_step = increase_step
+    def build(self):
+        self.datasets = {}
+        self.items = []
+        columns = ['tui', 'cui', 'entities']
+        if self.umls_code_types is not None and self.text_types is not None:
+            # add checking statements
+            _umls_codes = [t.value[0] for t in self.umls_code_types]
+            _text_types = [t.value[0] for t in self.text_types]
+            _columns = [f'{item[0]}_{item[1]}' for item in product(
+                _umls_codes, _text_types)]
+            columns.extend(_columns)
+        print(f'Columns: {columns}')
+        self.ner_df = CTakesNER().load(filename=self.ner_data_file)  if self.ner_data_file else None
+        for column in columns:
+            min_threshold = self.min_threshold_dict[column]
+            max_threshold = self.max_threshold_dict[column]
+            kwargs = {}
+            kwargs['threshold_method'] = 'idf'
+            kwargs['tui_threshold'] = 0
+            kwargs['cui_threshold'] = 0
+            kwargs['entities_threshold'] = 0
+            for threshold in np.arange(min_threshold, max_threshold, self.increase_step):
+                print(threshold)
+                if column.startswith('tui'):
+                    kwargs['tui_threshold'] = threshold
+                if column.startswith('cui'):
+                    kwargs['cui_threshold'] = threshold
+                if column.startswith('entities'):
+                    kwargs['entities_threshold'] = threshold
+                dataset = self.dataset_type(
+                    # chemical_property_columns=[],
+                    # embedding_columns=[],
+                    # ner_columns=[column],
+                    columns=[column],
+                    ner_df= self.ner_df,
+                    **kwargs)
+                # train_idx_arr, val_idx_arr  bir kez hesaplanması yeterli aslında
+                X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = dataset.load()
+                group_items = dataset.produce_inputs()
+                for item in group_items:
+                    # item[0] = f'threshold_{threshold}_{item[0]}'
+                    item[0] = f'threshold_{item[0]}_{threshold}'
+                self.datasets[item[0]] = dataset.ddis_df
+                self.items.extend(group_items)
+        self.y_test_label = self.items[0][4]
+        self.train_idx_arr = train_idx_arr
+        self.val_idx_arr = val_idx_arr
+    def run(self, model_func, batch_size=128, epochs=100):
+            mlflow.set_tracking_uri(self.tracking_uri)
+            if mlflow.get_experiment_by_name(self.experiment_name) == None:
+                mlflow.create_experiment(self.experiment_name)
+                mlflow.set_experiment_tags(self.experiment_tags)
+            mlflow.set_experiment(self.experiment_name)
+            y_test_label = self.items[0][4]
+            multi_modal = TFMultiModal(
+                model_func=model_func, batch_size=batch_size,  epochs=epochs)  # 100
+            multi_modal.set_data(
+                self.items, self.train_idx_arr, self.val_idx_arr, y_test_label)
+            result = multi_modal.predict(self.combinations)
+            return result

{ddi_fw-0.0.54.dist-info → ddi_fw-0.0.55.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddi_fw
-Version: 0.0.54
+Version: 0.0.55
 Summary: Do not use :)
 Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
 Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>

{ddi_fw-0.0.54.dist-info → ddi_fw-0.0.55.dist-info}/RECORD RENAMED Viewed

@@ -56,11 +56,12 @@ ddi_fw/drugbank/drugbank_parser.py,sha256=lxUuhB0s8ef_aPNDs0V8ClKF7-KIWugNIV9gVs
 ddi_fw/drugbank/drugbank_processor.py,sha256=vmkt68n9nFLevufgGyXhOSDtTo4G1XzwT9PVncGTXtk,18127
 ddi_fw/drugbank/drugbank_processor_org.py,sha256=eO5Yset50P91qkic79RUXPoEuxRxQKFkKW0l4G29Mas,13322
 ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHrBXsI8,4600
-ddi_fw/experiments/__init__.py,sha256=5tOuHtrypRdmJgE5E78YcySbjCdNSGkbY1H5DY_I7gw,149
+ddi_fw/experiments/__init__.py,sha256=5L2xSolpFycNnflqOMdvJSiqRB16ExA5bbVGORKFX04,195
 ddi_fw/experiments/custom_torch_model.py,sha256=iQ_R_EApzD2JCcASN8cie6D21oh7VCxaOQ45_dkiGwc,2576
 ddi_fw/experiments/evaluation_helper.py,sha256=pY69cezV3WzrXw1bduIwRJfah1w3wXJ2YyTNim1J7ko,9349
-ddi_fw/experiments/pipeline.py,sha256=wHovtPbky1mEJCkC0xJnRDWKmPx9THrgKKN4ZnYQU_U,5296
+ddi_fw/experiments/pipeline.py,sha256=wttkvdzGP9d3jC9nx2iZul4hbogXkRho6eDns0yfLiE,5380
 ddi_fw/experiments/pipeline_builder_pattern.py,sha256=q1PNEQFoO5U3UidEoGB8rgLA7KXr4FsJTXEug5c5UJg,5466
+ddi_fw/experiments/pipeline_ner.py,sha256=JERKAaPdgKt2wjfVauOd3HXOGbLLoYLNxNCAv9CO_vg,4757
 ddi_fw/experiments/tensorflow_helper.py,sha256=Y-gD9qyqFFPl6HAvM_tIa5Y6em2YmafPCL1KMrK6eb8,11768
 ddi_fw/experiments/test.py,sha256=z1TfBpK75zGKpp2ZU8f6APjZlgBFthaCBN61YB9ma4o,2049
 ddi_fw/ner/__init__.py,sha256=JwhGXrepomxPSsGsg2b_xPRC72AjvxOIn2CW5Mvscn0,26
@@ -82,7 +83,7 @@ ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
 ddi_fw/utils/py7zr_helper.py,sha256=dgfHqXDBWys1hmd1JlHhYyZGxrzYWi6siYiUq3bnLuI,4698
 ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
 ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
-ddi_fw-0.0.54.dist-info/METADATA,sha256=wZNF9K4Iyq_6QcaS-B4akta6i97uWA_jZ2tH9qhcHAk,1565
-ddi_fw-0.0.54.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-ddi_fw-0.0.54.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
-ddi_fw-0.0.54.dist-info/RECORD,,
+ddi_fw-0.0.55.dist-info/METADATA,sha256=736seAJsPdjZQPhFly5pkPPGi7SMWr6XqNgUKKRhC2I,1565
+ddi_fw-0.0.55.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+ddi_fw-0.0.55.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
+ddi_fw-0.0.55.dist-info/RECORD,,

{ddi_fw-0.0.54.dist-info → ddi_fw-0.0.55.dist-info}/WHEEL RENAMED Viewed

File without changes

{ddi_fw-0.0.54.dist-info → ddi_fw-0.0.55.dist-info}/top_level.txt RENAMED Viewed

File without changes

ddi-fw 0.0.54__py3-none-any.whl → 0.0.55__py3-none-any.whl

ddi-fw 0.0.54__py3-none-any.whl → 0.0.55__py3-none-any.whl