ddi-fw 0.0.54__py3-none-any.whl → 0.0.56__py3-none-any.whl

This diff shows the content changes between two package versions that were publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.

ddi_fw/experiments/__init__.py

@@ -1,3 +1,4 @@
 from .tensorflow_helper import TFMultiModal, TFSingleModal,Result
 from .evaluation_helper import evaluate, Metrics
-from .pipeline import Experiment
+from .pipeline import Experiment
+from .pipeline_ner import NerParameterSearch
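
With the re-export above, the new search helper can be imported straight from the experiments subpackage, next to the existing Experiment pipeline:

    from ddi_fw.experiments import Experiment, NerParameterSearch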

ddi_fw/experiments/pipeline.py

@@ -61,21 +61,21 @@ class Experiment:
         kwargs = {"columns": self.columns}
         for k, v in self.ner_threshold.items():
             kwargs[k] = v
+        if self.vector_db_persist_directory:
+            self.vector_db = chromadb.PersistentClient(
+                path=self.vector_db_persist_directory)
+            self.collection = self.vector_db.get_collection(
+                self.vector_db_collection_name)
+            dictionary = self.collection.get(include=['embeddings', 'metadatas'])

-        self.vector_db = chromadb.PersistentClient(
-            path=self.vector_db_persist_directory)
-        self.collection = self.vector_db.get_collection(
-            self.vector_db_collection_name)
-        dictionary = self.collection.get(include=['embeddings', 'metadatas'])
+            embedding_dict = defaultdict(lambda: defaultdict(list))

-        embedding_dict = defaultdict(lambda: defaultdict(list))
+            for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
+                embedding_dict[metadata["type"]][metadata["id"]].append(embedding)

-        for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
-            embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
+            embedding_size = dictionary['embeddings'].shape[1]

-        embedding_size = dictionary['embeddings'].shape[1]
-
-        pooling_strategy = self.embedding_pooling_strategy_type()
+        pooling_strategy = self.embedding_pooling_strategy_type()

         self.ner_df = CTakesNER().load(filename=self.ner_data_file) if self.ner_data_file else None

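For readers who have not used Chroma, the following is a minimal, self-contained sketch of the retrieval-and-grouping step that the hunk above now guards behind the vector_db_persist_directory check. The path and collection name are placeholders, and the np.array conversion is only defensive: recent chromadb releases return the embeddings as a NumPy array (which the .shape[1] access above relies on), while older ones return plain lists.

    from collections import defaultdict
    import numpy as np
    import chromadb

    # Placeholder path and collection name; point these at a real persisted store.
    client = chromadb.PersistentClient(path="embeddings/chroma")
    collection = client.get_collection("drug_embeddings")

    payload = collection.get(include=["embeddings", "metadatas"])
    embeddings = np.array(payload["embeddings"])  # shape: (n_vectors, embedding_size)
    embedding_size = embeddings.shape[1]

    # Bucket vectors by their metadata, mirroring embedding_dict in the hunk above.
    embedding_dict = defaultdict(lambda: defaultdict(list))
    for metadata, embedding in zip(payload["metadatas"], embeddings):
        embedding_dict[metadata["type"]][metadata["id"]].append(embedding)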

ddi_fw/experiments/pipeline_ner.py (new file)

@@ -0,0 +1,111 @@
+from collections import defaultdict
+from enum import Enum
+import numpy as np
+import pandas as pd
+from ddi_fw.datasets.core import BaseDataset
+from ddi_fw.experiments.tensorflow_helper import TFMultiModal
+from ddi_fw.experiments.pipeline import Experiment
+from typing import Dict, List
+from itertools import product
+
+from ddi_fw.utils.enums import DrugBankTextDataTypes, UMLSCodeTypes
+import mlflow
+from ddi_fw.ner.ner import CTakesNER
+
+def stack(df_column):
+    return np.stack(df_column.values)
+
+
+class NerParameterSearch:
+    def __init__(self,
+                 experiment_name,
+                 experiment_description,
+                 experiment_tags,
+                 tracking_uri,
+                 dataset_type: BaseDataset,
+                 columns: list,
+                 umls_code_types: List[UMLSCodeTypes],
+                 text_types: List[DrugBankTextDataTypes],
+                 min_threshold_dict: Dict[str, float] = defaultdict(float),
+                 max_threshold_dict: Dict[str, float] = defaultdict(float),
+                 increase_step=0.5):
+        self.experiment_name = experiment_name
+        self.experiment_description = experiment_description
+        self.experiment_tags = experiment_tags
+        self.tracking_uri = tracking_uri
+
+        self.dataset_type = dataset_type
+        self.columns = columns
+        self.umls_code_types = umls_code_types
+        self.text_types = text_types
+        self.min_threshold_dict = min_threshold_dict
+        self.max_threshold_dict = max_threshold_dict
+        self.increase_step = increase_step
+
+    def build(self):
+        self.datasets = {}
+        self.items = []
+        # columns = ['tui', 'cui', 'entities']
+        if self.umls_code_types is not None and self.text_types is not None:
+            # add checking statements
+            _umls_codes = [t.value[0] for t in self.umls_code_types]
+            _text_types = [t.value[0] for t in self.text_types]
+            _columns = [f'{item[0]}_{item[1]}' for item in product(
+                _umls_codes, _text_types)]
+            self.columns.extend(_columns)
+            print(f'Columns: {self.columns}')
+        self.ner_df = CTakesNER().load(filename=self.ner_data_file) if self.ner_data_file else None
+        for column in self.columns:
+            min_threshold = self.min_threshold_dict[column]
+            max_threshold = self.max_threshold_dict[column]
+            kwargs = {}
+            kwargs['threshold_method'] = 'idf'
+            kwargs['tui_threshold'] = 0
+            kwargs['cui_threshold'] = 0
+            kwargs['entities_threshold'] = 0
+
+            for threshold in np.arange(min_threshold, max_threshold, self.increase_step):
+                print(threshold)
+                if column.startswith('tui'):
+                    kwargs['tui_threshold'] = threshold
+                if column.startswith('cui'):
+                    kwargs['cui_threshold'] = threshold
+                if column.startswith('entities'):
+                    kwargs['entities_threshold'] = threshold
+                dataset = self.dataset_type(
+                    # chemical_property_columns=[],
+                    # embedding_columns=[],
+                    # ner_columns=[column],
+                    columns=[column],
+                    ner_df=self.ner_df,
+                    **kwargs)
+
+                # computing train_idx_arr and val_idx_arr once would actually be enough
+                X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = dataset.load()
+                group_items = dataset.produce_inputs()
+                for item in group_items:
+                    # item[0] = f'threshold_{threshold}_{item[0]}'
+                    item[0] = f'threshold_{item[0]}_{threshold}'
+                    self.datasets[item[0]] = dataset.ddis_df
+
+                self.items.extend(group_items)
+        self.y_test_label = self.items[0][4]
+        self.train_idx_arr = train_idx_arr
+        self.val_idx_arr = val_idx_arr
+
+
+    def run(self, model_func, batch_size=128, epochs=100):
+        mlflow.set_tracking_uri(self.tracking_uri)
+
+        if mlflow.get_experiment_by_name(self.experiment_name) is None:
+            mlflow.create_experiment(self.experiment_name)
+            mlflow.set_experiment_tags(self.experiment_tags)
+        mlflow.set_experiment(self.experiment_name)
+
+        y_test_label = self.items[0][4]
+        multi_modal = TFMultiModal(
+            model_func=model_func, batch_size=batch_size, epochs=epochs)  # 100
+        multi_modal.set_data(
+            self.items, self.train_idx_arr, self.val_idx_arr, y_test_label)
+        result = multi_modal.predict(self.combinations)
+        return result
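
A rough usage sketch of the new class follows. Every concrete value below (the dataset class, the model factory, the enum members, thresholds, URIs, and paths) is an illustrative assumption rather than something this diff defines. Note also that build() reads self.ner_data_file and run() reads self.combinations, but neither attribute is assigned anywhere in pipeline_ner.py, so a caller would have to set them on the instance before calling those methods.

    from collections import defaultdict
    from ddi_fw.experiments import NerParameterSearch
    from ddi_fw.utils.enums import UMLSCodeTypes, DrugBankTextDataTypes

    from my_project.datasets import MyDdiDataset      # hypothetical BaseDataset subclass
    from my_project.models import build_dense_model   # hypothetical Keras model factory

    search = NerParameterSearch(
        experiment_name="ner-threshold-search",
        experiment_description="Grid search over NER cut-off thresholds",
        experiment_tags={"owner": "example"},
        tracking_uri="http://localhost:5000",
        dataset_type=MyDdiDataset,
        columns=[],
        umls_code_types=[UMLSCodeTypes.TUI],            # assumed enum member
        text_types=[DrugBankTextDataTypes.INDICATION],  # assumed enum member
        min_threshold_dict=defaultdict(float, {"tui_indication": 0.0}),
        max_threshold_dict=defaultdict(float, {"tui_indication": 2.0}),
        increase_step=0.5)

    # Attributes the class reads but never sets itself:
    search.ner_data_file = "ner/ctakes_output.pkl"      # placeholder path
    search.combinations = []                            # whatever TFMultiModal.predict expects

    search.build()
    result = search.run(model_func=build_dense_model, batch_size=128, epochs=100)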

{ddi_fw-0.0.54.dist-info → ddi_fw-0.0.56.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddi_fw
-Version: 0.0.54
+Version: 0.0.56
 Summary: Do not use :)
 Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
 Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>

{ddi_fw-0.0.54.dist-info → ddi_fw-0.0.56.dist-info}/RECORD

@@ -56,11 +56,12 @@ ddi_fw/drugbank/drugbank_parser.py,sha256=lxUuhB0s8ef_aPNDs0V8ClKF7-KIWugNIV9gVs
 ddi_fw/drugbank/drugbank_processor.py,sha256=vmkt68n9nFLevufgGyXhOSDtTo4G1XzwT9PVncGTXtk,18127
 ddi_fw/drugbank/drugbank_processor_org.py,sha256=eO5Yset50P91qkic79RUXPoEuxRxQKFkKW0l4G29Mas,13322
 ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHrBXsI8,4600
-ddi_fw/experiments/__init__.py,sha256=5tOuHtrypRdmJgE5E78YcySbjCdNSGkbY1H5DY_I7gw,149
+ddi_fw/experiments/__init__.py,sha256=5L2xSolpFycNnflqOMdvJSiqRB16ExA5bbVGORKFX04,195
 ddi_fw/experiments/custom_torch_model.py,sha256=iQ_R_EApzD2JCcASN8cie6D21oh7VCxaOQ45_dkiGwc,2576
 ddi_fw/experiments/evaluation_helper.py,sha256=pY69cezV3WzrXw1bduIwRJfah1w3wXJ2YyTNim1J7ko,9349
-ddi_fw/experiments/pipeline.py,sha256=wHovtPbky1mEJCkC0xJnRDWKmPx9THrgKKN4ZnYQU_U,5296
+ddi_fw/experiments/pipeline.py,sha256=wttkvdzGP9d3jC9nx2iZul4hbogXkRho6eDns0yfLiE,5380
 ddi_fw/experiments/pipeline_builder_pattern.py,sha256=q1PNEQFoO5U3UidEoGB8rgLA7KXr4FsJTXEug5c5UJg,5466
+ddi_fw/experiments/pipeline_ner.py,sha256=g-Tp7P7hmFxOvuqBV_Cfxt-4xe6xft9WC43S3NFns2Y,4838
 ddi_fw/experiments/tensorflow_helper.py,sha256=Y-gD9qyqFFPl6HAvM_tIa5Y6em2YmafPCL1KMrK6eb8,11768
 ddi_fw/experiments/test.py,sha256=z1TfBpK75zGKpp2ZU8f6APjZlgBFthaCBN61YB9ma4o,2049
 ddi_fw/ner/__init__.py,sha256=JwhGXrepomxPSsGsg2b_xPRC72AjvxOIn2CW5Mvscn0,26
@@ -82,7 +83,7 @@ ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
 ddi_fw/utils/py7zr_helper.py,sha256=dgfHqXDBWys1hmd1JlHhYyZGxrzYWi6siYiUq3bnLuI,4698
 ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
 ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
-ddi_fw-0.0.54.dist-info/METADATA,sha256=wZNF9K4Iyq_6QcaS-B4akta6i97uWA_jZ2tH9qhcHAk,1565
-ddi_fw-0.0.54.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-ddi_fw-0.0.54.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
-ddi_fw-0.0.54.dist-info/RECORD,,
+ddi_fw-0.0.56.dist-info/METADATA,sha256=iOaA6X5rIXTUxkNeS0JGXTjvegkL26R6-taf8bRiyIQ,1565
+ddi_fw-0.0.56.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+ddi_fw-0.0.56.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
+ddi_fw-0.0.56.dist-info/RECORD,,