ddi-fw 0.0.53__tar.gz → 0.0.55__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/core.py +4 -2
  4. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/experiments/__init__.py +2 -1
  5. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/experiments/pipeline.py +11 -11
  6. ddi_fw-0.0.55/src/ddi_fw/experiments/pipeline_ner.py +109 -0
  7. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  8. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw.egg-info/SOURCES.txt +1 -0
  9. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/README.md +0 -0
  10. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/setup.cfg +0 -0
  11. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/__init__.py +0 -0
  12. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/db_utils.py +0 -0
  13. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/base.py +0 -0
  14. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
  15. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
  16. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
  17. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
  18. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
  19. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
  20. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
  21. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
  22. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
  23. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
  24. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
  25. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
  26. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
  27. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
  28. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
  29. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
  30. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
  31. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
  32. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
  33. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
  34. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
  35. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
  36. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
  37. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
  38. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
  39. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
  40. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/embedding_generator.py +0 -0
  41. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/embedding_generator_new.py +0 -0
  42. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/feature_vector_generation.py +0 -0
  43. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/idf_helper.py +0 -0
  44. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
  45. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +0 -0
  46. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
  47. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
  48. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
  49. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
  50. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
  51. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
  52. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
  53. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
  54. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
  55. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
  56. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
  57. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
  58. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
  59. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
  60. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
  61. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/datasets/setup_._py +0 -0
  62. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/drugbank/__init__.py +0 -0
  63. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
  64. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
  65. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
  66. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
  67. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/drugbank/event_extractor.py +0 -0
  68. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/experiments/custom_torch_model.py +0 -0
  69. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/experiments/evaluation_helper.py +0 -0
  70. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/experiments/pipeline_builder_pattern.py +0 -0
  71. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/experiments/tensorflow_helper.py +0 -0
  72. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/experiments/test.py +0 -0
  73. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/ner/__init__.py +0 -0
  74. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  75. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/ner/ner.py +0 -0
  76. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/test/basic_test.py +0 -0
  77. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/test/combination_test.py +0 -0
  78. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/test/compress_json_test.py +0 -0
  79. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/test/date_test.py +0 -0
  80. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/test/idf_score.py +0 -0
  81. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/test/jaccard_similarity.py +0 -0
  82. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/test/mlfow_test.py +0 -0
  83. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/test/sklearn-tfidf.py +0 -0
  84. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/test/test.py +0 -0
  85. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/test/torch_cuda_test.py +0 -0
  86. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/test/type_guarding_test.py +0 -0
  87. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/utils/__init__.py +0 -0
  88. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/utils/enums.py +0 -0
  89. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  90. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/utils/utils.py +0 -0
  91. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw/utils/zip_helper.py +0 -0
  92. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  93. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw.egg-info/requires.txt +0 -0
  94. {ddi_fw-0.0.53 → ddi_fw-0.0.55}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.53
3
+ Version: 0.0.55
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "ddi_fw"
8
- version = "0.0.53"
8
+ version = "0.0.55"
9
9
  description = "Do not use :)"
10
10
  readme = "README.md"
11
11
  authors = [
@@ -298,11 +298,13 @@ class BaseDataset(ABC):
298
298
  combined_df = filtered_df.copy()
299
299
  # TODO: eğer kullanılan veri setinde tui, cui veya entity bilgileri yoksa o veri setine bu sütunları eklemek için aşağısı gerekli
300
300
 
301
- idf_calc = IDF(filtered_ner_df, [f for f in filtered_ner_df.keys()])
301
+ # idf_calc = IDF(filtered_ner_df, [f for f in filtered_ner_df.keys()])
302
+ idf_calc = IDF(filtered_ner_df, self.ner_columns)
302
303
  idf_calc.calculate()
303
304
  idf_scores_df = idf_calc.to_dataframe()
304
305
 
305
- for key in filtered_ner_df.keys():
306
+ # for key in filtered_ner_df.keys():
307
+ for key in self.ner_columns:
306
308
  threshold = 0
307
309
  if key.startswith('tui'):
308
310
  threshold = self.tui_threshold
@@ -1,3 +1,4 @@
1
1
  from .tensorflow_helper import TFMultiModal, TFSingleModal,Result
2
2
  from .evaluation_helper import evaluate, Metrics
3
- from .pipeline import Experiment
3
+ from .pipeline import Experiment
4
+ from .pipeline_ner import NerParameterSearch
@@ -61,21 +61,21 @@ class Experiment:
61
61
  kwargs = {"columns": self.columns}
62
62
  for k, v in self.ner_threshold.items():
63
63
  kwargs[k] = v
64
+ if self.vector_db_persist_directory:
65
+ self.vector_db = chromadb.PersistentClient(
66
+ path=self.vector_db_persist_directory)
67
+ self.collection = self.vector_db.get_collection(
68
+ self.vector_db_collection_name)
69
+ dictionary = self.collection.get(include=['embeddings', 'metadatas'])
64
70
 
65
- self.vector_db = chromadb.PersistentClient(
66
- path=self.vector_db_persist_directory)
67
- self.collection = self.vector_db.get_collection(
68
- self.vector_db_collection_name)
69
- dictionary = self.collection.get(include=['embeddings', 'metadatas'])
71
+ embedding_dict = defaultdict(lambda: defaultdict(list))
70
72
 
71
- embedding_dict = defaultdict(lambda: defaultdict(list))
73
+ for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
74
+ embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
72
75
 
73
- for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
74
- embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
76
+ embedding_size = dictionary['embeddings'].shape[1]
75
77
 
76
- embedding_size = dictionary['embeddings'].shape[1]
77
-
78
- pooling_strategy = self.embedding_pooling_strategy_type()
78
+ pooling_strategy = self.embedding_pooling_strategy_type()
79
79
 
80
80
  self.ner_df = CTakesNER().load(filename=self.ner_data_file) if self.ner_data_file else None
81
81
 
@@ -0,0 +1,109 @@
1
+ from collections import defaultdict
2
+ from enum import Enum
3
+ import numpy as np
4
+ import pandas as pd
5
+ from ddi_fw.datasets.core import BaseDataset
6
+ from ddi_fw.experiments.tensorflow_helper import TFMultiModal
7
+ from ddi_fw.experiments.pipeline import Experiment
8
+ from typing import Dict, List
9
+ from itertools import product
10
+
11
+ from ddi_fw.utils.enums import DrugBankTextDataTypes, UMLSCodeTypes
12
+ import mlflow
13
+ from ddi_fw.ner.ner import CTakesNER
14
+
15
+ def stack(df_column):
16
+ return np.stack(df_column.values)
17
+
18
+
19
+ class NerParameterSearch:
20
+ def __init__(self,
21
+ experiment_name,
22
+ experiment_description,
23
+ experiment_tags,
24
+ tracking_uri,
25
+ dataset_type: BaseDataset,
26
+ umls_code_types: List[UMLSCodeTypes],
27
+ text_types=List[DrugBankTextDataTypes],
28
+ min_threshold_dict: Dict[str, float] = defaultdict(float),
29
+ max_threshold_dict: Dict[str, float] = defaultdict(float),
30
+ increase_step=0.5):
31
+ self.experiment_name = experiment_name
32
+ self.experiment_description = experiment_description
33
+ self.experiment_tags = experiment_tags
34
+ self.tracking_uri = tracking_uri
35
+
36
+ self.dataset_type = dataset_type
37
+ self.umls_code_types = umls_code_types
38
+ self.text_types = text_types
39
+ self.min_threshold_dict = min_threshold_dict
40
+ self.max_threshold_dict = max_threshold_dict
41
+ self.increase_step = increase_step
42
+
43
+ def build(self):
44
+ self.datasets = {}
45
+ self.items = []
46
+ columns = ['tui', 'cui', 'entities']
47
+ if self.umls_code_types is not None and self.text_types is not None:
48
+ # add checking statements
49
+ _umls_codes = [t.value[0] for t in self.umls_code_types]
50
+ _text_types = [t.value[0] for t in self.text_types]
51
+ _columns = [f'{item[0]}_{item[1]}' for item in product(
52
+ _umls_codes, _text_types)]
53
+ columns.extend(_columns)
54
+ print(f'Columns: {columns}')
55
+ self.ner_df = CTakesNER().load(filename=self.ner_data_file) if self.ner_data_file else None
56
+ for column in columns:
57
+ min_threshold = self.min_threshold_dict[column]
58
+ max_threshold = self.max_threshold_dict[column]
59
+ kwargs = {}
60
+ kwargs['threshold_method'] = 'idf'
61
+ kwargs['tui_threshold'] = 0
62
+ kwargs['cui_threshold'] = 0
63
+ kwargs['entities_threshold'] = 0
64
+
65
+ for threshold in np.arange(min_threshold, max_threshold, self.increase_step):
66
+ print(threshold)
67
+ if column.startswith('tui'):
68
+ kwargs['tui_threshold'] = threshold
69
+ if column.startswith('cui'):
70
+ kwargs['cui_threshold'] = threshold
71
+ if column.startswith('entities'):
72
+ kwargs['entities_threshold'] = threshold
73
+ dataset = self.dataset_type(
74
+ # chemical_property_columns=[],
75
+ # embedding_columns=[],
76
+ # ner_columns=[column],
77
+ columns=[column],
78
+ ner_df= self.ner_df,
79
+ **kwargs)
80
+
81
+ # train_idx_arr, val_idx_arr bir kez hesaplanması yeterli aslında
82
+ X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = dataset.load()
83
+ group_items = dataset.produce_inputs()
84
+ for item in group_items:
85
+ # item[0] = f'threshold_{threshold}_{item[0]}'
86
+ item[0] = f'threshold_{item[0]}_{threshold}'
87
+ self.datasets[item[0]] = dataset.ddis_df
88
+
89
+ self.items.extend(group_items)
90
+ self.y_test_label = self.items[0][4]
91
+ self.train_idx_arr = train_idx_arr
92
+ self.val_idx_arr = val_idx_arr
93
+
94
+
95
+ def run(self, model_func, batch_size=128, epochs=100):
96
+ mlflow.set_tracking_uri(self.tracking_uri)
97
+
98
+ if mlflow.get_experiment_by_name(self.experiment_name) == None:
99
+ mlflow.create_experiment(self.experiment_name)
100
+ mlflow.set_experiment_tags(self.experiment_tags)
101
+ mlflow.set_experiment(self.experiment_name)
102
+
103
+ y_test_label = self.items[0][4]
104
+ multi_modal = TFMultiModal(
105
+ model_func=model_func, batch_size=batch_size, epochs=epochs) # 100
106
+ multi_modal.set_data(
107
+ self.items, self.train_idx_arr, self.val_idx_arr, y_test_label)
108
+ result = multi_modal.predict(self.combinations)
109
+ return result
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.53
3
+ Version: 0.0.55
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -68,6 +68,7 @@ src/ddi_fw/experiments/custom_torch_model.py
68
68
  src/ddi_fw/experiments/evaluation_helper.py
69
69
  src/ddi_fw/experiments/pipeline.py
70
70
  src/ddi_fw/experiments/pipeline_builder_pattern.py
71
+ src/ddi_fw/experiments/pipeline_ner.py
71
72
  src/ddi_fw/experiments/tensorflow_helper.py
72
73
  src/ddi_fw/experiments/test.py
73
74
  src/ddi_fw/ner/__init__.py
File without changes
File without changes
File without changes
File without changes