ddi-fw 0.0.54__tar.gz → 0.0.56__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/experiments/__init__.py +2 -1
  4. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/experiments/pipeline.py +11 -11
  5. ddi_fw-0.0.56/src/ddi_fw/experiments/pipeline_ner.py +111 -0
  6. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  7. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw.egg-info/SOURCES.txt +1 -0
  8. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/README.md +0 -0
  9. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/setup.cfg +0 -0
  10. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/__init__.py +0 -0
  11. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/core.py +0 -0
  12. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/db_utils.py +0 -0
  13. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/base.py +0 -0
  14. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
  15. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
  16. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
  17. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
  18. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
  19. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
  20. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
  21. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
  22. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
  23. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
  24. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
  25. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
  26. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
  27. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
  28. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
  29. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
  30. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
  31. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
  32. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
  33. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
  34. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
  35. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
  36. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
  37. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
  38. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
  39. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
  40. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/embedding_generator.py +0 -0
  41. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/embedding_generator_new.py +0 -0
  42. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/feature_vector_generation.py +0 -0
  43. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/idf_helper.py +0 -0
  44. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
  45. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +0 -0
  46. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
  47. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
  48. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
  49. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
  50. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
  51. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
  52. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
  53. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
  54. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
  55. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
  56. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
  57. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
  58. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
  59. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
  60. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
  61. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/datasets/setup_._py +0 -0
  62. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/drugbank/__init__.py +0 -0
  63. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
  64. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
  65. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
  66. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
  67. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/drugbank/event_extractor.py +0 -0
  68. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/experiments/custom_torch_model.py +0 -0
  69. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/experiments/evaluation_helper.py +0 -0
  70. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/experiments/pipeline_builder_pattern.py +0 -0
  71. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/experiments/tensorflow_helper.py +0 -0
  72. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/experiments/test.py +0 -0
  73. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/ner/__init__.py +0 -0
  74. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  75. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/ner/ner.py +0 -0
  76. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/test/basic_test.py +0 -0
  77. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/test/combination_test.py +0 -0
  78. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/test/compress_json_test.py +0 -0
  79. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/test/date_test.py +0 -0
  80. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/test/idf_score.py +0 -0
  81. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/test/jaccard_similarity.py +0 -0
  82. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/test/mlfow_test.py +0 -0
  83. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/test/sklearn-tfidf.py +0 -0
  84. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/test/test.py +0 -0
  85. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/test/torch_cuda_test.py +0 -0
  86. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/test/type_guarding_test.py +0 -0
  87. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/utils/__init__.py +0 -0
  88. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/utils/enums.py +0 -0
  89. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  90. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/utils/utils.py +0 -0
  91. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw/utils/zip_helper.py +0 -0
  92. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  93. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw.egg-info/requires.txt +0 -0
  94. {ddi_fw-0.0.54 → ddi_fw-0.0.56}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.54
3
+ Version: 0.0.56
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "ddi_fw"
8
- version = "0.0.54"
8
+ version = "0.0.56"
9
9
  description = "Do not use :)"
10
10
  readme = "README.md"
11
11
  authors = [
@@ -1,3 +1,4 @@
1
1
  from .tensorflow_helper import TFMultiModal, TFSingleModal,Result
2
2
  from .evaluation_helper import evaluate, Metrics
3
- from .pipeline import Experiment
3
+ from .pipeline import Experiment
4
+ from .pipeline_ner import NerParameterSearch
@@ -61,21 +61,21 @@ class Experiment:
61
61
  kwargs = {"columns": self.columns}
62
62
  for k, v in self.ner_threshold.items():
63
63
  kwargs[k] = v
64
+ if self.vector_db_persist_directory:
65
+ self.vector_db = chromadb.PersistentClient(
66
+ path=self.vector_db_persist_directory)
67
+ self.collection = self.vector_db.get_collection(
68
+ self.vector_db_collection_name)
69
+ dictionary = self.collection.get(include=['embeddings', 'metadatas'])
64
70
 
65
- self.vector_db = chromadb.PersistentClient(
66
- path=self.vector_db_persist_directory)
67
- self.collection = self.vector_db.get_collection(
68
- self.vector_db_collection_name)
69
- dictionary = self.collection.get(include=['embeddings', 'metadatas'])
71
+ embedding_dict = defaultdict(lambda: defaultdict(list))
70
72
 
71
- embedding_dict = defaultdict(lambda: defaultdict(list))
73
+ for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
74
+ embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
72
75
 
73
- for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
74
- embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
76
+ embedding_size = dictionary['embeddings'].shape[1]
75
77
 
76
- embedding_size = dictionary['embeddings'].shape[1]
77
-
78
- pooling_strategy = self.embedding_pooling_strategy_type()
78
+ pooling_strategy = self.embedding_pooling_strategy_type()
79
79
 
80
80
  self.ner_df = CTakesNER().load(filename=self.ner_data_file) if self.ner_data_file else None
81
81
 
@@ -0,0 +1,111 @@
1
+ from collections import defaultdict
2
+ from enum import Enum
3
+ import numpy as np
4
+ import pandas as pd
5
+ from ddi_fw.datasets.core import BaseDataset
6
+ from ddi_fw.experiments.tensorflow_helper import TFMultiModal
7
+ from ddi_fw.experiments.pipeline import Experiment
8
+ from typing import Dict, List
9
+ from itertools import product
10
+
11
+ from ddi_fw.utils.enums import DrugBankTextDataTypes, UMLSCodeTypes
12
+ import mlflow
13
+ from ddi_fw.ner.ner import CTakesNER
14
+
15
+ def stack(df_column):
16
+ return np.stack(df_column.values)
17
+
18
+
19
+ class NerParameterSearch:
20
+ def __init__(self,
21
+ experiment_name,
22
+ experiment_description,
23
+ experiment_tags,
24
+ tracking_uri,
25
+ dataset_type: BaseDataset,
26
+ columns:list,
27
+ umls_code_types: List[UMLSCodeTypes],
28
+ text_types=List[DrugBankTextDataTypes],
29
+ min_threshold_dict: Dict[str, float] = defaultdict(float),
30
+ max_threshold_dict: Dict[str, float] = defaultdict(float),
31
+ increase_step=0.5):
32
+ self.experiment_name = experiment_name
33
+ self.experiment_description = experiment_description
34
+ self.experiment_tags = experiment_tags
35
+ self.tracking_uri = tracking_uri
36
+
37
+ self.dataset_type = dataset_type
38
+ self.columns = columns
39
+ self.umls_code_types = umls_code_types
40
+ self.text_types = text_types
41
+ self.min_threshold_dict = min_threshold_dict
42
+ self.max_threshold_dict = max_threshold_dict
43
+ self.increase_step = increase_step
44
+
45
+ def build(self):
46
+ self.datasets = {}
47
+ self.items = []
48
+ # columns = ['tui', 'cui', 'entities']
49
+ if self.umls_code_types is not None and self.text_types is not None:
50
+ # add checking statements
51
+ _umls_codes = [t.value[0] for t in self.umls_code_types]
52
+ _text_types = [t.value[0] for t in self.text_types]
53
+ _columns = [f'{item[0]}_{item[1]}' for item in product(
54
+ _umls_codes, _text_types)]
55
+ self.columns.extend(_columns)
56
+ print(f'Columns: {self.columns}')
57
+ self.ner_df = CTakesNER().load(filename=self.ner_data_file) if self.ner_data_file else None
58
+ for column in self.columns:
59
+ min_threshold = self.min_threshold_dict[column]
60
+ max_threshold = self.max_threshold_dict[column]
61
+ kwargs = {}
62
+ kwargs['threshold_method'] = 'idf'
63
+ kwargs['tui_threshold'] = 0
64
+ kwargs['cui_threshold'] = 0
65
+ kwargs['entities_threshold'] = 0
66
+
67
+ for threshold in np.arange(min_threshold, max_threshold, self.increase_step):
68
+ print(threshold)
69
+ if column.startswith('tui'):
70
+ kwargs['tui_threshold'] = threshold
71
+ if column.startswith('cui'):
72
+ kwargs['cui_threshold'] = threshold
73
+ if column.startswith('entities'):
74
+ kwargs['entities_threshold'] = threshold
75
+ dataset = self.dataset_type(
76
+ # chemical_property_columns=[],
77
+ # embedding_columns=[],
78
+ # ner_columns=[column],
79
+ columns=[column],
80
+ ner_df= self.ner_df,
81
+ **kwargs)
82
+
83
+ # train_idx_arr, val_idx_arr bir kez hesaplanması yeterli aslında
84
+ X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = dataset.load()
85
+ group_items = dataset.produce_inputs()
86
+ for item in group_items:
87
+ # item[0] = f'threshold_{threshold}_{item[0]}'
88
+ item[0] = f'threshold_{item[0]}_{threshold}'
89
+ self.datasets[item[0]] = dataset.ddis_df
90
+
91
+ self.items.extend(group_items)
92
+ self.y_test_label = self.items[0][4]
93
+ self.train_idx_arr = train_idx_arr
94
+ self.val_idx_arr = val_idx_arr
95
+
96
+
97
+ def run(self, model_func, batch_size=128, epochs=100):
98
+ mlflow.set_tracking_uri(self.tracking_uri)
99
+
100
+ if mlflow.get_experiment_by_name(self.experiment_name) == None:
101
+ mlflow.create_experiment(self.experiment_name)
102
+ mlflow.set_experiment_tags(self.experiment_tags)
103
+ mlflow.set_experiment(self.experiment_name)
104
+
105
+ y_test_label = self.items[0][4]
106
+ multi_modal = TFMultiModal(
107
+ model_func=model_func, batch_size=batch_size, epochs=epochs) # 100
108
+ multi_modal.set_data(
109
+ self.items, self.train_idx_arr, self.val_idx_arr, y_test_label)
110
+ result = multi_modal.predict(self.combinations)
111
+ return result
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.54
3
+ Version: 0.0.56
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -68,6 +68,7 @@ src/ddi_fw/experiments/custom_torch_model.py
68
68
  src/ddi_fw/experiments/evaluation_helper.py
69
69
  src/ddi_fw/experiments/pipeline.py
70
70
  src/ddi_fw/experiments/pipeline_builder_pattern.py
71
+ src/ddi_fw/experiments/pipeline_ner.py
71
72
  src/ddi_fw/experiments/tensorflow_helper.py
72
73
  src/ddi_fw/experiments/test.py
73
74
  src/ddi_fw/ner/__init__.py
File without changes
File without changes
File without changes
File without changes