ddi-fw 0.0.62__tar.gz → 0.0.63__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/experiments/evaluation_helper.py +96 -2
  4. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/experiments/pipeline.py +15 -12
  5. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  6. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/README.md +0 -0
  7. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/setup.cfg +0 -0
  8. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/__init__.py +0 -0
  9. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/core.py +0 -0
  10. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/db_utils.py +0 -0
  11. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/base.py +0 -0
  12. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
  13. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
  14. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
  15. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
  16. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
  17. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
  18. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
  19. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
  20. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
  21. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
  22. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
  23. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
  24. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
  25. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
  26. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
  27. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
  28. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
  29. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
  30. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
  31. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
  32. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
  33. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
  34. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
  35. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
  36. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
  37. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
  38. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/embedding_generator.py +0 -0
  39. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/embedding_generator_new.py +0 -0
  40. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/feature_vector_generation.py +0 -0
  41. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/idf_helper.py +0 -0
  42. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
  43. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +0 -0
  44. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
  45. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
  46. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
  47. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
  48. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
  49. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
  50. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
  51. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
  52. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
  53. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
  54. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
  55. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
  56. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
  57. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
  58. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
  59. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/datasets/setup_._py +0 -0
  60. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/drugbank/__init__.py +0 -0
  61. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
  62. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
  63. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
  64. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
  65. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/drugbank/event_extractor.py +0 -0
  66. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/experiments/__init__.py +0 -0
  67. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/experiments/custom_torch_model.py +0 -0
  68. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/experiments/pipeline_builder_pattern.py +0 -0
  69. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/experiments/pipeline_ner.py +0 -0
  70. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/experiments/tensorflow_helper.py +0 -0
  71. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/experiments/test.py +0 -0
  72. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/ner/__init__.py +0 -0
  73. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  74. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/ner/ner.py +0 -0
  75. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/test/basic_test.py +0 -0
  76. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/test/combination_test.py +0 -0
  77. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/test/compress_json_test.py +0 -0
  78. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/test/date_test.py +0 -0
  79. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/test/idf_score.py +0 -0
  80. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/test/jaccard_similarity.py +0 -0
  81. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/test/mlfow_test.py +0 -0
  82. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/test/sklearn-tfidf.py +0 -0
  83. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/test/test.py +0 -0
  84. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/test/torch_cuda_test.py +0 -0
  85. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/test/type_guarding_test.py +0 -0
  86. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/utils/__init__.py +0 -0
  87. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/utils/enums.py +0 -0
  88. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  89. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/utils/utils.py +0 -0
  90. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/utils/zip_helper.py +0 -0
  91. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
  92. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  93. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw.egg-info/requires.txt +0 -0
  94. {ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.62
3
+ Version: 0.0.63
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "ddi_fw"
8
- version = "0.0.62"
8
+ version = "0.0.63"
9
9
  description = "Do not use :)"
10
10
  readme = "README.md"
11
11
  authors = [
@@ -85,10 +85,104 @@ def roc_aupr_score(y_true, y_score, average="macro"):
85
85
 
86
86
  return _average_binary_score(_binary_roc_aupr_score, y_true, y_score, average)
87
87
 
88
- # actual and pred are one-hot encoded
88
+
89
def evaluate(actual, pred, info='', print=False):
    """Compute multi-class classification metrics and package them for logging.

    Args:
        actual: 2-D array of ground-truth labels with one column per class
            (it is indexed as ``actual[:, i]`` and reduced with
            ``argmax(axis=1)``); presumably one-hot encoded -- confirm
            against callers.
        pred: 2-D array of predicted scores laid out like ``actual``.
        info: Value forwarded to the ``Metrics`` constructor.
        print: When truthy, write accuracy and the weighted precision,
            recall and F1-score to standard output.

    Returns:
        A ``(logs, metrics)`` tuple. ``logs`` is a flat dict of scalar
        scores (accuracy; weighted/macro/micro precision, recall, F1 and
        ROC-AUC; micro-averaged area under the precision-recall curve).
        ``metrics`` is the ``Metrics`` object populated with the full
        precision, recall, F1, ROC-AUC and AUPR dicts plus the
        classification report.
    """
    # The ``print`` parameter shadows the builtin inside this function, so
    # calling ``print(...)`` here would call a bool. Reach the real function
    # through ``builtins`` instead of renaming the public keyword argument.
    import builtins

    averages = ('weighted', 'macro', 'micro')

    # Collapse the per-class columns into class indices.
    y_true = np.argmax(actual, axis=1)
    y_pred = np.argmax(pred, axis=1)

    c_report = classification_report(y_true, y_pred, output_dict=True)
    metrics = Metrics(info)

    n_classes = actual.shape[1]

    precision = {}
    recall = {}
    f_score = {}
    roc_aupr = {}

    # Area under the precision-recall curve for each class, keyed by the
    # integer class index.
    for i in range(n_classes):
        class_precision, class_recall, _ = precision_recall_curve(
            actual[:, i], pred[:, i])
        roc_aupr[i] = auc(class_recall, class_precision)

    # One-vs-rest ROC-AUC under each averaging scheme.
    roc_auc = {
        avg_type: roc_auc_score(
            actual, pred, multi_class='ovr', average=avg_type)
        for avg_type in averages
    }

    # Micro-averaged precision-recall curve: flatten every (sample, class)
    # cell into a single binary problem.
    micro_precision, micro_recall, _ = precision_recall_curve(
        actual.ravel(), pred.ravel())
    roc_aupr["micro"] = auc(micro_recall, micro_precision)

    # Store the curve points as plain Python lists rather than numpy arrays.
    precision["micro_event"] = micro_precision.tolist()
    recall["micro_event"] = micro_recall.tolist()

    acc = accuracy_score(y_true, y_pred)

    for avg_type in averages:
        precision[avg_type] = precision_score(y_true, y_pred, average=avg_type)
        recall[avg_type] = recall_score(y_true, y_pred, average=avg_type)
        f_score[avg_type] = f1_score(y_true, y_pred, average=avg_type)

    if print:
        builtins.print(
            f"Accuracy: {acc}\n"
            f", Precision:{precision['weighted']}\n"
            f", Recall: {recall['weighted']}\n"
            f", F1-score: {f_score['weighted']}\n"
        )

    logs = {
        'accuracy': acc,
        'weighted_precision': precision['weighted'],
        'macro_precision': precision['macro'],
        'micro_precision': precision['micro'],
        'weighted_recall_score': recall['weighted'],
        'macro_recall_score': recall['macro'],
        'micro_recall_score': recall['micro'],
        'weighted_f1_score': f_score['weighted'],
        'macro_f1_score': f_score['macro'],
        'micro_f1_score': f_score['micro'],
        "micro_roc_aupr": roc_aupr['micro'],
        "weighted_roc_auc": roc_auc['weighted'],
        "macro_roc_auc": roc_auc['macro'],
        "micro_roc_auc": roc_auc['micro'],
    }

    metrics.accuracy(acc)
    metrics.precision(precision)
    metrics.recall(recall)
    metrics.f1_score(f_score)
    metrics.roc_auc(roc_auc)
    metrics.roc_aupr(roc_aupr)
    metrics.classification_report(c_report)
    return logs, metrics
89
182
 
90
183
 
91
- def evaluate(actual, pred, info = '' ,print=False):
184
+ # actual and pred are one-hot encoded
185
+ def evaluate_ex(actual, pred, info = '' ,print=False):
92
186
 
93
187
  y_pred = np.argmax(pred, axis=1)
94
188
  y_true = np.argmax(actual, axis=1)
@@ -33,7 +33,8 @@ class Experiment:
33
33
  experiment_tags=None,
34
34
  tracking_uri=None,
35
35
  dataset_type:BaseDataset=None,
36
- columns=None,
36
+ columns=None,
37
+ embedding_dict = None,
37
38
  vector_db_persist_directory=None,
38
39
  vector_db_collection_name=None,
39
40
  embedding_pooling_strategy_type:PoolingStrategy=None,
@@ -48,6 +49,7 @@ class Experiment:
48
49
  self.tracking_uri = tracking_uri
49
50
  self.dataset_type = dataset_type
50
51
  self.columns = columns
52
+ self.embedding_dict = embedding_dict
51
53
  self.vector_db_persist_directory = vector_db_persist_directory
52
54
  self.vector_db_collection_name = vector_db_collection_name
53
55
  self.embedding_pooling_strategy_type = embedding_pooling_strategy_type
@@ -61,21 +63,22 @@ class Experiment:
61
63
  kwargs = {"columns": self.columns}
62
64
  for k, v in self.ner_threshold.items():
63
65
  kwargs[k] = v
64
- if self.vector_db_persist_directory:
65
- self.vector_db = chromadb.PersistentClient(
66
- path=self.vector_db_persist_directory)
67
- self.collection = self.vector_db.get_collection(
68
- self.vector_db_collection_name)
69
- dictionary = self.collection.get(include=['embeddings', 'metadatas'])
66
+ if self.embedding_dict == None:
67
+ if self.vector_db_persist_directory:
68
+ self.vector_db = chromadb.PersistentClient(
69
+ path=self.vector_db_persist_directory)
70
+ self.collection = self.vector_db.get_collection(
71
+ self.vector_db_collection_name)
72
+ dictionary = self.collection.get(include=['embeddings', 'metadatas'])
70
73
 
71
- embedding_dict = defaultdict(lambda: defaultdict(list))
74
+ embedding_dict = defaultdict(lambda: defaultdict(list))
72
75
 
73
- for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
74
- embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
76
+ for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
77
+ embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
75
78
 
76
- embedding_size = dictionary['embeddings'].shape[1]
79
+ embedding_size = dictionary['embeddings'].shape[1]
77
80
 
78
- pooling_strategy = self.embedding_pooling_strategy_type()
81
+ pooling_strategy = self.embedding_pooling_strategy_type()
79
82
 
80
83
  self.ner_df = CTakesNER().load(filename=self.ner_data_file) if self.ner_data_file else None
81
84
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.62
3
+ Version: 0.0.63
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
File without changes
File without changes
File without changes
File without changes