PyPI - ddi-fw - Versions diffs - 0.0.62__tar.gz → 0.0.63__tar.gz - Mend

ddi-fw 0.0.62 (tar.gz) → 0.0.63 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94)

{ddi_fw-0.0.62 → ddi_fw-0.0.63}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddi_fw
-Version: 0.0.62
+Version: 0.0.63
 Summary: Do not use :)
 Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
 Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>

{ddi_fw-0.0.62 → ddi_fw-0.0.63}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "ddi_fw"
-version = "0.0.62"
+version = "0.0.63"
 description = "Do not use :)"
 readme = "README.md"
 authors = [

{ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/experiments/evaluation_helper.py RENAMED Viewed

@@ -85,10 +85,104 @@ def roc_aupr_score(y_true, y_score, average="macro"):
     return _average_binary_score(_binary_roc_aupr_score, y_true, y_score, average)
-# actual and pred are one-hot encoded
+def evaluate(actual, pred, info='', print=False):
+    # Precompute y_true and y_pred
+    y_true = np.argmax(actual, axis=1)
+    y_pred = np.argmax(pred, axis=1)
+    # Generate classification report
+    c_report = classification_report(y_true, y_pred, output_dict=True)
+    # Metrics initialization
+    metrics = Metrics(info)
+    n_classes = actual.shape[1]
+    precision = {}
+    recall = {}
+    f_score = {}
+    roc_aupr = {}
+    roc_auc = {
+        "weighted": 0,
+        "macro": 0,
+        "micro": 0
+    }
+    # Preallocate lists
+    precision_vals = [[] for _ in range(n_classes)]
+    recall_vals = [[] for _ in range(n_classes)]
+    # Compute metrics for each class
+    for i in range(n_classes):
+        precision_vals[i], recall_vals[i], _ = precision_recall_curve(
+            actual[:, i], pred[:, i])
+        roc_aupr[i] = auc(recall_vals[i], precision_vals[i])
+    # Calculate ROC AUC scores
+    roc_auc["weighted"] = roc_auc_score(actual, pred, multi_class='ovr', average='weighted')
+    roc_auc["macro"] = roc_auc_score(actual, pred, multi_class='ovr', average='macro')
+    roc_auc["micro"] = roc_auc_score(actual, pred, multi_class='ovr', average='micro')
+    # Micro-average Precision-Recall curve and ROC-AUPR
+    precision["micro_event"], recall["micro_event"], _ = precision_recall_curve(actual.ravel(), pred.ravel())
+    roc_aupr["micro"] = auc(recall["micro_event"], precision["micro_event"])
+    # Convert lists to numpy arrays for better performance
+    precision["micro_event"] = precision["micro_event"].tolist()
+    recall["micro_event"] = recall["micro_event"].tolist()
+    # Overall accuracy
+    acc = accuracy_score(y_true, y_pred)
+    # Aggregate precision, recall, and f_score
+    for avg_type in ['weighted', 'macro', 'micro']:
+        precision[avg_type] = precision_score(y_true, y_pred, average=avg_type)
+        recall[avg_type] = recall_score(y_true, y_pred, average=avg_type)
+        f_score[avg_type] = f1_score(y_true, y_pred, average=avg_type)
+    if print:
+        print(
+            f'''Accuracy: {acc}
+            , Precision:{precision['weighted']}
+            , Recall: {recall['weighted']}
+            , F1-score: {f_score['weighted']}
+            ''')
+    logs = {'accuracy': acc,
+            'weighted_precision': precision['weighted'],
+            'macro_precision': precision['macro'],
+            'micro_precision': precision['micro'],
+            'weighted_recall_score': recall['weighted'],
+            'macro_recall_score': recall['macro'],
+            'micro_recall_score': recall['micro'],
+            'weighted_f1_score': f_score['weighted'],
+            'macro_f1_score': f_score['macro'],
+            'micro_f1_score': f_score['micro'],
+            # 'weighted_roc_auc_score': weighted_roc_auc_score,
+            # 'macro_roc_auc_score': macro_roc_auc_score,
+            # 'micro_roc_auc_score': micro_roc_auc_score,
+            # 'macro_aupr_score': macro_aupr_score,
+            # 'micro_aupr_score': micro_aupr_score
+            "micro_roc_aupr": roc_aupr['micro'],
+            # "micro_precision_from_precision_recall_curve":precision["micro"],
+            # "micro_recall_from_precision_recall_curve":recall["micro"],
+            "weighted_roc_auc": roc_auc['weighted'],
+            "macro_roc_auc": roc_auc['macro'],
+            "micro_roc_auc": roc_auc['micro']
+            }
+    metrics.accuracy(acc)
+    metrics.precision(precision)
+    metrics.recall(recall)
+    metrics.f1_score(f_score)
+    metrics.roc_auc(roc_auc)
+    metrics.roc_aupr(roc_aupr)
+    metrics.classification_report(c_report)
+    return logs, metrics
-def evaluate(actual, pred, info = '' ,print=False):
+# actual and pred are one-hot encoded
+def evaluate_ex(actual, pred, info = '' ,print=False):
     y_pred = np.argmax(pred, axis=1)
     y_true = np.argmax(actual, axis=1)

{ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw/experiments/pipeline.py RENAMED Viewed

@@ -33,7 +33,8 @@ class Experiment:
                  experiment_tags=None,
                  tracking_uri=None,
                  dataset_type:BaseDataset=None,
-                 columns=None,
+                 columns=None,
+                 embedding_dict = None,
                  vector_db_persist_directory=None,
                  vector_db_collection_name=None,
                  embedding_pooling_strategy_type:PoolingStrategy=None,
@@ -48,6 +49,7 @@ class Experiment:
         self.tracking_uri = tracking_uri
         self.dataset_type = dataset_type
         self.columns = columns
+        self.embedding_dict = embedding_dict
         self.vector_db_persist_directory = vector_db_persist_directory
         self.vector_db_collection_name = vector_db_collection_name
         self.embedding_pooling_strategy_type = embedding_pooling_strategy_type
@@ -61,21 +63,22 @@ class Experiment:
         kwargs = {"columns": self.columns}
         for k, v in self.ner_threshold.items():
             kwargs[k] = v
-        if self.vector_db_persist_directory:
-            self.vector_db = chromadb.PersistentClient(
-                path=self.vector_db_persist_directory)
-            self.collection = self.vector_db.get_collection(
-                self.vector_db_collection_name)
-            dictionary = self.collection.get(include=['embeddings', 'metadatas'])
+        if self.embedding_dict == None:
+            if self.vector_db_persist_directory:
+                self.vector_db = chromadb.PersistentClient(
+                    path=self.vector_db_persist_directory)
+                self.collection = self.vector_db.get_collection(
+                    self.vector_db_collection_name)
+                dictionary = self.collection.get(include=['embeddings', 'metadatas'])
-            embedding_dict = defaultdict(lambda: defaultdict(list))
+                embedding_dict = defaultdict(lambda: defaultdict(list))
-            for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
-                embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
+                for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
+                    embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
-            embedding_size = dictionary['embeddings'].shape[1]
+                embedding_size = dictionary['embeddings'].shape[1]
-            pooling_strategy = self.embedding_pooling_strategy_type()
+        pooling_strategy = self.embedding_pooling_strategy_type()
         self.ner_df = CTakesNER().load(filename=self.ner_data_file)  if self.ner_data_file else None

{ddi_fw-0.0.62 → ddi_fw-0.0.63}/src/ddi_fw.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddi_fw
-Version: 0.0.62
+Version: 0.0.63
 Summary: Do not use :)
 Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
 Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>