PyPI - active-vision - Versions diffs - 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

active-vision 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

active_vision/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
-__version__ = "0.1.1"
+__version__ = "0.2.0"
 from .core import *

active_vision/core.py CHANGED Viewed

@@ -2,6 +2,7 @@ import pandas as pd
 from loguru import logger
 from fastai.vision.all import *
 import torch
+import numpy as np
 import warnings
 from typing import Callable
@@ -189,37 +190,61 @@ class ActiveLearner:
         df = df[~df["filepath"].isin(self.train_set["filepath"])].copy()
         if strategy == "least-confidence":
-            logger.info(f"Getting top {num_samples} low confidence samples")
+            logger.info(
+                f"Using least confidence strategy to get top {num_samples} samples"
+            )
             df.loc[:, "uncertainty_score"] = 1 - (df["pred_conf"]) / (
                 self.num_classes - (self.num_classes - 1)
             )
-            # Sort by descending uncertainty score
-            uncertain_df = df.sort_values(by="uncertainty_score", ascending=False).head(
-                num_samples
+        elif strategy == "margin-of-confidence":
+            logger.info(
+                f"Using margin of confidence strategy to get top {num_samples} samples"
             )
-            return uncertain_df
+            if len(df["pred_raw"].iloc[0]) < 2:
+                logger.error("pred_raw has less than 2 elements")
+                raise ValueError("pred_raw has less than 2 elements")
-        # TODO: Implement margin of confidence strategy
-        elif strategy == "margin-of-confidence":
-            logger.error("Margin of confidence strategy not implemented")
-            raise NotImplementedError("Margin of confidence strategy not implemented")
+            # Calculate uncertainty score as 1 - (difference between top two predictions)
+            df.loc[:, "uncertainty_score"] = df["pred_raw"].apply(
+                lambda x: 1 - (np.sort(x)[-1] - np.sort(x)[-2])
+            )
-        # TODO: Implement ratio of confidence strategy
         elif strategy == "ratio-of-confidence":
-            logger.error("Ratio of confidence strategy not implemented")
-            raise NotImplementedError("Ratio of confidence strategy not implemented")
+            logger.info(
+                f"Using ratio of confidence strategy to get top {num_samples} samples"
+            )
+            if len(df["pred_raw"].iloc[0]) < 2:
+                logger.error("pred_raw has less than 2 elements")
+                raise ValueError("pred_raw has less than 2 elements")
+            # Calculate uncertainty score as ratio of top two predictions
+            df.loc[:, "uncertainty_score"] = df["pred_raw"].apply(
+                lambda x: np.sort(x)[-2] / np.sort(x)[-1]
+            )
-        # TODO: Implement entropy strategy
         elif strategy == "entropy":
-            logger.error("Entropy strategy not implemented")
-            raise NotImplementedError("Entropy strategy not implemented")
+            logger.info(f"Using entropy strategy to get top {num_samples} samples")
+            # Calculate uncertainty score as entropy of the prediction
+            df.loc[:, "uncertainty_score"] = df["pred_raw"].apply(
+                lambda x: -np.sum(x * np.log2(x))
+            )
+            # Normalize the uncertainty score to be between 0 and 1 by dividing by log2 of the number of classes
+            df.loc[:, "uncertainty_score"] = df["uncertainty_score"] / np.log2(
+                self.num_classes
+            )
         else:
             logger.error(f"Unknown strategy: {strategy}")
             raise ValueError(f"Unknown strategy: {strategy}")
+        df = df[
+            ["filepath", "pred_label", "pred_conf", "uncertainty_score", "pred_raw"]
+        ]
+        return df.sort_values(by="uncertainty_score", ascending=False).head(num_samples)
     def sample_diverse(self, df: pd.DataFrame, num_samples: int):
         """
         Sample top `num_samples` diverse samples. Returns a df with filepaths and predicted labels, and confidence scores.
@@ -258,7 +283,7 @@ class ActiveLearner:
                 return;
             }
-            if (e.key === "ArrowUp" || e.key === "Enter") {
+            if (e.key === "ArrowUp") {
                 document.getElementById("submit_btn").click();
             } else if (e.key === "ArrowRight") {
                 document.getElementById("next_btn").click();
@@ -284,7 +309,7 @@ class ActiveLearner:
                             type="filepath",
                             label="Image",
                             value=filepaths[0],
-                            height=500
+                            height=500,
                         )
                         # Add bar plot with top 5 predictions
@@ -334,7 +359,7 @@ class ActiveLearner:
                     with gr.Row():
                         back_btn = gr.Button("← Previous", elem_id="back_btn")
                         submit_btn = gr.Button(
-                            "Submit (↑/Enter)",
+                            "Submit ↑",
                             variant="primary",
                             elem_id="submit_btn",
                         )
@@ -344,8 +369,25 @@ class ActiveLearner:
                         minimum=0,
                         maximum=len(filepaths) - 1,
                         value=0,
+                        step=1,
                         label="Progress",
-                        interactive=False,
+                        interactive=True,
+                    )
+                    # Add event handler for slider changes
+                    progress.change(
+                        fn=lambda idx: navigate(idx, 0),
+                        inputs=[progress],
+                        outputs=[
+                            filename,
+                            image,
+                            pred_label,
+                            pred_conf,
+                            category,
+                            current_index,
+                            progress,
+                            pred_plot,
+                        ],
                     )
                     finish_btn = gr.Button("Finish Labeling", variant="primary")
@@ -490,18 +532,28 @@ class ActiveLearner:
                 current_idx = int(current_idx)
                 if selected_category is None:
-                    plot_data = None if "pred_raw" not in df.columns else pd.DataFrame(
-                        {
-                            "class": self.class_names,
-                            "probability": df["pred_raw"].iloc[current_idx],
-                        }
-                    ).nlargest(5, "probability")
+                    plot_data = (
+                        None
+                        if "pred_raw" not in df.columns
+                        else pd.DataFrame(
+                            {
+                                "class": self.class_names,
+                                "probability": df["pred_raw"].iloc[current_idx],
+                            }
+                        ).nlargest(5, "probability")
+                    )
                     return (
                         filepaths[current_idx],
                         filepaths[current_idx],
-                        df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else "",
-                        f"{df['pred_conf'].iloc[current_idx]:.2%}" if "pred_conf" in df.columns else "",
-                        df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else None,
+                        df["pred_label"].iloc[current_idx]
+                        if "pred_label" in df.columns
+                        else "",
+                        f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                        if "pred_conf" in df.columns
+                        else "",
+                        df["pred_label"].iloc[current_idx]
+                        if "pred_label" in df.columns
+                        else None,
                         current_idx,
                         current_idx,
                         plot_data,
@@ -514,35 +566,55 @@ class ActiveLearner:
                 # Move to next image if not at the end
                 next_idx = current_idx + 1
                 if next_idx >= len(filepaths):
-                    plot_data = None if "pred_raw" not in df.columns else pd.DataFrame(
-                        {
-                            "class": self.class_names,
-                            "probability": df["pred_raw"].iloc[current_idx],
-                        }
-                    ).nlargest(5, "probability")
+                    plot_data = (
+                        None
+                        if "pred_raw" not in df.columns
+                        else pd.DataFrame(
+                            {
+                                "class": self.class_names,
+                                "probability": df["pred_raw"].iloc[current_idx],
+                            }
+                        ).nlargest(5, "probability")
+                    )
                     return (
                         filepaths[current_idx],
                         filepaths[current_idx],
-                        df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else "",
-                        f"{df['pred_conf'].iloc[current_idx]:.2%}" if "pred_conf" in df.columns else "",
-                        df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else None,
+                        df["pred_label"].iloc[current_idx]
+                        if "pred_label" in df.columns
+                        else "",
+                        f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                        if "pred_conf" in df.columns
+                        else "",
+                        df["pred_label"].iloc[current_idx]
+                        if "pred_label" in df.columns
+                        else None,
                         current_idx,
                         current_idx,
                         plot_data,
                     )
-                plot_data = None if "pred_raw" not in df.columns else pd.DataFrame(
-                    {
-                        "class": self.class_names,
-                        "probability": df["pred_raw"].iloc[next_idx],
-                    }
-                ).nlargest(5, "probability")
+                plot_data = (
+                    None
+                    if "pred_raw" not in df.columns
+                    else pd.DataFrame(
+                        {
+                            "class": self.class_names,
+                            "probability": df["pred_raw"].iloc[next_idx],
+                        }
+                    ).nlargest(5, "probability")
+                )
                 return (
                     filepaths[next_idx],
                     filepaths[next_idx],
-                    df["pred_label"].iloc[next_idx] if "pred_label" in df.columns else "",
-                    f"{df['pred_conf'].iloc[next_idx]:.2%}" if "pred_conf" in df.columns else "",
-                    df["pred_label"].iloc[next_idx] if "pred_label" in df.columns else None,
+                    df["pred_label"].iloc[next_idx]
+                    if "pred_label" in df.columns
+                    else "",
+                    f"{df['pred_conf'].iloc[next_idx]:.2%}"
+                    if "pred_conf" in df.columns
+                    else "",
+                    df["pred_label"].iloc[next_idx]
+                    if "pred_label" in df.columns
+                    else None,
                     next_idx,
                     next_idx,
                     plot_data,

{active_vision-0.1.1.dist-info → active_vision-0.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: active-vision
-Version: 0.1.1
+Version: 0.2.0
 Summary: Active learning for edge vision.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -17,10 +17,10 @@ Requires-Dist: timm>=1.0.13
 Requires-Dist: transformers>=4.48.0
 Requires-Dist: xinfer>=0.3.2
-![Python Version](https://img.shields.io/badge/python-3.10%2B-blue?style=for-the-badge)
-![License](https://img.shields.io/badge/License-Apache%202.0-green.svg?style=for-the-badge)
-[![PyPI](https://img.shields.io/pypi/v/active-vision?style=for-the-badge)](https://pypi.org/project/active-vision/)
-![Downloads](https://img.shields.io/pepy/dt/active-vision?style=for-the-badge&logo=pypi&logoColor=white&label=Downloads&color=purple)
+[![Python Version](https://img.shields.io/badge/python-3.10%2B-blue?style=for-the-badge&logo=python&logoColor=white)](https://pypi.org/project/active-vision/)
+[![PyPI](https://img.shields.io/pypi/v/active-vision?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/active-vision/)
+[![Downloads](https://img.shields.io/pepy/dt/active-vision?style=for-the-badge&logo=pypi&logoColor=white&label=Downloads&color=purple)](https://pypi.org/project/active-vision/)
+[![License](https://img.shields.io/badge/License-Apache%202.0-green.svg?style=for-the-badge&logo=apache&logoColor=white)](https://github.com/dnth/active-vision/blob/main/LICENSE)
 <p align="center">
   <img src="https://raw.githubusercontent.com/dnth/active-vision/main/assets/logo.png" alt="active-vision">
@@ -47,9 +47,9 @@ The goal of this project is to create a framework for the active learning loop f
 Uncertainty Sampling:
 - [X] Least confidence
-- [ ] Margin of confidence
-- [ ] Ratio of confidence
-- [ ] Entropy
+- [X] Margin of confidence
+- [X] Ratio of confidence
+- [X] Entropy
 Diverse Sampling:
 - [X] Random sampling
@@ -172,7 +172,7 @@ The active learning loop is an iterative process and can keep going until you hit
 - You hit a budget.
 - Other criteria.
-For this dataset,I decided to stop the active learning loop at 275 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard.
+For this dataset, I decided to stop the active learning loop at 275 labeled images because the performance on the evaluation set exceeds the top performing model on the leaderboard.
 | #Labeled Images 	| Evaluation Accuracy 	| Train Epochs 	| Model                	| Active Learning 	| Source                                                                              	|

active_vision-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+active_vision/__init__.py,sha256=SxR6MPyULKlvx-86S3NIk46Tz1xlN-g_vI_aW3LitG4,43
+active_vision/core.py,sha256=4Nl8e3isinIlzcD6bCbG9TTGiuG0PQkKNUIvnAsbaTY,27373
+active_vision-0.2.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+active_vision-0.2.0.dist-info/METADATA,sha256=3XvDTC1Cnxd3rIUUXyY8MwTgKGcnncN9D2VvKnkw1jQ,15675
+active_vision-0.2.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+active_vision-0.2.0.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
+active_vision-0.2.0.dist-info/RECORD,,

active_vision-0.1.1.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-active_vision/__init__.py,sha256=xWa6YKvR3wF8p_D9PprKNGP3VnxjbyVpcwnPCMhhaHM,43
-active_vision/core.py,sha256=jWzTOx3GCB2Sq5-JGgoi-ZD2teoIGTYas9StqZxXefo,24999
-active_vision-0.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-active_vision-0.1.1.dist-info/METADATA,sha256=U8-IH0WJnPj6KPBsfsxcW4GZCTDY0KFxrqz7migcnro,15454
-active_vision-0.1.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-active_vision-0.1.1.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
-active_vision-0.1.1.dist-info/RECORD,,

{active_vision-0.1.1.dist-info → active_vision-0.2.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{active_vision-0.1.1.dist-info → active_vision-0.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{active_vision-0.1.1.dist-info → active_vision-0.2.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

active-vision 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

active-vision 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl