active-vision 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- active_vision/__init__.py +1 -1
- active_vision/core.py +235 -109
- {active_vision-0.1.0.dist-info → active_vision-0.2.0.dist-info}/METADATA +65 -80
- active_vision-0.2.0.dist-info/RECORD +7 -0
- active_vision-0.1.0.dist-info/RECORD +0 -7
- {active_vision-0.1.0.dist-info → active_vision-0.2.0.dist-info}/LICENSE +0 -0
- {active_vision-0.1.0.dist-info → active_vision-0.2.0.dist-info}/WHEEL +0 -0
- {active_vision-0.1.0.dist-info → active_vision-0.2.0.dist-info}/top_level.txt +0 -0
active_vision/__init__.py
CHANGED
active_vision/core.py
CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
 from loguru import logger
 from fastai.vision.all import *
 import torch
-import
+import numpy as np
 
 import warnings
 from typing import Callable
@@ -142,7 +142,8 @@ class ActiveLearner:
             {
                 "filepath": filepaths,
                 "pred_label": [self.learn.dls.vocab[i] for i in cls_preds.numpy()],
-                "pred_conf": torch.max(
+                "pred_conf": torch.max(preds, dim=1)[0].numpy(),
+                "pred_raw": preds.numpy().tolist(),
             }
         )
         return self.pred_df
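The change above replaces the old single `pred_conf` expression with the per-sample maximum softmax probability and adds a `pred_raw` column holding the full probability vector. A minimal sketch of what the two new expressions produce, assuming `preds` is an `(n_samples, n_classes)` tensor of softmax outputs (the toy values below are purely illustrative):

```python
import torch

# Toy stand-in for `preds`: 2 samples, 3 classes, each row sums to 1.
preds = torch.tensor([[0.70, 0.20, 0.10],
                      [0.40, 0.35, 0.25]])

pred_conf = torch.max(preds, dim=1)[0].numpy()  # highest probability per sample -> [0.7, 0.4]
pred_raw = preds.numpy().tolist()               # full per-class probabilities, one list per sample
```

The `pred_raw` column is what the margin, ratio, and entropy strategies added below operate on.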
@@ -189,37 +190,61 @@ class ActiveLearner:
         df = df[~df["filepath"].isin(self.train_set["filepath"])].copy()
 
         if strategy == "least-confidence":
-            logger.info(
-
+            logger.info(
+                f"Using least confidence strategy to get top {num_samples} samples"
+            )
             df.loc[:, "uncertainty_score"] = 1 - (df["pred_conf"]) / (
                 self.num_classes - (self.num_classes - 1)
             )
 
-
-
-            num_samples
+        elif strategy == "margin-of-confidence":
+            logger.info(
+                f"Using margin of confidence strategy to get top {num_samples} samples"
             )
-
+            if len(df["pred_raw"].iloc[0]) < 2:
+                logger.error("pred_raw has less than 2 elements")
+                raise ValueError("pred_raw has less than 2 elements")
 
-
-
-
-
+            # Calculate uncertainty score as 1 - (difference between top two predictions)
+            df.loc[:, "uncertainty_score"] = df["pred_raw"].apply(
+                lambda x: 1 - (np.sort(x)[-1] - np.sort(x)[-2])
+            )
 
-        # TODO: Implement ratio of confidence strategy
         elif strategy == "ratio-of-confidence":
-            logger.
-
+            logger.info(
+                f"Using ratio of confidence strategy to get top {num_samples} samples"
+            )
+            if len(df["pred_raw"].iloc[0]) < 2:
+                logger.error("pred_raw has less than 2 elements")
+                raise ValueError("pred_raw has less than 2 elements")
+
+            # Calculate uncertainty score as ratio of top two predictions
+            df.loc[:, "uncertainty_score"] = df["pred_raw"].apply(
+                lambda x: np.sort(x)[-2] / np.sort(x)[-1]
+            )
 
-        # TODO: Implement entropy strategy
         elif strategy == "entropy":
-            logger.
-
+            logger.info(f"Using entropy strategy to get top {num_samples} samples")
+
+            # Calculate uncertainty score as entropy of the prediction
+            df.loc[:, "uncertainty_score"] = df["pred_raw"].apply(
+                lambda x: -np.sum(x * np.log2(x))
+            )
+
+            # Normalize the uncertainty score to be between 0 and 1 by dividing by log2 of the number of classes
+            df.loc[:, "uncertainty_score"] = df["uncertainty_score"] / np.log2(
+                self.num_classes
+            )
 
         else:
             logger.error(f"Unknown strategy: {strategy}")
             raise ValueError(f"Unknown strategy: {strategy}")
 
+        df = df[
+            ["filepath", "pred_label", "pred_conf", "uncertainty_score", "pred_raw"]
+        ]
+        return df.sort_values(by="uncertainty_score", ascending=False).head(num_samples)
+
     def sample_diverse(self, df: pd.DataFrame, num_samples: int):
         """
         Sample top `num_samples` diverse samples. Returns a df with filepaths and predicted labels, and confidence scores.
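For reference, a standalone sketch of the three scoring rules introduced above, applied to a single `pred_raw` probability vector (the numbers are made up for illustration; each line mirrors the corresponding branch in `sample_uncertain`):

```python
import numpy as np

x = np.array([0.6, 0.3, 0.1])   # example pred_raw vector for one image; needs >= 2 classes
num_classes = len(x)
top1, top2 = np.sort(x)[-1], np.sort(x)[-2]

margin_score = 1 - (top1 - top2)                                # margin of confidence -> 0.7
ratio_score = top2 / top1                                       # ratio of confidence  -> 0.5
entropy_score = -np.sum(x * np.log2(x)) / np.log2(num_classes)  # normalized entropy   -> ~0.82

# Higher scores mean more uncertain predictions; sample_uncertain sorts the
# dataframe by uncertainty_score in descending order and keeps the top num_samples rows.
```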
@@ -258,7 +283,7 @@ class ActiveLearner:
                return;
            }
 
-            if (e.key === "ArrowUp"
+            if (e.key === "ArrowUp") {
                document.getElementById("submit_btn").click();
            } else if (e.key === "ArrowRight") {
                document.getElementById("next_btn").click();
@@ -275,107 +300,149 @@ class ActiveLearner:
         filepaths = df["filepath"].tolist()
 
         with gr.Blocks(head=shortcut_js) as demo:
-
-
-
-
-
-
-
-
-
-
+            with gr.Tabs():
+                with gr.Tab("Labeling"):
+                    current_index = gr.State(value=0)
+
+                    with gr.Row(min_height=500):
+                        image = gr.Image(
+                            type="filepath",
+                            label="Image",
+                            value=filepaths[0],
+                            height=500,
+                        )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                        # Add bar plot with top 5 predictions
+                        with gr.Column():
+                            pred_plot = gr.BarPlot(
+                                x="probability",
+                                y="class",
+                                title="Top 5 Predictions",
+                                x_lim=[0, 1],
+                                value=None
+                                if "pred_raw" not in df.columns
+                                else pd.DataFrame(
+                                    {
+                                        "class": self.class_names,
+                                        "probability": df["pred_raw"].iloc[0],
+                                    }
+                                ).nlargest(5, "probability"),
+                            )
+
+                            filename = gr.Textbox(
+                                label="Filename", value=filepaths[0], interactive=False
+                            )
+
+                            pred_label = gr.Textbox(
+                                label="Predicted Label",
+                                value=df["pred_label"].iloc[0]
+                                if "pred_label" in df.columns
+                                else "",
+                                interactive=False,
+                            )
+                            pred_conf = gr.Textbox(
+                                label="Confidence",
+                                value=f"{df['pred_conf'].iloc[0]:.2%}"
+                                if "pred_conf" in df.columns
+                                else "",
+                                interactive=False,
+                            )
+
+                            category = gr.Radio(
+                                choices=self.class_names,
+                                label="Select Category",
+                                value=df["pred_label"].iloc[0]
+                                if "pred_label" in df.columns
+                                else None,
+                            )
 
-
-
-
-
-
+                    with gr.Row():
+                        back_btn = gr.Button("← Previous", elem_id="back_btn")
+                        submit_btn = gr.Button(
+                            "Submit ↑",
+                            variant="primary",
+                            elem_id="submit_btn",
+                        )
+                        next_btn = gr.Button("Next →", elem_id="next_btn")
+
+                    progress = gr.Slider(
+                        minimum=0,
+                        maximum=len(filepaths) - 1,
+                        value=0,
+                        step=1,
+                        label="Progress",
+                        interactive=True,
+                    )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    interactive=False,
-                )
+                    # Add event handler for slider changes
+                    progress.change(
+                        fn=lambda idx: navigate(idx, 0),
+                        inputs=[progress],
+                        outputs=[
+                            filename,
+                            image,
+                            pred_label,
+                            pred_conf,
+                            category,
+                            current_index,
+                            progress,
+                            pred_plot,
+                        ],
+                    )
 
-
+                    finish_btn = gr.Button("Finish Labeling", variant="primary")
 
-
-
-
-
+                with gr.Tab("Zero-Shot Inference"):
+                    gr.Markdown("""
+                    Uses a VLM to predict the label of the image.
+                    """)
 
-
-
-
+                    import xinfer
+                    from xinfer.model_registry import model_registry
+                    from xinfer.types import ModelInputOutput
 
-
-
-
-
-
-
-
+                    # Get models and filter for image-to-text models
+                    all_models = model_registry.list_models()
+                    model_list = [
+                        model.id
+                        for model in all_models
+                        if model.input_output == ModelInputOutput.IMAGE_TEXT_TO_TEXT
+                    ]
 
-            with gr.Row():
                     with gr.Row():
-
-
-
-
-
-
-
-
-
+                        with gr.Row():
+                            model_dropdown = gr.Dropdown(
+                                choices=model_list,
+                                label="Select a model",
+                                value="vikhyatk/moondream2",
+                            )
+                            device_dropdown = gr.Dropdown(
+                                choices=["cuda", "cpu"],
+                                label="Device",
+                                value="cuda" if torch.cuda.is_available() else "cpu",
+                            )
+                            dtype_dropdown = gr.Dropdown(
+                                choices=["float32", "float16", "bfloat16"],
+                                label="Data Type",
+                                value="float16"
+                                if torch.cuda.is_available()
+                                else "float32",
+                            )
+
+                        with gr.Column():
+                            prompt_textbox = gr.Textbox(
+                                label="Prompt",
+                                lines=5,
+                                value=f"Classify the image into one of the following categories: {self.class_names}. Answer with the category name only.",
+                                interactive=True,
                             )
-
-                    choices=["float32", "float16", "bfloat16"],
-                    label="Data Type",
-                    value="float16" if torch.cuda.is_available() else "float32",
-                )
-
-            with gr.Column():
-                prompt_textbox = gr.Textbox(
-                    label="Prompt",
-                    lines=3,
-                    value=f"Classify the image into one of the following categories: {self.class_names}",
-                    interactive=True,
-                )
-                inference_btn = gr.Button("Run Inference", variant="primary")
+                            inference_btn = gr.Button("Run Inference", variant="primary")
 
-
-
-
-
-
+                            result_textbox = gr.Textbox(
+                                label="Result",
+                                lines=3,
+                                interactive=False,
+                            )
 
             def run_zero_shot_inference(prompt, model, device, dtype, current_filename):
                 model = xinfer.create_model(model, device=device, dtype=dtype)
@@ -407,6 +474,16 @@ class ActiveLearner:
                next_idx = current_idx + direction
 
                if 0 <= next_idx < len(filepaths):
+                    plot_data = (
+                        None
+                        if "pred_raw" not in df.columns
+                        else pd.DataFrame(
+                            {
+                                "class": self.class_names,
+                                "probability": df["pred_raw"].iloc[next_idx],
+                            }
+                        ).nlargest(5, "probability")
+                    )
                    return (
                        filepaths[next_idx],
                        filepaths[next_idx],
@@ -421,7 +498,18 @@ class ActiveLearner:
                        else None,
                        next_idx,
                        next_idx,
+                        plot_data,
                    )
+                plot_data = (
+                    None
+                    if "pred_raw" not in df.columns
+                    else pd.DataFrame(
+                        {
+                            "class": self.class_names,
+                            "probability": df["pred_raw"].iloc[current_idx],
+                        }
+                    ).nlargest(5, "probability")
+                )
                return (
                    filepaths[current_idx],
                    filepaths[current_idx],
@@ -436,6 +524,7 @@ class ActiveLearner:
                    else None,
                    current_idx,
                    current_idx,
+                    plot_data,
                )
 
            def save_and_next(current_idx, selected_category):
@@ -443,6 +532,16 @@ class ActiveLearner:
                current_idx = int(current_idx)
 
                if selected_category is None:
+                    plot_data = (
+                        None
+                        if "pred_raw" not in df.columns
+                        else pd.DataFrame(
+                            {
+                                "class": self.class_names,
+                                "probability": df["pred_raw"].iloc[current_idx],
+                            }
+                        ).nlargest(5, "probability")
+                    )
                    return (
                        filepaths[current_idx],
                        filepaths[current_idx],
@@ -457,6 +556,7 @@ class ActiveLearner:
                        else None,
                        current_idx,
                        current_idx,
+                        plot_data,
                    )
 
                # Save the current annotation
@@ -466,6 +566,16 @@ class ActiveLearner:
                # Move to next image if not at the end
                next_idx = current_idx + 1
                if next_idx >= len(filepaths):
+                    plot_data = (
+                        None
+                        if "pred_raw" not in df.columns
+                        else pd.DataFrame(
+                            {
+                                "class": self.class_names,
+                                "probability": df["pred_raw"].iloc[current_idx],
+                            }
+                        ).nlargest(5, "probability")
+                    )
                    return (
                        filepaths[current_idx],
                        filepaths[current_idx],
@@ -480,7 +590,19 @@ class ActiveLearner:
                        else None,
                        current_idx,
                        current_idx,
+                        plot_data,
                    )
+
+                plot_data = (
+                    None
+                    if "pred_raw" not in df.columns
+                    else pd.DataFrame(
+                        {
+                            "class": self.class_names,
+                            "probability": df["pred_raw"].iloc[next_idx],
+                        }
+                    ).nlargest(5, "probability")
+                )
                return (
                    filepaths[next_idx],
                    filepaths[next_idx],
@@ -495,6 +617,7 @@ class ActiveLearner:
                    else None,
                    next_idx,
                    next_idx,
+                    plot_data,
                )
 
            def convert_csv_to_parquet():
@@ -519,6 +642,7 @@ class ActiveLearner:
                    category,
                    current_index,
                    progress,
+                    pred_plot,
                ],
            )
 
@@ -533,6 +657,7 @@ class ActiveLearner:
                    category,
                    current_index,
                    progress,
+                    pred_plot,
                ],
            )
 
@@ -547,6 +672,7 @@ class ActiveLearner:
                    category,
                    current_index,
                    progress,
+                    pred_plot,
                ],
            )
 
{active_vision-0.1.0.dist-info → active_vision-0.2.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: active-vision
-Version: 0.1.0
+Version: 0.2.0
 Summary: Active learning for edge vision.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -17,10 +17,10 @@ Requires-Dist: timm>=1.0.13
 Requires-Dist: transformers>=4.48.0
 Requires-Dist: xinfer>=0.3.2
 
-
-](https://pypi.org/project/active-vision/)
+[](https://pypi.org/project/active-vision/)
+[](https://pypi.org/project/active-vision/)
+[](https://github.com/dnth/active-vision/blob/main/LICENSE)
 
 <p align="center">
   <img src="https://raw.githubusercontent.com/dnth/active-vision/main/assets/logo.png" alt="active-vision">
@@ -47,9 +47,9 @@ The goal of this project is to create a framework for the active learning loop f
 
 Uncertainty Sampling:
 - [X] Least confidence
-- [
-- [
-- [
+- [X] Margin of confidence
+- [X] Ratio of confidence
+- [X] Entropy
 
 Diverse Sampling:
 - [X] Random sampling
@@ -71,17 +71,18 @@ cd active-vision
 pip install -e .
 ```
 
-I recommend using [uv](https://docs.astral.sh/uv/) to set up a virtual environment and install the package. You can also use other virtual env of your choice.
-
-If you're using uv:
-
-```bash
-uv venv
-uv sync
-```
-Once the virtual environment is created, you can install the package using pip.
 
 > [!TIP]
+> I recommend using [uv](https://docs.astral.sh/uv/) to set up a virtual environment and install the package. You can also use other virtual env of your choice.
+>
+> If you're using uv:
+>
+> ```bash
+> uv venv
+> uv sync
+> ```
+> Once the virtual environment is created, you can install the package using pip.
+>
 > If you're using uv add a `uv` before the pip install command to install into your virtual environment. Eg:
 > ```bash
 > uv pip install active-vision
@@ -120,12 +121,16 @@ pred_df = al.predict(filepaths)
 # Sample low confidence predictions from unlabeled set
 uncertain_df = al.sample_uncertain(pred_df, num_samples=10)
 
-# Launch a Gradio UI to label the low confidence samples
+# Launch a Gradio UI to label the low confidence samples, save the labeled samples to a file
 al.label(uncertain_df, output_filename="uncertain")
 ```
 
 
 
+In the UI, you can optionally run zero-shot inference on the image. This will use a VLM to predict the label of the image. There are a dozen VLM models as supported in the [x.infer project](https://github.com/dnth/x.infer).
+
+
+
 Once complete, the labeled samples will be save into a new df.
 We can now add the newly labeled data to the training set.
 
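The quickstart above calls `sample_uncertain` with its default strategy. A hedged sketch of selecting the other strategies added in this release, assuming the method exposes a `strategy` keyword matching the names used in `core.py` (the full signature is not shown in this diff):

```python
# Assumed usage; the strategy names come from the core.py diff above.
uncertain_df = al.sample_uncertain(pred_df, num_samples=10, strategy="margin-of-confidence")
entropy_df = al.sample_uncertain(pred_df, num_samples=10, strategy="entropy")
```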
@@ -167,15 +172,15 @@ The active learning loop is a iterative process and can keep going until you hit
 - You hit a budget.
 - Other criteria.
 
-For this dataset,I decided to stop the active learning loop at 275 labeled images because the performance on the evaluation set
+For this dataset, I decided to stop the active learning loop at 275 labeled images because the performance on the evaluation set exceeds the top performing model on the leaderboard.
 
 
-| #Labeled Images
-|
-| 9469
-| 9469
-| 275
-| 275
+| #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+|----------------:|--------------------:|-------------:|----------------------|:---------------:|--------|
+| 9469 | 94.90% | 80 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
+| 9469 | 95.11% | 200 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
+| 275 | 99.33% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/05_retrain_larger.ipynb) |
+| 275 | 93.40% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/04_relabel_loop.ipynb) |
 
 ### Dog Food
 - num classes: 2
@@ -185,11 +190,11 @@ To start the active learning loop, I labeled 20 images (10 images from each clas
 
 I decided to stop the active learning loop at 160 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
 
-| #Labeled Images
-|
-| 2100
-| 160
-| 160
+| #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+|----------------:|--------------------:|-------------:|----------------------|:---------------:|--------|
+| 2100 | 99.70% | ? | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/abhishek/autotrain-dog-vs-food) |
+| 160 | 100.00% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/02_train.ipynb) |
+| 160 | 97.60% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/01_label.ipynb) |
 
 ### Oxford-IIIT Pet
 - num classes: 37
@@ -199,13 +204,27 @@ To start the active learning loop, I labeled 370 images (10 images from each cla
 
 I decided to stop the active learning loop at 612 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
 
-| #Labeled Images
-|
-| 3680
-| 612
-| 612
+| #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+|----------------:|--------------------:|-------------:|----------------------|:---------------:|--------|
+| 3680 | 95.40% | 5 | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/walterg777/vit-base-oxford-iiit-pets) |
+| 612 | 90.26% | 11 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/02_train.ipynb) |
+| 612 | 91.38% | 11 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/03_train_vit.ipynb) |
+
+### Eurosat RGB
+- num classes: 10
+- num images: 16100
+
+To start the active learning loop, I labeled 100 images (10 images from each class) and iteratively labeled the most informative images until I hit 1188 labeled images.
+
+I decided to stop the active learning loop at 1188 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
 
 
+| #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+|----------------:|--------------------:|-------------:|----------------------|:---------------:|--------|
+| 16100 | 98.55% | 6 | vit-base-patch16-224 | ❌ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/03_train_all.ipynb) |
+| 1188 | 94.59% | 6 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/02_train.ipynb) |
+| 1188 | 96.57% | 13 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/02_train.ipynb) |
+
 
 ## ➿ Workflow
 This section describes a more detailed workflow for active learning. There are two workflows for active learning that we can use depending on the availability of labeled data.
@@ -273,55 +292,21 @@ graph TD
 
 
 
-
-To test out the workflows we will use the [imagenette dataset](https://huggingface.co/datasets/frgfm/imagenette). But this will be applicable to any dataset.
-
-Imagenette is a subset of the ImageNet dataset with 10 classes. We will use this dataset to test out the workflows. Additionally, Imagenette has an existing leaderboard which we can use to evaluate the performance of the models.
-
-### Step 1: Download the dataset
-Download the imagenette dataset. The imagenette dataset has a train and validation split. Since the leaderboard is based on the validation set, we will evalutate the performance of our model on the validation set to make it easier to compare to the leaderboard.
-
-We will treat the imagenette train set as a unlabeled set and iteratively sample from it while monitoring the performance on the validation set. Ideally we will be able to get to a point where the performance on the validation set is close to the leaderboard with minimal number of labeled images.
+## 🧱 Sampling Approaches
 
-
+Recommendation 1:
+- 10% randomly selected from unlabeled items.
+- 80% selected from the lowest confidence items.
+- 10% selected as outliers.
 
-
-```python
-from datasets import load_dataset
-
-unlabeled_dataset = load_dataset("dnth/active-learning-imagenette", "unlabeled")
-eval_dataset = load_dataset("dnth/active-learning-imagenette", "evaluation")
-```
+Recommendation 2:
 
-
-
+- Sample 100 predicted images at 10–20% confidence.
+- Sample 100 predicted images at 20–30% confidence.
+- Sample 100 predicted images at 30–40% confidence, and so on.
 
-### Step 3: Training the proxy model
-Train a proxy model on the initial dataset. The proxy model will be a small model that is easy to train and deploy. We will use the fastai framework to train the model. We will use the resnet18 architecture as a starting point. Once training is complete, compute the accuracy of the proxy model on the validation set and compare it to the leaderboard.
 
-
-> With the initial model we got 91.24% accuracy on the validation set. See the [notebook](./nbs/01_initial_sampling.ipynb) for more details.
-> | Train Epochs | Number of Images | Validation Accuracy | Source |
-> |--------------|-----------------|----------------------|------------------|
-> | 10 | 100 | 91.24% | Initial sampling [notebook](./nbs/01_initial_sampling.ipynb) |
-> | 80 | 9469 | 94.90% | fastai |
-> | 200 | 9469 | 95.11% | fastai |
+Uncertainty and diversity sampling are most effective when combined. For instance, you could first sample the most uncertain items using an uncertainty sampling method, then apply a diversity sampling method such as clustering to select a diverse set from the uncertain items.
 
+Ultimately, the right ratios can depend on the specific task and dataset.
 
-
-### Step 4: Inference on the unlabeled dataset
-Run inference on the unlabeled dataset (the remaining imagenette train set) and evaluate the performance of the proxy model.
-
-### Step 5: Active learning
-Use active learning to select the most informative images to label from the unlabeled set. Pick the top 10 images from the unlabeled set that the proxy model is least confident about and label them.
-
-### Step 6: Repeat
-Repeat step 3 - 5 until the performance on the validation set is close to the leaderboard. Note the number of labeled images vs the performance on the validation set. Ideally we want to get to a point where the performance on the validation set is close to the leaderboard with minimal number of labeled images.
-
-
-After the first iteration we got 94.57% accuracy on the validation set. See the [notebook](./nbs/03_retrain_model.ipynb) for more details.
-
-> [!TIP]
-> | Train Epochs | Number of Images | Validation Accuracy | Source |
-> |--------------|-----------------|----------------------|------------------|
-> | 10 | 200 | 94.57% | First relabeling [notebook](./nbs/03_retrain_model.ipynb) | -->
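The combined uncertainty-plus-diversity recipe described in the new Sampling Approaches section can be sketched in a few lines. This is a hedged illustration, not code from the package: it assumes a `pred_df` carrying the `uncertainty_score` and `pred_raw` columns produced by `sample_uncertain`, and it uses scikit-learn's KMeans on the raw probability vectors purely as a stand-in for a real diversity method (image embeddings, clustering, outlier detection, etc.):

```python
import numpy as np
from sklearn.cluster import KMeans


def uncertain_then_diverse(pred_df, n_uncertain=100, n_final=10):
    # Step 1: uncertainty sampling - keep the n_uncertain highest-scoring rows.
    top = pred_df.sort_values("uncertainty_score", ascending=False).head(n_uncertain)

    # Step 2: diversity sampling - cluster the probability vectors and keep
    # one item from each cluster.
    features = np.vstack(top["pred_raw"].to_numpy())
    clusters = KMeans(n_clusters=n_final, n_init="auto").fit_predict(features)
    return top.assign(cluster=clusters).groupby("cluster").head(1)
```

Because the rows are already sorted by `uncertainty_score` before the groupby, the row kept from each cluster is its most uncertain member.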
active_vision-0.2.0.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+active_vision/__init__.py,sha256=SxR6MPyULKlvx-86S3NIk46Tz1xlN-g_vI_aW3LitG4,43
+active_vision/core.py,sha256=4Nl8e3isinIlzcD6bCbG9TTGiuG0PQkKNUIvnAsbaTY,27373
+active_vision-0.2.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+active_vision-0.2.0.dist-info/METADATA,sha256=3XvDTC1Cnxd3rIUUXyY8MwTgKGcnncN9D2VvKnkw1jQ,15675
+active_vision-0.2.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+active_vision-0.2.0.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
+active_vision-0.2.0.dist-info/RECORD,,
active_vision-0.1.0.dist-info/RECORD
REMOVED
@@ -1,7 +0,0 @@
-active_vision/__init__.py,sha256=dDQijes3C7zAUc_08TyblLSP6Lk0PcPPI8PYgEliKCI,43
-active_vision/core.py,sha256=D_ve-nMv2EWSaQCOBTggleo-1op8JHXchk0QLicGDqg,21715
-active_vision-0.1.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-active_vision-0.1.0.dist-info/METADATA,sha256=aA793OK3PGKnKVchMQthXl1H14xcBh_kq9tAO9o6jf0,15944
-active_vision-0.1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-active_vision-0.1.0.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
-active_vision-0.1.0.dist-info/RECORD,,
{active_vision-0.1.0.dist-info → active_vision-0.2.0.dist-info}/LICENSE
File without changes
{active_vision-0.1.0.dist-info → active_vision-0.2.0.dist-info}/WHEEL
File without changes
{active_vision-0.1.0.dist-info → active_vision-0.2.0.dist-info}/top_level.txt
File without changes
|