active-vision 0.0.5-py3-none-any.whl → 0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
active_vision/__init__.py CHANGED
@@ -1,3 +1,3 @@
- __version__ = "0.0.5"
+ __version__ = "0.1.0"

  from .core import *
active_vision/core.py CHANGED
@@ -1,17 +1,6 @@
  import pandas as pd
  from loguru import logger
- from fastai.callback.all import ShowGraphCallback
- from fastai.vision.all import (
-     ImageDataLoaders,
-     aug_transforms,
-     Resize,
-     vision_learner,
-     accuracy,
-     valley,
-     slide,
-     minimum,
-     steep,
- )
+ from fastai.vision.all import *
  import torch
  import torch.nn.functional as F

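Note on the import change: `fastai.vision.all` re-exports the names the explicit list used to pull in (`ShowGraphCallback`, `vision_learner`, `accuracy`, and the `lr_find` suggestion functions `minimum`, `steep`, `valley`, `slide`), plus `load_learner`, which the new loading path below depends on.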
@@ -22,7 +11,28 @@ warnings.filterwarnings("ignore", category=FutureWarning)


  class ActiveLearner:
-     def __init__(self, model_name: str):
+     """
+     Active Learning framework for computer vision tasks.
+
+     Attributes:
+         Model Related:
+             model: The base model architecture (str or Callable)
+             learn: fastai Learner object for training
+             lrs: Learning rate finder results
+
+         Data Related:
+             train_set (pd.DataFrame): Training dataset
+             eval_set (pd.DataFrame): Evaluation dataset with ground truth labels
+             dls: fastai DataLoaders object
+             class_names: List of class names from the dataset
+             num_classes (int): Number of classes in the dataset
+
+         Prediction Related:
+             pred_df (pd.DataFrame): Predictions on a dataframe
+             eval_df (pd.DataFrame): Predictions on evaluation data
+     """
+
+     def __init__(self, model_name: str | Callable):
          self.model = self.load_model(model_name)

      def load_model(self, model_name: str | Callable):
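With the widened type hint, the constructor accepts either a model name string or a callable that builds the model. A minimal usage sketch (the architecture name is illustrative):

```python
from active_vision import ActiveLearner

# Either a model name string or a callable that constructs a model works now.
learner = ActiveLearner("resnet18")
```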
@@ -43,6 +53,7 @@ class ActiveLearner:
          batch_size: int = 16,
          image_size: int = 224,
          batch_tfms: Callable = None,
+         learner_path: str = None,
      ):
          logger.info(f"Loading dataset from {filepath_col} and {label_col}")
          self.train_set = df.copy()
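The new `learner_path` parameter lets `load_dataset` restore a previously exported fastai learner (via `load_learner`, see the next hunk) instead of creating a fresh `vision_learner`. A hedged sketch, assuming a dataframe with `filepath`/`label` columns and a learner exported earlier to `model.pkl` (hypothetical path):

```python
learner.load_dataset(
    df,
    filepath_col="filepath",   # column holding image paths
    label_col="label",         # column holding class labels
    learner_path="model.pkl",  # hypothetical: a learner saved with learn.export()
)
```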
@@ -58,22 +69,66 @@ class ActiveLearner:
              item_tfms=Resize(image_size),
              batch_tfms=batch_tfms,
          )
-         logger.info("Creating learner")
-         self.learn = vision_learner(self.dls, self.model, metrics=accuracy).to_fp16()
+
+         if learner_path:
+             logger.info(f"Loading learner from {learner_path}")
+             gpu_available = torch.cuda.is_available()
+             if gpu_available:
+                 logger.info(f"Loading learner on GPU.")
+             else:
+                 logger.info(f"Loading learner on CPU.")
+
+             self.learn = load_learner(learner_path, cpu=not gpu_available)
+         else:
+             logger.info("Creating learner")
+             self.learn = vision_learner(
+                 self.dls, self.model, metrics=accuracy
+             ).to_fp16()
+
          self.class_names = self.dls.vocab
+         self.num_classes = self.dls.c
          logger.info("Done. Ready to train.")

-     def show_batch(self):
-         self.dls.show_batch()
+     def show_batch(
+         self,
+         num_samples: int = 9,
+         unique: bool = False,
+         num_rows: int = None,
+         num_cols: int = None,
+     ):
+         """
+         Show a batch of images from the dataset.
+
+         Args:
+             num_samples: Number of samples to show.
+             unique: Whether to show unique samples.
+             num_rows: Number of rows in the grid.
+             num_cols: Number of columns in the grid.
+         """
+         self.dls.show_batch(
+             max_n=num_samples, unique=unique, nrows=num_rows, ncols=num_cols
+         )

      def lr_find(self):
          logger.info("Finding optimal learning rate")
          self.lrs = self.learn.lr_find(suggest_funcs=(minimum, steep, valley, slide))
          logger.info(f"Optimal learning rate: {self.lrs.valley}")

-     def train(self, epochs: int, lr: float):
-         logger.info(f"Training for {epochs} epochs with learning rate: {lr}")
-         self.learn.fine_tune(epochs, lr, cbs=[ShowGraphCallback()])
+     def train(self, epochs: int, lr: float, head_tuning_epochs: int = 1):
+         """
+         Train the model.
+
+         Args:
+             epochs: Number of epochs to train for.
+             lr: Learning rate.
+             head_tuning_epochs: Number of epochs to train the head.
+         """
+         logger.info(f"Training head for {head_tuning_epochs} epochs")
+         logger.info(f"Training model end-to-end for {epochs} epochs")
+         logger.info(f"Learning rate: {lr} with one-cycle learning rate scheduler")
+         self.learn.fine_tune(
+             epochs, lr, freeze_epochs=head_tuning_epochs, cbs=[ShowGraphCallback()]
+         )

      def predict(self, filepaths: list[str], batch_size: int = 16):
          """
@@ -131,11 +186,17 @@ class ActiveLearner:
          """

          # Remove samples that is already in the training set
-         df = df[~df["filepath"].isin(self.train_set["filepath"])]
+         df = df[~df["filepath"].isin(self.train_set["filepath"])].copy()

          if strategy == "least-confidence":
              logger.info(f"Getting top {num_samples} low confidence samples")
-             uncertain_df = df.sort_values(by="pred_conf", ascending=True).head(
+
+             df.loc[:, "uncertainty_score"] = 1 - (df["pred_conf"]) / (
+                 self.num_classes - (self.num_classes - 1)
+             )
+
+             # Sort by descending uncertainty score
+             uncertain_df = df.sort_values(by="uncertainty_score", ascending=False).head(
                  num_samples
              )
              return uncertain_df
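A note on the new scoring expression: the denominator `self.num_classes - (self.num_classes - 1)` always evaluates to 1, so `uncertainty_score` reduces to `1 - pred_conf`, and the descending sort is equivalent to the old ascending sort on `pred_conf`. A normalized least-confidence score, which this expression appears to be reaching for, would scale by the class count; a minimal sketch (names assumed from the surrounding code):

```python
# Hedged sketch: normalized least-confidence uncertainty, which maps the
# score onto [0, 1] for any number of classes (uniform predictions -> 1.0).
n = self.num_classes
df.loc[:, "uncertainty_score"] = (1 - df["pred_conf"]) * (n / (n - 1))
```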
@@ -197,15 +258,15 @@ class ActiveLearner:
                  return;
              }

-             if (e.key.toLowerCase() == "w") {
+             if (e.key === "ArrowUp" || e.key === "Enter") {
                  document.getElementById("submit_btn").click();
-             } else if (e.key.toLowerCase() == "d") {
+             } else if (e.key === "ArrowRight") {
                  document.getElementById("next_btn").click();
-             } else if (e.key.toLowerCase() == "a") {
+             } else if (e.key === "ArrowLeft") {
                  document.getElementById("back_btn").click();
              }
          }
-         document.addEventListener('keypress', shortcuts, false);
+         document.addEventListener('keydown', shortcuts, false);
      </script>
      """

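The listener change is what makes the new bindings work: arrow keys do not emit `keypress` events (the event is also deprecated), whereas `keydown` fires for every key, including ArrowUp/ArrowLeft/ArrowRight.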
@@ -216,24 +277,45 @@ class ActiveLearner:
          with gr.Blocks(head=shortcut_js) as demo:
              current_index = gr.State(value=0)

-             filename = gr.Textbox(
-                 label="Filename", value=filepaths[0], interactive=False
-             )
-
              image = gr.Image(
                  type="filepath", label="Image", value=filepaths[0], height=500
              )
-             category = gr.Radio(choices=self.class_names, label="Select Category")

              with gr.Row():
-                 back_btn = gr.Button("← Previous (A)", elem_id="back_btn")
+                 filename = gr.Textbox(
+                     label="Filename", value=filepaths[0], interactive=False
+                 )
+
+                 pred_label = gr.Textbox(
+                     label="Predicted Label",
+                     value=df["pred_label"].iloc[0]
+                     if "pred_label" in df.columns
+                     else "",
+                     interactive=False,
+                 )
+                 pred_conf = gr.Textbox(
+                     label="Confidence",
+                     value=f"{df['pred_conf'].iloc[0]:.2%}"
+                     if "pred_conf" in df.columns
+                     else "",
+                     interactive=False,
+                 )
+
+             category = gr.Radio(
+                 choices=self.class_names,
+                 label="Select Category",
+                 value=df["pred_label"].iloc[0] if "pred_label" in df.columns else None,
+             )
+
+             with gr.Row():
+                 back_btn = gr.Button("← Previous", elem_id="back_btn")
                  submit_btn = gr.Button(
-                     "Submit (W)",
+                     "Submit (↑/Enter)",
                      variant="primary",
                      elem_id="submit_btn",
                      interactive=False,
                  )
-                 next_btn = gr.Button("Next → (D)", elem_id="next_btn")
+                 next_btn = gr.Button("Next →", elem_id="next_btn")

              progress = gr.Slider(
                  minimum=0,
@@ -245,6 +327,73 @@ class ActiveLearner:

              finish_btn = gr.Button("Finish Labeling", variant="primary")

+             with gr.Accordion("Zero-Shot Inference", open=False) as zero_shot_accordion:
+                 gr.Markdown("""
+                 Uses a VLM to predict the label of the image.
+                 """)
+
+                 import xinfer
+                 from xinfer.model_registry import model_registry
+                 from xinfer.types import ModelInputOutput
+
+                 # Get models and filter for image-to-text models
+                 all_models = model_registry.list_models()
+                 model_list = [
+                     model.id
+                     for model in all_models
+                     if model.input_output == ModelInputOutput.IMAGE_TEXT_TO_TEXT
+                 ]
+
+                 with gr.Row():
+                     with gr.Row():
+                         model_dropdown = gr.Dropdown(
+                             choices=model_list,
+                             label="Select a model",
+                             value="vikhyatk/moondream2",
+                         )
+                         device_dropdown = gr.Dropdown(
+                             choices=["cuda", "cpu"],
+                             label="Device",
+                             value="cuda" if torch.cuda.is_available() else "cpu",
+                         )
+                         dtype_dropdown = gr.Dropdown(
+                             choices=["float32", "float16", "bfloat16"],
+                             label="Data Type",
+                             value="float16" if torch.cuda.is_available() else "float32",
+                         )
+
+                     with gr.Column():
+                         prompt_textbox = gr.Textbox(
+                             label="Prompt",
+                             lines=3,
+                             value=f"Classify the image into one of the following categories: {self.class_names}",
+                             interactive=True,
+                         )
+                         inference_btn = gr.Button("Run Inference", variant="primary")
+
+                 result_textbox = gr.Textbox(
+                     label="Result",
+                     lines=3,
+                     interactive=False,
+                 )
+
+                 def run_zero_shot_inference(prompt, model, device, dtype, current_filename):
+                     model = xinfer.create_model(model, device=device, dtype=dtype)
+                     result = model.infer(current_filename, prompt).text
+                     return result
+
+                 inference_btn.click(
+                     fn=run_zero_shot_inference,
+                     inputs=[
+                         prompt_textbox,
+                         model_dropdown,
+                         device_dropdown,
+                         dtype_dropdown,
+                         filename,
+                     ],
+                     outputs=[result_textbox],
+                 )
+
              def update_submit_btn(choice):
                  return gr.Button(interactive=choice is not None)

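The accordion wires zero-shot labeling through xinfer's model registry. The same three calls used above work outside the UI as well; a minimal sketch (model id, prompt, and image path are illustrative):

```python
import xinfer

# Same pattern as the Gradio callback above: create a VLM, run inference,
# and read the generated text from the result object.
model = xinfer.create_model("vikhyatk/moondream2", device="cpu", dtype="float32")
prompt = "Classify the image into one of the following categories: ['cat', 'dog']"
print(model.infer("image.jpg", prompt).text)
```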
@@ -253,21 +402,59 @@ class ActiveLearner:
              )

              def navigate(current_idx, direction):
+                 # Convert current_idx to int before arithmetic
+                 current_idx = int(current_idx)
                  next_idx = current_idx + direction
+
                  if 0 <= next_idx < len(filepaths):
-                     return filepaths[next_idx], filepaths[next_idx], next_idx, next_idx
+                     return (
+                         filepaths[next_idx],
+                         filepaths[next_idx],
+                         df["pred_label"].iloc[next_idx]
+                         if "pred_label" in df.columns
+                         else "",
+                         f"{df['pred_conf'].iloc[next_idx]:.2%}"
+                         if "pred_conf" in df.columns
+                         else "",
+                         df["pred_label"].iloc[next_idx]
+                         if "pred_label" in df.columns
+                         else None,
+                         next_idx,
+                         next_idx,
+                     )
                  return (
                      filepaths[current_idx],
                      filepaths[current_idx],
+                     df["pred_label"].iloc[current_idx]
+                     if "pred_label" in df.columns
+                     else "",
+                     f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                     if "pred_conf" in df.columns
+                     else "",
+                     df["pred_label"].iloc[current_idx]
+                     if "pred_label" in df.columns
+                     else None,
                      current_idx,
                      current_idx,
                  )

              def save_and_next(current_idx, selected_category):
+                 # Convert current_idx to int before arithmetic
+                 current_idx = int(current_idx)
+
                  if selected_category is None:
                      return (
                          filepaths[current_idx],
                          filepaths[current_idx],
+                         df["pred_label"].iloc[current_idx]
+                         if "pred_label" in df.columns
+                         else "",
+                         f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                         if "pred_conf" in df.columns
+                         else "",
+                         df["pred_label"].iloc[current_idx]
+                         if "pred_label" in df.columns
+                         else None,
                          current_idx,
                          current_idx,
                      )
@@ -282,10 +469,33 @@ class ActiveLearner:
                  return (
                      filepaths[current_idx],
                      filepaths[current_idx],
+                     df["pred_label"].iloc[current_idx]
+                     if "pred_label" in df.columns
+                     else "",
+                     f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                     if "pred_conf" in df.columns
+                     else "",
+                     df["pred_label"].iloc[current_idx]
+                     if "pred_label" in df.columns
+                     else None,
                      current_idx,
                      current_idx,
                  )
-                 return filepaths[next_idx], filepaths[next_idx], next_idx, next_idx
+                 return (
+                     filepaths[next_idx],
+                     filepaths[next_idx],
+                     df["pred_label"].iloc[next_idx]
+                     if "pred_label" in df.columns
+                     else "",
+                     f"{df['pred_conf'].iloc[next_idx]:.2%}"
+                     if "pred_conf" in df.columns
+                     else "",
+                     df["pred_label"].iloc[next_idx]
+                     if "pred_label" in df.columns
+                     else None,
+                     next_idx,
+                     next_idx,
+                 )

              def convert_csv_to_parquet():
                  try:
@@ -301,19 +511,43 @@ class ActiveLearner:
              back_btn.click(
                  fn=lambda idx: navigate(idx, -1),
                  inputs=[current_index],
-                 outputs=[filename, image, current_index, progress],
+                 outputs=[
+                     filename,
+                     image,
+                     pred_label,
+                     pred_conf,
+                     category,
+                     current_index,
+                     progress,
+                 ],
              )

              next_btn.click(
                  fn=lambda idx: navigate(idx, 1),
                  inputs=[current_index],
-                 outputs=[filename, image, current_index, progress],
+                 outputs=[
+                     filename,
+                     image,
+                     pred_label,
+                     pred_conf,
+                     category,
+                     current_index,
+                     progress,
+                 ],
              )

              submit_btn.click(
                  fn=save_and_next,
                  inputs=[current_index, category],
-                 outputs=[filename, image, current_index, progress],
+                 outputs=[
+                     filename,
+                     image,
+                     pred_label,
+                     pred_conf,
+                     category,
+                     current_index,
+                     progress,
+                 ],
              )

              finish_btn.click(fn=convert_csv_to_parquet)
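Each handler now returns a seven-element tuple, and Gradio maps return values to the `outputs` list positionally, so the order `filename, image, pred_label, pred_conf, category, current_index, progress` must match the tuples built in `navigate` and `save_and_next` exactly.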
@@ -325,10 +559,6 @@ class ActiveLearner:
          Add samples to the training set.
          """
          new_train_set = df.copy()
-         # new_train_set.drop(columns=["pred_conf"], inplace=True)
-         # new_train_set.rename(columns={"pred_label": "label"}, inplace=True)
-
-         # len_old = len(self.train_set)

          logger.info(f"Adding {len(new_train_set)} samples to training set")
          self.train_set = pd.concat([self.train_set, new_train_set])
@@ -340,13 +570,3 @@ class ActiveLearner:

          self.train_set.to_parquet(f"{output_filename}.parquet")
          logger.info(f"Saved training set to {output_filename}.parquet")
-
-         # if len(self.train_set) == len_old:
-         #     logger.warning("No new samples added to training set")
-
-         # elif len_old + len(new_train_set) < len(self.train_set):
-         #     logger.warning("Some samples were duplicates and removed from training set")
-
-         # else:
-         #     logger.info("All new samples added to training set")
-         #     logger.info(f"Training set now has {len(self.train_set)} samples")
{active_vision-0.0.5.dist-info → active_vision-0.1.0.dist-info}/METADATA RENAMED
@@ -1,10 +1,11 @@
  Metadata-Version: 2.2
  Name: active-vision
- Version: 0.0.5
+ Version: 0.1.0
  Summary: Active learning for edge vision.
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
+ Requires-Dist: accelerate>=1.2.1
  Requires-Dist: datasets>=3.2.0
  Requires-Dist: fastai>=2.7.18
  Requires-Dist: gradio>=5.12.0
@@ -13,6 +14,8 @@ Requires-Dist: ipywidgets>=8.1.5
  Requires-Dist: loguru>=0.7.3
  Requires-Dist: seaborn>=0.13.2
  Requires-Dist: timm>=1.0.13
+ Requires-Dist: transformers>=4.48.0
+ Requires-Dist: xinfer>=0.3.2

  ![Python Version](https://img.shields.io/badge/python-3.10%2B-blue?style=for-the-badge)
  ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg?style=for-the-badge)
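The new dependencies track the zero-shot feature: `xinfer` supplies the VLM wrapper used in the labeling UI, and `transformers` plus `accelerate` are what models such as the default `vikhyatk/moondream2` run on.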
active_vision-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ active_vision/__init__.py,sha256=dDQijes3C7zAUc_08TyblLSP6Lk0PcPPI8PYgEliKCI,43
+ active_vision/core.py,sha256=D_ve-nMv2EWSaQCOBTggleo-1op8JHXchk0QLicGDqg,21715
+ active_vision-0.1.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ active_vision-0.1.0.dist-info/METADATA,sha256=aA793OK3PGKnKVchMQthXl1H14xcBh_kq9tAO9o6jf0,15944
+ active_vision-0.1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ active_vision-0.1.0.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
+ active_vision-0.1.0.dist-info/RECORD,,
active_vision-0.0.5.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
- active_vision/__init__.py,sha256=u-7eEtxmLFoQfY0fM9JSs_lWb4e1c7WxR3cC619BTXE,43
- active_vision/core.py,sha256=mKS-ZZunjPgXuavm_J4oYiO9lm6UNRjFEzIn4kNfdVA,13421
- active_vision-0.0.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- active_vision-0.0.5.dist-info/METADATA,sha256=mSFB-DeJ43roTwswTp3oHcG3CIyKnO-7ZCqaYbw26eQ,15846
- active_vision-0.0.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- active_vision-0.0.5.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
- active_vision-0.0.5.dist-info/RECORD,,