PyPI - active-vision - Versions diffs - 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl - Mend

active-vision 0.0.1 (py3-none-any.whl) → 0.0.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

active_vision/__init__.py CHANGED Viewed

@@ -1 +1,3 @@
-__version__ = "0.0.1"
+__version__ = "0.0.2"
+from .core import *

active_vision/core.py ADDED Viewed

@@ -0,0 +1,149 @@
+import pandas as pd
+from loguru import logger
+from fastai.vision.models import resnet18, resnet34
+from fastai.callback.all import ShowGraphCallback
+from fastai.vision.all import (
+    ImageDataLoaders,
+    aug_transforms,
+    Resize,
+    vision_learner,
+    accuracy,
+    valley,
+    slide,
+    minimum,
+    steep,
+)
+import torch
+import torch.nn.functional as F
+import warnings
+warnings.filterwarnings("ignore", category=FutureWarning)
+class ActiveLearner:
+    def __init__(self, model_name: str):
+        self.model = self.load_model(model_name)
+    def load_model(self, model_name: str):
+        models = {"resnet18": resnet18, "resnet34": resnet34}
+        logger.info(f"Loading model {model_name}")
+        if model_name not in models:
+            logger.error(f"Model {model_name} not found")
+            raise ValueError(f"Model {model_name} not found")
+        return models[model_name]
+    def load_dataset(
+        self,
+        df: pd.DataFrame,
+        filepath_col: str,
+        label_col: str,
+        valid_pct: float = 0.2,
+        batch_size: int = 16,
+        image_size: int = 224,
+    ):
+        logger.info(f"Loading dataset from {filepath_col} and {label_col}")
+        self.train_set = df.copy()
+        logger.info("Creating dataloaders")
+        self.dls = ImageDataLoaders.from_df(
+            df,
+            path=".",
+            valid_pct=valid_pct,
+            fn_col=filepath_col,
+            label_col=label_col,
+            bs=batch_size,
+            item_tfms=Resize(image_size),
+            batch_tfms=aug_transforms(size=image_size, min_scale=0.75),
+        )
+        logger.info("Creating learner")
+        self.learn = vision_learner(self.dls, self.model, metrics=accuracy).to_fp16()
+        self.class_names = self.dls.vocab
+        logger.info("Done. Ready to train.")
+    def lr_find(self):
+        logger.info("Finding optimal learning rate")
+        self.lrs = self.learn.lr_find(suggest_funcs=(minimum, steep, valley, slide))
+        logger.info(f"Optimal learning rate: {self.lrs.valley}")
+    def train(self, epochs: int, lr: float):
+        logger.info(f"Training for {epochs} epochs with learning rate: {lr}")
+        self.learn.fine_tune(epochs, lr, cbs=[ShowGraphCallback()])
+    def predict(self, filepaths: list[str], batch_size: int = 16):
+        """
+        Run inference on an unlabeled dataset. Returns a df with filepaths and predicted labels, and confidence scores.
+        """
+        logger.info(f"Running inference on {len(filepaths)} samples")
+        test_dl = self.dls.test_dl(filepaths, bs=batch_size)
+        preds, _, cls_preds = self.learn.get_preds(dl=test_dl, with_decoded=True)
+        self.pred_df = pd.DataFrame(
+            {
+                "filepath": filepaths,
+                "pred_label": [self.learn.dls.vocab[i] for i in cls_preds.numpy()],
+                "pred_conf": torch.max(F.softmax(preds, dim=1), dim=1)[0].numpy(),
+            }
+        )
+        return self.pred_df
+    def evaluate(self, df: pd.DataFrame, filepath_col: str, label_col: str, batch_size: int = 16):
+        """
+        Evaluate on a labeled dataset. Returns a score.
+        """
+        self.eval_set = df.copy()
+        filepaths = self.eval_set[filepath_col].tolist()
+        labels = self.eval_set[label_col].tolist()
+        test_dl = self.dls.test_dl(filepaths, bs=batch_size)
+        preds, _, cls_preds = self.learn.get_preds(dl=test_dl, with_decoded=True)
+        self.eval_df = pd.DataFrame(
+            {
+                "filepath": filepaths,
+                "label": labels,
+                "pred_label": [self.learn.dls.vocab[i] for i in cls_preds.numpy()],
+            }
+        )
+        accuracy = float((self.eval_df["label"] == self.eval_df["pred_label"]).mean())
+        logger.info(f"Accuracy: {accuracy:.2%}")
+        return accuracy
+    def sample_uncertain(self, df: pd.DataFrame, num_samples: int):
+        """
+        Sample top `num_samples` low confidence samples. Returns a df with filepaths and predicted labels, and confidence scores.
+        """
+        uncertain_df = df.sort_values(
+            by="pred_conf", ascending=True
+        ).head(num_samples)
+        return uncertain_df
+    def add_to_train_set(self, df: pd.DataFrame):
+        """
+        Add samples to the training set.
+        """
+        new_train_set = df.copy()
+        new_train_set.drop(columns=["pred_conf"], inplace=True)
+        new_train_set.rename(columns={"pred_label": "label"}, inplace=True)
+        len_old = len(self.train_set)
+        logger.info(f"Adding {len(new_train_set)} samples to training set")
+        self.train_set = pd.concat([self.train_set, new_train_set])
+        self.train_set = self.train_set.drop_duplicates(
+            subset=["filepath"], keep="last"
+        )
+        self.train_set.reset_index(drop=True, inplace=True)
+        if len(self.train_set) == len_old:
+            logger.warning("No new samples added to training set")
+        elif len_old + len(new_train_set) < len(self.train_set):
+            logger.warning("Some samples were duplicates and removed from training set")
+        else:
+            logger.info("All new samples added to training set")
+            logger.info(f"Training set now has {len(self.train_set)} samples")

{active_vision-0.0.1.dist-info → active_vision-0.0.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: active-vision
-Version: 0.0.1
+Version: 0.0.2
 Summary: Active learning for edge vision.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -14,9 +14,11 @@ Requires-Dist: seaborn>=0.13.2
 ![Python Version](https://img.shields.io/badge/python-3.10%2B-blue?style=for-the-badge)
 ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg?style=for-the-badge)
+![PyPI](https://img.shields.io/pypi/v/active-vision?style=for-the-badge)
+![Downloads](https://img.shields.io/pepy/dt/active-vision?style=for-the-badge&logo=pypi&logoColor=white&label=Downloads&color=purple)
 <p align="center">
-  <img src="./assets/logo.png" alt="active-vision">
+  <img src="https://github.com/dnth/active-vision/blob/main/assets/logo.png" alt="active-vision">
 </p>
 Active learning at the edge for computer vision.
@@ -44,43 +46,37 @@ cd active-vision
 pip install -e .
 ```
-## Usage [WIP]
+## Usage
+See the [notebook](./nbs/end-to-end.ipynb) for a complete example.
 ```python
-import active_vision as av
+from active_vision import ActiveLearner
+import pandas as pd
-# Load a model
-model = av.load_model("resnet18")
+# Create an active learner instance with a model
+al = ActiveLearner("resnet18")
-# Load a dataset
-dataset = av.load_dataset(df)
-# Inital sampling
-dataset = av.initial_sampling(dataset, n_samples=10)
+# Load the dataset into the active learner
+train_df = pd.read_parquet("training_samples.parquet")
+al.load_dataset(train_df, "filepath", "label")
 # Train the model
-model.train()
-# Save the model
-model.save()
-# Evaluate the model
-model.evaluate(df)
+al.train(epochs=3, lr=1e-3)
-# Uncertainty sampling to get the lowest confidence images
-model.uncertainty_sampling()
+# Load evaluation data
+eval_df = pd.read_parquet("evaluation_samples.parquet")
-# Diversity sampling to get the most diverse images (outliers)
-model.diversity_sampling()
+# Evaluate the model on a labeled evaluation set
+accuracy = al.evaluate(eval_df, "filepath", "label")
-# Random sampling
-model.random_sampling()
+# Get predictions from an unlabeled set
+pred_df = al.predict(filepaths)
-# Merge the datasets
-dataset = av.merge_datasets(dataset, dataset_2)
+# Sample low confidence predictions
+uncertain_df = al.sample_uncertain(pred_df, num_samples=10)
-# Launch a streamlit app to label the images
-av.label_images(dataset)
+# Add newly labeled data to training set
+al.add_to_train_set(uncertain_df)
 ```
 ## Workflow

active_vision-0.0.2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+active_vision/__init__.py,sha256=5VE_DRQ_Rgbo7NlPh3-rP2pUClK48jGxPqAcptBscZ8,43
+active_vision/core.py,sha256=RBVabC350wucYl7KJgIp3fc1pS9pxtG14iDb-ZyBJxI,5262
+active_vision-0.0.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+active_vision-0.0.2.dist-info/METADATA,sha256=7_eqZJnGeIPjb4LLZ-Bqu1AMJ_h77_0bNRyS_COEv5w,8350
+active_vision-0.0.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+active_vision-0.0.2.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
+active_vision-0.0.2.dist-info/RECORD,,

active_vision-0.0.1.dist-info/RECORD DELETED Viewed

@@ -1,6 +0,0 @@
-active_vision/__init__.py,sha256=sXLh7g3KC4QCFxcZGBTpG2scR7hmmBsMjq6LqRptkRg,22
-active_vision-0.0.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-active_vision-0.0.1.dist-info/METADATA,sha256=lPOTTVSPAaX3Rn9Q1ci_jgoQOC-HFpQIyTNqrouOYEs,7936
-active_vision-0.0.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-active_vision-0.0.1.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
-active_vision-0.0.1.dist-info/RECORD,,

{active_vision-0.0.1.dist-info → active_vision-0.0.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{active_vision-0.0.1.dist-info → active_vision-0.0.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{active_vision-0.0.1.dist-info → active_vision-0.0.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

active-vision 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl

active-vision 0.0.1 (py3-none-any.whl) → 0.0.2 (py3-none-any.whl)