active-vision 0.0.4__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
active_vision/__init__.py CHANGED
@@ -1,3 +1,3 @@
- __version__ = "0.0.4"
+ __version__ = "0.1.0"

  from .core import *
active_vision/core.py CHANGED
@@ -1,37 +1,48 @@
  import pandas as pd
  from loguru import logger
- from fastai.vision.models import resnet18, resnet34
- from fastai.callback.all import ShowGraphCallback
- from fastai.vision.all import (
-     ImageDataLoaders,
-     aug_transforms,
-     Resize,
-     vision_learner,
-     accuracy,
-     valley,
-     slide,
-     minimum,
-     steep,
- )
+ from fastai.vision.all import *
  import torch
  import torch.nn.functional as F

  import warnings
+ from typing import Callable

  warnings.filterwarnings("ignore", category=FutureWarning)


  class ActiveLearner:
-     def __init__(self, model_name: str):
+     """
+     Active Learning framework for computer vision tasks.
+
+     Attributes:
+         Model Related:
+             model: The base model architecture (str or Callable)
+             learn: fastai Learner object for training
+             lrs: Learning rate finder results
+
+         Data Related:
+             train_set (pd.DataFrame): Training dataset
+             eval_set (pd.DataFrame): Evaluation dataset with ground truth labels
+             dls: fastai DataLoaders object
+             class_names: List of class names from the dataset
+             num_classes (int): Number of classes in the dataset
+
+         Prediction Related:
+             pred_df (pd.DataFrame): Predictions on a dataframe
+             eval_df (pd.DataFrame): Predictions on evaluation data
+     """
+
+     def __init__(self, model_name: str | Callable):
          self.model = self.load_model(model_name)

-     def load_model(self, model_name: str):
-         models = {"resnet18": resnet18, "resnet34": resnet34}
-         logger.info(f"Loading model {model_name}")
-         if model_name not in models:
-             logger.error(f"Model {model_name} not found")
-             raise ValueError(f"Model {model_name} not found")
-         return models[model_name]
+     def load_model(self, model_name: str | Callable):
+         if isinstance(model_name, Callable):
+             logger.info(f"Loading fastai model {model_name.__name__}")
+             return model_name
+
+         if isinstance(model_name, str):
+             logger.info(f"Loading timm model {model_name}")
+             return model_name

      def load_dataset(
          self,
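In 0.1.0, `load_model` accepts either a timm model name (a `str`) or a fastai/torchvision architecture (a `Callable`). A minimal sketch of the two call styles; the specific models below are illustrative, not required by the package:

```python
from active_vision import ActiveLearner
from fastai.vision.models import resnet18

al_timm = ActiveLearner("convnext_small_in22k")  # timm model, resolved by name
al_fastai = ActiveLearner(resnet18)              # fastai/torchvision callable
```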
@@ -41,6 +52,8 @@ class ActiveLearner:
          valid_pct: float = 0.2,
          batch_size: int = 16,
          image_size: int = 224,
+         batch_tfms: Callable = None,
+         learner_path: str = None,
      ):
          logger.info(f"Loading dataset from {filepath_col} and {label_col}")
          self.train_set = df.copy()
@@ -54,21 +67,68 @@ class ActiveLearner:
              label_col=label_col,
              bs=batch_size,
              item_tfms=Resize(image_size),
-             batch_tfms=aug_transforms(size=image_size, min_scale=0.75),
+             batch_tfms=batch_tfms,
          )
-         logger.info("Creating learner")
-         self.learn = vision_learner(self.dls, self.model, metrics=accuracy).to_fp16()
+
+         if learner_path:
+             logger.info(f"Loading learner from {learner_path}")
+             gpu_available = torch.cuda.is_available()
+             if gpu_available:
+                 logger.info(f"Loading learner on GPU.")
+             else:
+                 logger.info(f"Loading learner on CPU.")
+
+             self.learn = load_learner(learner_path, cpu=not gpu_available)
+         else:
+             logger.info("Creating learner")
+             self.learn = vision_learner(
+                 self.dls, self.model, metrics=accuracy
+             ).to_fp16()
+
          self.class_names = self.dls.vocab
+         self.num_classes = self.dls.c
          logger.info("Done. Ready to train.")

+     def show_batch(
+         self,
+         num_samples: int = 9,
+         unique: bool = False,
+         num_rows: int = None,
+         num_cols: int = None,
+     ):
+         """
+         Show a batch of images from the dataset.
+
+         Args:
+             num_samples: Number of samples to show.
+             unique: Whether to show unique samples.
+             num_rows: Number of rows in the grid.
+             num_cols: Number of columns in the grid.
+         """
+         self.dls.show_batch(
+             max_n=num_samples, unique=unique, nrows=num_rows, ncols=num_cols
+         )
+
      def lr_find(self):
          logger.info("Finding optimal learning rate")
          self.lrs = self.learn.lr_find(suggest_funcs=(minimum, steep, valley, slide))
          logger.info(f"Optimal learning rate: {self.lrs.valley}")

-     def train(self, epochs: int, lr: float):
-         logger.info(f"Training for {epochs} epochs with learning rate: {lr}")
-         self.learn.fine_tune(epochs, lr, cbs=[ShowGraphCallback()])
+     def train(self, epochs: int, lr: float, head_tuning_epochs: int = 1):
+         """
+         Train the model.
+
+         Args:
+             epochs: Number of epochs to train for.
+             lr: Learning rate.
+             head_tuning_epochs: Number of epochs to train the head.
+         """
+         logger.info(f"Training head for {head_tuning_epochs} epochs")
+         logger.info(f"Training model end-to-end for {epochs} epochs")
+         logger.info(f"Learning rate: {lr} with one-cycle learning rate scheduler")
+         self.learn.fine_tune(
+             epochs, lr, freeze_epochs=head_tuning_epochs, cbs=[ShowGraphCallback()]
+         )

      def predict(self, filepaths: list[str], batch_size: int = 16):
          """
@@ -112,13 +172,75 @@ class ActiveLearner:
          logger.info(f"Accuracy: {accuracy:.2%}")
          return accuracy

-     def sample_uncertain(self, df: pd.DataFrame, num_samples: int):
+     def sample_uncertain(
+         self, df: pd.DataFrame, num_samples: int, strategy: str = "least-confidence"
+     ):
          """
          Sample top `num_samples` low confidence samples. Returns a df with filepaths and predicted labels, and confidence scores.
+
+         Strategies:
+         - least-confidence: Get top `num_samples` low confidence samples.
+         - margin-of-confidence: Get top `num_samples` samples with the smallest margin between the top two predictions.
+         - ratio-of-confidence: Get top `num_samples` samples with the highest ratio between the top two predictions.
+         - entropy: Get top `num_samples` samples with the highest entropy.
+         """
+
+         # Remove samples that is already in the training set
+         df = df[~df["filepath"].isin(self.train_set["filepath"])].copy()
+
+         if strategy == "least-confidence":
+             logger.info(f"Getting top {num_samples} low confidence samples")
+
+             df.loc[:, "uncertainty_score"] = 1 - (df["pred_conf"]) / (
+                 self.num_classes - (self.num_classes - 1)
+             )
+
+             # Sort by descending uncertainty score
+             uncertain_df = df.sort_values(by="uncertainty_score", ascending=False).head(
+                 num_samples
+             )
+             return uncertain_df
+
+         # TODO: Implement margin of confidence strategy
+         elif strategy == "margin-of-confidence":
+             logger.error("Margin of confidence strategy not implemented")
+             raise NotImplementedError("Margin of confidence strategy not implemented")
+
+         # TODO: Implement ratio of confidence strategy
+         elif strategy == "ratio-of-confidence":
+             logger.error("Ratio of confidence strategy not implemented")
+             raise NotImplementedError("Ratio of confidence strategy not implemented")
+
+         # TODO: Implement entropy strategy
+         elif strategy == "entropy":
+             logger.error("Entropy strategy not implemented")
+             raise NotImplementedError("Entropy strategy not implemented")
+
+         else:
+             logger.error(f"Unknown strategy: {strategy}")
+             raise ValueError(f"Unknown strategy: {strategy}")
+
+     def sample_diverse(self, df: pd.DataFrame, num_samples: int):
+         """
+         Sample top `num_samples` diverse samples. Returns a df with filepaths and predicted labels, and confidence scores.
+
+         Strategies:
+         - model-based-outlier: Get top `num_samples` samples with lowest activation of the model's last layer.
+         - cluster-based: Get top `num_samples` samples with the highest distance to the nearest neighbor.
+         - representative: Get top `num_samples` samples with the highest distance to the centroid of the training set.
+         """
+         logger.error("Diverse sampling strategy not implemented")
+         raise NotImplementedError("Diverse sampling strategy not implemented")
+
+     def sample_random(self, df: pd.DataFrame, num_samples: int, seed: int = None):
+         """
+         Sample `num_samples` random samples. Returns a df with filepaths and predicted labels, and confidence scores.
          """
-         logger.info(f"Getting top {num_samples} low confidence samples")
-         uncertain_df = df.sort_values(by="pred_conf", ascending=True).head(num_samples)
-         return uncertain_df
+
+         logger.info(f"Sampling {num_samples} random samples")
+         if seed is not None:
+             logger.info(f"Using seed: {seed}")
+         return df.sample(n=num_samples, random_state=seed)

      def label(self, df: pd.DataFrame, output_filename: str = "labeled"):
          """
@@ -136,15 +258,15 @@ class ActiveLearner:
                  return;
              }

-             if (e.key.toLowerCase() == "w") {
+             if (e.key === "ArrowUp" || e.key === "Enter") {
                  document.getElementById("submit_btn").click();
-             } else if (e.key.toLowerCase() == "d") {
+             } else if (e.key === "ArrowRight") {
                  document.getElementById("next_btn").click();
-             } else if (e.key.toLowerCase() == "a") {
+             } else if (e.key === "ArrowLeft") {
                  document.getElementById("back_btn").click();
              }
          }
-         document.addEventListener('keypress', shortcuts, false);
+         document.addEventListener('keydown', shortcuts, false);
          </script>
          """

@@ -155,24 +277,45 @@ class ActiveLearner:
          with gr.Blocks(head=shortcut_js) as demo:
              current_index = gr.State(value=0)

-             filename = gr.Textbox(
-                 label="Filename", value=filepaths[0], interactive=False
-             )
-
              image = gr.Image(
                  type="filepath", label="Image", value=filepaths[0], height=500
              )
-             category = gr.Radio(choices=self.class_names, label="Select Category")

              with gr.Row():
-                 back_btn = gr.Button("← Previous (A)", elem_id="back_btn")
+                 filename = gr.Textbox(
+                     label="Filename", value=filepaths[0], interactive=False
+                 )
+
+                 pred_label = gr.Textbox(
+                     label="Predicted Label",
+                     value=df["pred_label"].iloc[0]
+                     if "pred_label" in df.columns
+                     else "",
+                     interactive=False,
+                 )
+                 pred_conf = gr.Textbox(
+                     label="Confidence",
+                     value=f"{df['pred_conf'].iloc[0]:.2%}"
+                     if "pred_conf" in df.columns
+                     else "",
+                     interactive=False,
+                 )
+
+             category = gr.Radio(
+                 choices=self.class_names,
+                 label="Select Category",
+                 value=df["pred_label"].iloc[0] if "pred_label" in df.columns else None,
+             )
+
+             with gr.Row():
+                 back_btn = gr.Button("← Previous", elem_id="back_btn")
                  submit_btn = gr.Button(
-                     "Submit (W)",
+                     "Submit (↑/Enter)",
                      variant="primary",
                      elem_id="submit_btn",
                      interactive=False,
                  )
-                 next_btn = gr.Button("Next → (D)", elem_id="next_btn")
+                 next_btn = gr.Button("Next →", elem_id="next_btn")

              progress = gr.Slider(
                  minimum=0,
@@ -184,6 +327,73 @@ class ActiveLearner:
              )

              finish_btn = gr.Button("Finish Labeling", variant="primary")
+             with gr.Accordion("Zero-Shot Inference", open=False) as zero_shot_accordion:
+                 gr.Markdown("""
+                 Uses a VLM to predict the label of the image.
+                 """)
+
+                 import xinfer
+                 from xinfer.model_registry import model_registry
+                 from xinfer.types import ModelInputOutput
+
+                 # Get models and filter for image-to-text models
+                 all_models = model_registry.list_models()
+                 model_list = [
+                     model.id
+                     for model in all_models
+                     if model.input_output == ModelInputOutput.IMAGE_TEXT_TO_TEXT
+                 ]
+
+                 with gr.Row():
+                     with gr.Row():
+                         model_dropdown = gr.Dropdown(
+                             choices=model_list,
+                             label="Select a model",
+                             value="vikhyatk/moondream2",
+                         )
+                         device_dropdown = gr.Dropdown(
+                             choices=["cuda", "cpu"],
+                             label="Device",
+                             value="cuda" if torch.cuda.is_available() else "cpu",
+                         )
+                         dtype_dropdown = gr.Dropdown(
+                             choices=["float32", "float16", "bfloat16"],
+                             label="Data Type",
+                             value="float16" if torch.cuda.is_available() else "float32",
+                         )
+
+                     with gr.Column():
+                         prompt_textbox = gr.Textbox(
+                             label="Prompt",
+                             lines=3,
+                             value=f"Classify the image into one of the following categories: {self.class_names}",
+                             interactive=True,
+                         )
+                         inference_btn = gr.Button("Run Inference", variant="primary")
+
+                     result_textbox = gr.Textbox(
+                         label="Result",
+                         lines=3,
+                         interactive=False,
+                     )
+
+                 def run_zero_shot_inference(prompt, model, device, dtype, current_filename):
+                     model = xinfer.create_model(model, device=device, dtype=dtype)
+                     result = model.infer(current_filename, prompt).text
+                     return result
+
+                 inference_btn.click(
+                     fn=run_zero_shot_inference,
+                     inputs=[
+                         prompt_textbox,
+                         model_dropdown,
+                         device_dropdown,
+                         dtype_dropdown,
+                         filename,
+                     ],
+                     outputs=[result_textbox],
+                 )
+
              def update_submit_btn(choice):
                  return gr.Button(interactive=choice is not None)

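The zero-shot accordion drives xinfer through the two calls shown above (`create_model` and `infer`). The same calls, used standalone outside Gradio; the image path and prompt are illustrative:

```python
import xinfer

# Same calls the "Run Inference" button is wired to in the labeling UI.
model = xinfer.create_model("vikhyatk/moondream2", device="cuda", dtype="float16")
prompt = "Classify the image into one of the following categories: ['cat', 'dog']"
result = model.infer("path/to/image.jpg", prompt).text
print(result)
```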
@@ -192,21 +402,59 @@ class ActiveLearner:
              )

              def navigate(current_idx, direction):
+                 # Convert current_idx to int before arithmetic
+                 current_idx = int(current_idx)
                  next_idx = current_idx + direction
+
                  if 0 <= next_idx < len(filepaths):
-                     return filepaths[next_idx], filepaths[next_idx], next_idx, next_idx
+                     return (
+                         filepaths[next_idx],
+                         filepaths[next_idx],
+                         df["pred_label"].iloc[next_idx]
+                         if "pred_label" in df.columns
+                         else "",
+                         f"{df['pred_conf'].iloc[next_idx]:.2%}"
+                         if "pred_conf" in df.columns
+                         else "",
+                         df["pred_label"].iloc[next_idx]
+                         if "pred_label" in df.columns
+                         else None,
+                         next_idx,
+                         next_idx,
+                     )
                  return (
                      filepaths[current_idx],
                      filepaths[current_idx],
+                     df["pred_label"].iloc[current_idx]
+                     if "pred_label" in df.columns
+                     else "",
+                     f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                     if "pred_conf" in df.columns
+                     else "",
+                     df["pred_label"].iloc[current_idx]
+                     if "pred_label" in df.columns
+                     else None,
                      current_idx,
                      current_idx,
                  )

              def save_and_next(current_idx, selected_category):
+                 # Convert current_idx to int before arithmetic
+                 current_idx = int(current_idx)
+
                  if selected_category is None:
                      return (
                          filepaths[current_idx],
                          filepaths[current_idx],
+                         df["pred_label"].iloc[current_idx]
+                         if "pred_label" in df.columns
+                         else "",
+                         f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                         if "pred_conf" in df.columns
+                         else "",
+                         df["pred_label"].iloc[current_idx]
+                         if "pred_label" in df.columns
+                         else None,
                          current_idx,
                          current_idx,
                      )
@@ -221,10 +469,33 @@ class ActiveLearner:
                      return (
                          filepaths[current_idx],
                          filepaths[current_idx],
+                         df["pred_label"].iloc[current_idx]
+                         if "pred_label" in df.columns
+                         else "",
+                         f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                         if "pred_conf" in df.columns
+                         else "",
+                         df["pred_label"].iloc[current_idx]
+                         if "pred_label" in df.columns
+                         else None,
                          current_idx,
                          current_idx,
                      )
-                 return filepaths[next_idx], filepaths[next_idx], next_idx, next_idx
+                 return (
+                     filepaths[next_idx],
+                     filepaths[next_idx],
+                     df["pred_label"].iloc[next_idx]
+                     if "pred_label" in df.columns
+                     else "",
+                     f"{df['pred_conf'].iloc[next_idx]:.2%}"
+                     if "pred_conf" in df.columns
+                     else "",
+                     df["pred_label"].iloc[next_idx]
+                     if "pred_label" in df.columns
+                     else None,
+                     next_idx,
+                     next_idx,
+                 )

              def convert_csv_to_parquet():
                  try:
@@ -240,19 +511,43 @@ class ActiveLearner:
              back_btn.click(
                  fn=lambda idx: navigate(idx, -1),
                  inputs=[current_index],
-                 outputs=[filename, image, current_index, progress],
+                 outputs=[
+                     filename,
+                     image,
+                     pred_label,
+                     pred_conf,
+                     category,
+                     current_index,
+                     progress,
+                 ],
              )

              next_btn.click(
                  fn=lambda idx: navigate(idx, 1),
                  inputs=[current_index],
-                 outputs=[filename, image, current_index, progress],
+                 outputs=[
+                     filename,
+                     image,
+                     pred_label,
+                     pred_conf,
+                     category,
+                     current_index,
+                     progress,
+                 ],
              )

              submit_btn.click(
                  fn=save_and_next,
                  inputs=[current_index, category],
-                 outputs=[filename, image, current_index, progress],
+                 outputs=[
+                     filename,
+                     image,
+                     pred_label,
+                     pred_conf,
+                     category,
+                     current_index,
+                     progress,
+                 ],
              )

              finish_btn.click(fn=convert_csv_to_parquet)
@@ -264,10 +559,6 @@ class ActiveLearner:
          Add samples to the training set.
          """
          new_train_set = df.copy()
-         # new_train_set.drop(columns=["pred_conf"], inplace=True)
-         # new_train_set.rename(columns={"pred_label": "label"}, inplace=True)
-
-         # len_old = len(self.train_set)

          logger.info(f"Adding {len(new_train_set)} samples to training set")
          self.train_set = pd.concat([self.train_set, new_train_set])
@@ -279,13 +570,3 @@ class ActiveLearner:

          self.train_set.to_parquet(f"{output_filename}.parquet")
          logger.info(f"Saved training set to {output_filename}.parquet")
-
-         # if len(self.train_set) == len_old:
-         #     logger.warning("No new samples added to training set")
-
-         # elif len_old + len(new_train_set) < len(self.train_set):
-         #     logger.warning("Some samples were duplicates and removed from training set")
-
-         # else:
-         #     logger.info("All new samples added to training set")
-         #     logger.info(f"Training set now has {len(self.train_set)} samples")
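To close the loop, the freshly labeled samples can be folded back into the training set with the add-samples method whose body is shown above; its name and the file paths below are assumed for illustration:

```python
import pandas as pd

# Method name assumed: the diff shows its body (concat + to_parquet) but not its signature.
labeled_df = pd.read_parquet("uncertain.parquet")  # output of the labeling UI (assumed path)
al.add_to_train_set(labeled_df, output_filename="active_labeled")
# The merged training set is written to active_labeled.parquet for the next training round.
```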
active_vision-0.1.0.dist-info/METADATA CHANGED
@@ -1,10 +1,11 @@
  Metadata-Version: 2.2
  Name: active-vision
- Version: 0.0.4
+ Version: 0.1.0
  Summary: Active learning for edge vision.
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
+ Requires-Dist: accelerate>=1.2.1
  Requires-Dist: datasets>=3.2.0
  Requires-Dist: fastai>=2.7.18
  Requires-Dist: gradio>=5.12.0
@@ -13,6 +14,8 @@ Requires-Dist: ipywidgets>=8.1.5
  Requires-Dist: loguru>=0.7.3
  Requires-Dist: seaborn>=0.13.2
  Requires-Dist: timm>=1.0.13
+ Requires-Dist: transformers>=4.48.0
+ Requires-Dist: xinfer>=0.3.2

  ![Python Version](https://img.shields.io/badge/python-3.10%2B-blue?style=for-the-badge)
  ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg?style=for-the-badge)
@@ -23,17 +26,38 @@ Requires-Dist: timm>=1.0.13
  <img src="https://raw.githubusercontent.com/dnth/active-vision/main/assets/logo.png" alt="active-vision">
  </p>

- Active learning at the edge for computer vision.
+ The goal of this project is to create a framework for the active learning loop for computer vision. The diagram below shows a general workflow of how the active learning loop works.

- The goal of this project is to create a framework for the active learning loop for computer vision deployed on edge devices.
+ <p align="center">
+ <img src="https://raw.githubusercontent.com/dnth/active-vision/main/assets/data_flywheel.gif" alt="active-vision", width="700">
+ </p>

- Supported tasks:
+ ### Supported tasks:
  - [X] Image classification
  - [ ] Object detection
  - [ ] Segmentation

+ ### Supported models:
+ - [X] Fastai models
+ - [X] Torchvision models
+ - [X] Timm models
+ - [ ] Hugging Face models
+
+ ### Supported Active Learning Strategies:
+
+ Uncertainty Sampling:
+ - [X] Least confidence
+ - [ ] Margin of confidence
+ - [ ] Ratio of confidence
+ - [ ] Entropy

- ## Installation
+ Diverse Sampling:
+ - [X] Random sampling
+ - [ ] Model-based outlier
+ - [ ] Cluster-based
+ - [ ] Representative
+
+ ## 📦 Installation

  Get a release from PyPI
  ```bash
@@ -58,18 +82,18 @@ uv sync
  Once the virtual environment is created, you can install the package using pip.

  > [!TIP]
- > If you're using uv add a uv before the pip install command to install into your virtual environment. Eg:
+ > If you're using uv, add `uv` before the pip install command to install into your virtual environment, e.g.:
  > ```bash
  > uv pip install active-vision
  > ```

- ## Usage
+ ## 🛠️ Usage
  See the [notebook](./nbs/04_relabel_loop.ipynb) for a complete example.

- Be sure to prepared 3 datasets:
- - [initial_samples](./nbs/initial_samples.parquet): A dataframe of an existing labeled training dataset to seed the training set.
- - [unlabeled](./nbs/unlabeled_samples.parquet): A dataframe of unlabeled data which we will sample from using active learning.
- - [eval](./nbs/evaluation_samples.parquet): A dataframe of labeled data which we will use to evaluate the performance of the model.
+ Be sure to prepare 3 subsets of the dataset:
+ - [Initial samples](./nbs/initial_samples.parquet): A dataframe of labeled images used to train an initial model. If you don't have any labeled data, you can label some images yourself.
+ - [Unlabeled samples](./nbs/unlabeled_samples.parquet): A dataframe of *unlabeled* images. We will continuously sample from this set using active learning strategies.
+ - [Evaluation samples](./nbs/evaluation_samples.parquet): A dataframe of *labeled* images. We will use this set to evaluate the performance of the model. This is the test set; DO NOT use it for active learning. Split it out at the beginning.

  As a toy example I created the above 3 datasets from the imagenette dataset.

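A minimal sketch of loading the three subsets; the paths match the notebook links above:

```python
import pandas as pd

initial_df = pd.read_parquet("nbs/initial_samples.parquet")      # labeled seed set
unlabeled_df = pd.read_parquet("nbs/unlabeled_samples.parquet")  # pool to sample from
eval_df = pd.read_parquet("nbs/evaluation_samples.parquet")      # held-out test set
```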
@@ -100,7 +124,7 @@ uncertain_df = al.sample_uncertain(pred_df, num_samples=10)
  al.label(uncertain_df, output_filename="uncertain")
  ```

- ![Gradio UI](./assets/labeling_ui.png)
+ ![Gradio UI](https://raw.githubusercontent.com/dnth/active-vision/main/assets/labeling_ui.png)

  Once complete, the labeled samples will be save into a new df.
  We can now add the newly labeled data to the training set.
@@ -119,11 +143,77 @@ Repeat the process until the model is good enough. Use the dataset to train a la
  >
  > But using the dataset of 200+ images, I trained a more capable model (convnext_small_in22k) and got 99.3% accuracy on the evaluation set. See the [notebook](./nbs/05_retrain_larger.ipynb) for more details.

- ## Workflow
- There are two workflows for active learning at the edge that we can use depending on the availability of labeled data.
+
+ ## 📊 Benchmarks
+ This section contains the benchmarks I ran using the active learning loop on various datasets.
+
+ Column description:
+ - `#Labeled Images`: The number of labeled images used to train the model.
+ - `Evaluation Accuracy`: The accuracy of the model on the evaluation set.
+ - `Train Epochs`: The number of epochs used to train the model.
+ - `Model`: The model used to train.
+ - `Active Learning`: Whether active learning was used to train the model.
+ - `Source`: The source of the results.
+
+ ### Imagenette
+ - num classes: 10
+ - num images: 9469
+
+ To start the active learning loop, I labeled 100 images (10 images from each class) and iteratively relabeled the most informative images until I hit 275 labeled images.
+
+ The active learning loop is an iterative process and can keep going until you hit a stopping point. You can decide your own stopping point based on your use case. It could be:
+ - You ran out of data to label.
+ - You hit a performance goal.
+ - You hit a budget.
+ - Other criteria.
+
+ For this dataset, I decided to stop the active learning loop at 275 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard.
+
+ | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+ |-----------------|---------------------|--------------|----------------------|----------------|--------|
+ | 9469 | 94.90% | 80 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
+ | 9469 | 95.11% | 200 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
+ | 275 | 99.33% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/05_retrain_larger.ipynb) |
+ | 275 | 93.40% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/04_relabel_loop.ipynb) |
+
+ ### Dog Food
+ - num classes: 2
+ - num images: 2100
+
+ To start the active learning loop, I labeled 20 images (10 images from each class) and iteratively relabeled the most informative images until I hit 160 labeled images.
+
+ I decided to stop the active learning loop at 160 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
+
+ | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+ |-----------------|---------------------|--------------|-------|----------------|--------|
+ | 2100 | 99.70% | ? | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/abhishek/autotrain-dog-vs-food) |
+ | 160 | 100.00% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/02_train.ipynb) |
+ | 160 | 97.60% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/01_label.ipynb) |
+
+ ### Oxford-IIIT Pet
+ - num classes: 37
+ - num images: 3680
+
+ To start the active learning loop, I labeled 370 images (10 images from each class) and iteratively relabeled the most informative images until I hit 612 labeled images.
+
+ I decided to stop the active learning loop at 612 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
+
+ | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+ |-----------------|---------------------|--------------|-------|----------------|--------|
+ | 3680 | 95.40% | 5 | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/walterg777/vit-base-oxford-iiit-pets) |
+ | 612 | 90.26% | 11 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/02_train.ipynb) |
+ | 612 | 91.38% | 11 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/03_train_vit.ipynb) |
+
+ ## ➿ Workflow
+ This section describes the active learning workflows in more detail. There are two workflows we can use, depending on the availability of labeled data.

  ### With unlabeled data
- If we have no labeled data, we can use active learning to iteratively improve the model and build a labeled dataset.
+ If we have no labeled data, the goal of the active learning loop is to build a reasonably good labeled dataset to train a larger model.
+
+ Steps:

  1. Load a small proxy model.
  2. Label an initial dataset. If there is none, you'll have to label some images.
@@ -155,24 +245,25 @@ graph TD
  ```

  ### With labeled data
- If we have a labeled dataset, we can use active learning to iteratively improve the dataset and the model by fixing the most important label errors.
+ If we already have a labeled dataset, the goal of the active learning loop is to iteratively improve the dataset and the model by fixing the most important label errors.
+
+ Steps:

  1. Load a small proxy model.
  2. Train the proxy model on the labeled dataset.
  3. Run inference on the entire labeled dataset.
- 4. Get the most important label errors with active learning.
+ 4. Get the most impactful label errors with active learning.
  5. Fix the label errors.
  6. Repeat steps 2-5 until the dataset is good enough.
  7. Save the labeled dataset.
  8. Train a larger model on the saved labeled dataset.


-
  ```mermaid
  graph TD
  A[Load a small proxy model] --> B[Train proxy model on labeled dataset]
  B --> C[Run inference on labeled dataset]
- C --> D[Get important label errors using active learning]
+ C --> D[Get label errors using active learning]
  D --> E[Fix label errors]
  E --> F{Dataset good enough?}
  F -->|No| B
@@ -181,6 +272,7 @@ graph TD
  ```


+
  <!-- ## Methodology
  To test out the workflows we will use the [imagenette dataset](https://huggingface.co/datasets/frgfm/imagenette). But this will be applicable to any dataset.

@@ -0,0 +1,7 @@
+ active_vision/__init__.py,sha256=dDQijes3C7zAUc_08TyblLSP6Lk0PcPPI8PYgEliKCI,43
+ active_vision/core.py,sha256=D_ve-nMv2EWSaQCOBTggleo-1op8JHXchk0QLicGDqg,21715
+ active_vision-0.1.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ active_vision-0.1.0.dist-info/METADATA,sha256=aA793OK3PGKnKVchMQthXl1H14xcBh_kq9tAO9o6jf0,15944
+ active_vision-0.1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ active_vision-0.1.0.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
+ active_vision-0.1.0.dist-info/RECORD,,
@@ -1,7 +0,0 @@
- active_vision/__init__.py,sha256=XITukjUU49hPFzxCzmxqJAUWh3YE8sWQzmyZ5bVra88,43
- active_vision/core.py,sha256=0aXDI5Gpj0Spk7TSIxJf8aJDDBgZh0-jkWdYyZ1Zric,10713
- active_vision-0.0.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- active_vision-0.0.4.dist-info/METADATA,sha256=WlvtrzUy8m2nr8izUuTtysdQXO4ZjCO9vGWt2i_GMUI,10421
- active_vision-0.0.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- active_vision-0.0.4.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
- active_vision-0.0.4.dist-info/RECORD,,