active-vision 0.0.5__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
active_vision/__init__.py CHANGED
@@ -1,3 +1,3 @@
- __version__ = "0.0.5"
+ __version__ = "0.1.1"
 
  from .core import *
active_vision/core.py CHANGED
@@ -1,19 +1,7 @@
  import pandas as pd
  from loguru import logger
- from fastai.callback.all import ShowGraphCallback
- from fastai.vision.all import (
-     ImageDataLoaders,
-     aug_transforms,
-     Resize,
-     vision_learner,
-     accuracy,
-     valley,
-     slide,
-     minimum,
-     steep,
- )
+ from fastai.vision.all import *
  import torch
- import torch.nn.functional as F
 
  import warnings
  from typing import Callable
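Note: the explicit import list is collapsed into a star import. A hedged sketch of the explicit equivalents that the new code paths rely on, should one want to avoid `import *` (all of these names are, to my knowledge, re-exported by `fastai.vision.all`):

```python
# Hedged sketch (not part of the package): explicit equivalents of the names
# that core.py now pulls in via the star import.
from fastai.vision.all import (
    ImageDataLoaders, Resize, aug_transforms,   # data loading and transforms
    vision_learner, load_learner, accuracy,     # creating or re-loading a Learner
    ShowGraphCallback,                          # live loss plot during training
    minimum, steep, valley, slide,              # lr_find suggestion functions
)
```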
@@ -22,7 +10,28 @@ warnings.filterwarnings("ignore", category=FutureWarning)
 
 
  class ActiveLearner:
-     def __init__(self, model_name: str):
+     """
+     Active Learning framework for computer vision tasks.
+
+     Attributes:
+         Model Related:
+             model: The base model architecture (str or Callable)
+             learn: fastai Learner object for training
+             lrs: Learning rate finder results
+
+         Data Related:
+             train_set (pd.DataFrame): Training dataset
+             eval_set (pd.DataFrame): Evaluation dataset with ground truth labels
+             dls: fastai DataLoaders object
+             class_names: List of class names from the dataset
+             num_classes (int): Number of classes in the dataset
+
+         Prediction Related:
+             pred_df (pd.DataFrame): Predictions on a dataframe
+             eval_df (pd.DataFrame): Predictions on evaluation data
+     """
+
+     def __init__(self, model_name: str | Callable):
          self.model = self.load_model(model_name)
 
      def load_model(self, model_name: str | Callable):
@@ -43,6 +52,7 @@ class ActiveLearner:
          batch_size: int = 16,
          image_size: int = 224,
          batch_tfms: Callable = None,
+         learner_path: str = None,
      ):
          logger.info(f"Loading dataset from {filepath_col} and {label_col}")
          self.train_set = df.copy()
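Note: `learner_path` is new in 0.1.1 and lets `load_dataset` resume from an exported learner instead of creating one. A minimal usage sketch, assuming a hypothetical `model.pkl` produced by fastai's `learn.export()`:

```python
# Minimal usage sketch; "model.pkl" is a hypothetical export path.
al = ActiveLearner("resnet18")
al.load_dataset(
    df,
    filepath_col="filepath",
    label_col="label",
    learner_path="model.pkl",  # triggers load_learner() instead of vision_learner()
)
```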
@@ -58,22 +68,66 @@ class ActiveLearner:
              item_tfms=Resize(image_size),
              batch_tfms=batch_tfms,
          )
-         logger.info("Creating learner")
-         self.learn = vision_learner(self.dls, self.model, metrics=accuracy).to_fp16()
+
+         if learner_path:
+             logger.info(f"Loading learner from {learner_path}")
+             gpu_available = torch.cuda.is_available()
+             if gpu_available:
+                 logger.info(f"Loading learner on GPU.")
+             else:
+                 logger.info(f"Loading learner on CPU.")
+
+             self.learn = load_learner(learner_path, cpu=not gpu_available)
+         else:
+             logger.info("Creating learner")
+             self.learn = vision_learner(
+                 self.dls, self.model, metrics=accuracy
+             ).to_fp16()
+
          self.class_names = self.dls.vocab
+         self.num_classes = self.dls.c
          logger.info("Done. Ready to train.")
 
-     def show_batch(self):
-         self.dls.show_batch()
+     def show_batch(
+         self,
+         num_samples: int = 9,
+         unique: bool = False,
+         num_rows: int = None,
+         num_cols: int = None,
+     ):
+         """
+         Show a batch of images from the dataset.
+
+         Args:
+             num_samples: Number of samples to show.
+             unique: Whether to show unique samples.
+             num_rows: Number of rows in the grid.
+             num_cols: Number of columns in the grid.
+         """
+         self.dls.show_batch(
+             max_n=num_samples, unique=unique, nrows=num_rows, ncols=num_cols
+         )
 
      def lr_find(self):
          logger.info("Finding optimal learning rate")
          self.lrs = self.learn.lr_find(suggest_funcs=(minimum, steep, valley, slide))
          logger.info(f"Optimal learning rate: {self.lrs.valley}")
 
-     def train(self, epochs: int, lr: float):
-         logger.info(f"Training for {epochs} epochs with learning rate: {lr}")
-         self.learn.fine_tune(epochs, lr, cbs=[ShowGraphCallback()])
+     def train(self, epochs: int, lr: float, head_tuning_epochs: int = 1):
+         """
+         Train the model.
+
+         Args:
+             epochs: Number of epochs to train for.
+             lr: Learning rate.
+             head_tuning_epochs: Number of epochs to train the head.
+         """
+         logger.info(f"Training head for {head_tuning_epochs} epochs")
+         logger.info(f"Training model end-to-end for {epochs} epochs")
+         logger.info(f"Learning rate: {lr} with one-cycle learning rate scheduler")
+         self.learn.fine_tune(
+             epochs, lr, freeze_epochs=head_tuning_epochs, cbs=[ShowGraphCallback()]
+         )
 
      def predict(self, filepaths: list[str], batch_size: int = 16):
          """
@@ -87,7 +141,8 @@ class ActiveLearner:
              {
                  "filepath": filepaths,
                  "pred_label": [self.learn.dls.vocab[i] for i in cls_preds.numpy()],
-                 "pred_conf": torch.max(F.softmax(preds, dim=1), dim=1)[0].numpy(),
+                 "pred_conf": torch.max(preds, dim=1)[0].numpy(),
+                 "pred_raw": preds.numpy().tolist(),
              }
          )
          return self.pred_df
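Note: dropping `F.softmax` is sound because fastai's `Learner.get_preds` applies the loss function's final activation by default, so for a cross-entropy classifier `preds` already holds per-class probabilities; the new `pred_raw` column keeps the full probability vector for the UI's bar plot. A sketch of the assumption, with `learn` and `test_dl` as placeholder names:

```python
import torch

# preds returned by get_preds are already softmax probabilities for a
# classifier, so each row should sum to 1 (placeholder learner/dataloader).
probs, _ = learn.get_preds(dl=test_dl)
assert torch.allclose(probs.sum(dim=1), torch.ones(len(probs)), atol=1e-4)
```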
@@ -131,11 +186,17 @@ class ActiveLearner:
          """
 
          # Remove samples that is already in the training set
-         df = df[~df["filepath"].isin(self.train_set["filepath"])]
+         df = df[~df["filepath"].isin(self.train_set["filepath"])].copy()
 
          if strategy == "least-confidence":
              logger.info(f"Getting top {num_samples} low confidence samples")
-             uncertain_df = df.sort_values(by="pred_conf", ascending=True).head(
+
+             df.loc[:, "uncertainty_score"] = 1 - (df["pred_conf"]) / (
+                 self.num_classes - (self.num_classes - 1)
+             )
+
+             # Sort by descending uncertainty score
+             uncertain_df = df.sort_values(by="uncertainty_score", ascending=False).head(
                  num_samples
              )
              return uncertain_df
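Note: the denominator `num_classes - (num_classes - 1)` always evaluates to 1, so the uncertainty score reduces to `1 - pred_conf` and the selected samples are identical to sorting by confidence ascending. The normalized least-confidence score this appears to aim for scales by n/(n-1), so a uniform prediction (p_max = 1/n) maps to exactly 1.0; a sketch:

```python
import pandas as pd

def normalized_least_confidence(pred_conf: pd.Series, num_classes: int) -> pd.Series:
    # (1 - p_max) rescaled so the maximum possible uncertainty,
    # a uniform prediction with p_max = 1/n, maps to 1.0.
    return (1 - pred_conf) * (num_classes / (num_classes - 1))
```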
@@ -197,15 +258,15 @@ class ActiveLearner:
                  return;
              }
 
-             if (e.key.toLowerCase() == "w") {
+             if (e.key === "ArrowUp" || e.key === "Enter") {
                  document.getElementById("submit_btn").click();
-             } else if (e.key.toLowerCase() == "d") {
+             } else if (e.key === "ArrowRight") {
                  document.getElementById("next_btn").click();
-             } else if (e.key.toLowerCase() == "a") {
+             } else if (e.key === "ArrowLeft") {
                  document.getElementById("back_btn").click();
              }
          }
-         document.addEventListener('keypress', shortcuts, false);
+         document.addEventListener('keydown', shortcuts, false);
      </script>
      """
 
@@ -214,36 +275,149 @@ class ActiveLearner:
          filepaths = df["filepath"].tolist()
 
          with gr.Blocks(head=shortcut_js) as demo:
-             current_index = gr.State(value=0)
+             with gr.Tabs():
+                 with gr.Tab("Labeling"):
+                     current_index = gr.State(value=0)
+
+                     with gr.Row(min_height=500):
+                         image = gr.Image(
+                             type="filepath",
+                             label="Image",
+                             value=filepaths[0],
+                             height=500
+                         )
+
+                         # Add bar plot with top 5 predictions
+                         with gr.Column():
+                             pred_plot = gr.BarPlot(
+                                 x="probability",
+                                 y="class",
+                                 title="Top 5 Predictions",
+                                 x_lim=[0, 1],
+                                 value=None
+                                 if "pred_raw" not in df.columns
+                                 else pd.DataFrame(
+                                     {
+                                         "class": self.class_names,
+                                         "probability": df["pred_raw"].iloc[0],
+                                     }
+                                 ).nlargest(5, "probability"),
+                             )
+
+                     filename = gr.Textbox(
+                         label="Filename", value=filepaths[0], interactive=False
+                     )
+
+                     pred_label = gr.Textbox(
+                         label="Predicted Label",
+                         value=df["pred_label"].iloc[0]
+                         if "pred_label" in df.columns
+                         else "",
+                         interactive=False,
+                     )
+                     pred_conf = gr.Textbox(
+                         label="Confidence",
+                         value=f"{df['pred_conf'].iloc[0]:.2%}"
+                         if "pred_conf" in df.columns
+                         else "",
+                         interactive=False,
+                     )
+
+                     category = gr.Radio(
+                         choices=self.class_names,
+                         label="Select Category",
+                         value=df["pred_label"].iloc[0]
+                         if "pred_label" in df.columns
+                         else None,
+                     )
 
-             filename = gr.Textbox(
-                 label="Filename", value=filepaths[0], interactive=False
-             )
+                     with gr.Row():
+                         back_btn = gr.Button("← Previous", elem_id="back_btn")
+                         submit_btn = gr.Button(
+                             "Submit (↑/Enter)",
+                             variant="primary",
+                             elem_id="submit_btn",
+                         )
+                         next_btn = gr.Button("Next →", elem_id="next_btn")
+
+                     progress = gr.Slider(
+                         minimum=0,
+                         maximum=len(filepaths) - 1,
+                         value=0,
+                         label="Progress",
+                         interactive=False,
+                     )
 
-             image = gr.Image(
-                 type="filepath", label="Image", value=filepaths[0], height=500
+                     finish_btn = gr.Button("Finish Labeling", variant="primary")
+
+                 with gr.Tab("Zero-Shot Inference"):
+                     gr.Markdown("""
+                     Uses a VLM to predict the label of the image.
+                     """)
+
+                     import xinfer
+                     from xinfer.model_registry import model_registry
+                     from xinfer.types import ModelInputOutput
+
+                     # Get models and filter for image-to-text models
+                     all_models = model_registry.list_models()
+                     model_list = [
+                         model.id
+                         for model in all_models
+                         if model.input_output == ModelInputOutput.IMAGE_TEXT_TO_TEXT
+                     ]
+
+                     with gr.Row():
+                         with gr.Row():
+                             model_dropdown = gr.Dropdown(
+                                 choices=model_list,
+                                 label="Select a model",
+                                 value="vikhyatk/moondream2",
+                             )
+                             device_dropdown = gr.Dropdown(
+                                 choices=["cuda", "cpu"],
+                                 label="Device",
+                                 value="cuda" if torch.cuda.is_available() else "cpu",
+                             )
+                             dtype_dropdown = gr.Dropdown(
+                                 choices=["float32", "float16", "bfloat16"],
+                                 label="Data Type",
+                                 value="float16"
+                                 if torch.cuda.is_available()
+                                 else "float32",
+                             )
+
+                         with gr.Column():
+                             prompt_textbox = gr.Textbox(
+                                 label="Prompt",
+                                 lines=5,
+                                 value=f"Classify the image into one of the following categories: {self.class_names}. Answer with the category name only.",
+                                 interactive=True,
+                             )
+                             inference_btn = gr.Button("Run Inference", variant="primary")
+
+                     result_textbox = gr.Textbox(
+                         label="Result",
+                         lines=3,
+                         interactive=False,
+                     )
+
+                     def run_zero_shot_inference(prompt, model, device, dtype, current_filename):
+                         model = xinfer.create_model(model, device=device, dtype=dtype)
+                         result = model.infer(current_filename, prompt).text
+                         return result
+
+                     inference_btn.click(
+                         fn=run_zero_shot_inference,
+                         inputs=[
+                             prompt_textbox,
+                             model_dropdown,
+                             device_dropdown,
+                             dtype_dropdown,
+                             filename,
+                         ],
+                         outputs=[result_textbox],
              )
-             category = gr.Radio(choices=self.class_names, label="Select Category")
-
-             with gr.Row():
-                 back_btn = gr.Button("← Previous (A)", elem_id="back_btn")
-                 submit_btn = gr.Button(
-                     "Submit (W)",
-                     variant="primary",
-                     elem_id="submit_btn",
-                     interactive=False,
-                 )
-                 next_btn = gr.Button("Next → (D)", elem_id="next_btn")
-
-             progress = gr.Slider(
-                 minimum=0,
-                 maximum=len(filepaths) - 1,
-                 value=0,
-                 label="Progress",
-                 interactive=False,
-             )
-
-             finish_btn = gr.Button("Finish Labeling", variant="primary")
 
              def update_submit_btn(choice):
                  return gr.Button(interactive=choice is not None)
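Note: the zero-shot tab is driven by two xinfer calls, `create_model` and `infer`, both visible above. The same calls as a standalone sketch (the image path and prompt are placeholders):

```python
import xinfer

# Same calls as run_zero_shot_inference() above, outside of Gradio.
model = xinfer.create_model("vikhyatk/moondream2", device="cpu", dtype="float32")
result = model.infer("path/to/image.jpg", "Classify the image. Answer with the category name only.")
print(result.text)
```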
@@ -253,23 +427,84 @@ class ActiveLearner:
              )
 
              def navigate(current_idx, direction):
+                 # Convert current_idx to int before arithmetic
+                 current_idx = int(current_idx)
                  next_idx = current_idx + direction
+
                  if 0 <= next_idx < len(filepaths):
-                     return filepaths[next_idx], filepaths[next_idx], next_idx, next_idx
+                     plot_data = (
+                         None
+                         if "pred_raw" not in df.columns
+                         else pd.DataFrame(
+                             {
+                                 "class": self.class_names,
+                                 "probability": df["pred_raw"].iloc[next_idx],
+                             }
+                         ).nlargest(5, "probability")
+                     )
+                     return (
+                         filepaths[next_idx],
+                         filepaths[next_idx],
+                         df["pred_label"].iloc[next_idx]
+                         if "pred_label" in df.columns
+                         else "",
+                         f"{df['pred_conf'].iloc[next_idx]:.2%}"
+                         if "pred_conf" in df.columns
+                         else "",
+                         df["pred_label"].iloc[next_idx]
+                         if "pred_label" in df.columns
+                         else None,
+                         next_idx,
+                         next_idx,
+                         plot_data,
+                     )
+                 plot_data = (
+                     None
+                     if "pred_raw" not in df.columns
+                     else pd.DataFrame(
+                         {
+                             "class": self.class_names,
+                             "probability": df["pred_raw"].iloc[current_idx],
+                         }
+                     ).nlargest(5, "probability")
+                 )
                  return (
                      filepaths[current_idx],
                      filepaths[current_idx],
+                     df["pred_label"].iloc[current_idx]
+                     if "pred_label" in df.columns
+                     else "",
+                     f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                     if "pred_conf" in df.columns
+                     else "",
+                     df["pred_label"].iloc[current_idx]
+                     if "pred_label" in df.columns
+                     else None,
                      current_idx,
                      current_idx,
+                     plot_data,
                  )
 
              def save_and_next(current_idx, selected_category):
+                 # Convert current_idx to int before arithmetic
+                 current_idx = int(current_idx)
+
                  if selected_category is None:
+                     plot_data = None if "pred_raw" not in df.columns else pd.DataFrame(
+                         {
+                             "class": self.class_names,
+                             "probability": df["pred_raw"].iloc[current_idx],
+                         }
+                     ).nlargest(5, "probability")
                      return (
                          filepaths[current_idx],
                          filepaths[current_idx],
+                         df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else "",
+                         f"{df['pred_conf'].iloc[current_idx]:.2%}" if "pred_conf" in df.columns else "",
+                         df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else None,
                          current_idx,
                          current_idx,
+                         plot_data,
                      )
 
                  # Save the current annotation
@@ -279,13 +514,39 @@ class ActiveLearner:
                  # Move to next image if not at the end
                  next_idx = current_idx + 1
                  if next_idx >= len(filepaths):
+                     plot_data = None if "pred_raw" not in df.columns else pd.DataFrame(
+                         {
+                             "class": self.class_names,
+                             "probability": df["pred_raw"].iloc[current_idx],
+                         }
+                     ).nlargest(5, "probability")
                      return (
                          filepaths[current_idx],
                          filepaths[current_idx],
+                         df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else "",
+                         f"{df['pred_conf'].iloc[current_idx]:.2%}" if "pred_conf" in df.columns else "",
+                         df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else None,
                          current_idx,
                          current_idx,
+                         plot_data,
                      )
-                 return filepaths[next_idx], filepaths[next_idx], next_idx, next_idx
+
+                 plot_data = None if "pred_raw" not in df.columns else pd.DataFrame(
+                     {
+                         "class": self.class_names,
+                         "probability": df["pred_raw"].iloc[next_idx],
+                     }
+                 ).nlargest(5, "probability")
+                 return (
+                     filepaths[next_idx],
+                     filepaths[next_idx],
+                     df["pred_label"].iloc[next_idx] if "pred_label" in df.columns else "",
+                     f"{df['pred_conf'].iloc[next_idx]:.2%}" if "pred_conf" in df.columns else "",
+                     df["pred_label"].iloc[next_idx] if "pred_label" in df.columns else None,
+                     next_idx,
+                     next_idx,
+                     plot_data,
+                 )
 
              def convert_csv_to_parquet():
                  try:
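Note: the top-5 `plot_data` expression is rebuilt inline at every return site of `navigate()` and `save_and_next()`. A hypothetical helper (not in the package) that captures the shared pattern:

```python
import pandas as pd

def top5_plot_data(df: pd.DataFrame, class_names, idx: int):
    # Returns the BarPlot value for row idx, or None when there are no
    # raw prediction vectors to plot (hypothetical refactor, not package code).
    if "pred_raw" not in df.columns:
        return None
    return pd.DataFrame(
        {"class": class_names, "probability": df["pred_raw"].iloc[idx]}
    ).nlargest(5, "probability")
```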
@@ -301,19 +562,46 @@ class ActiveLearner:
              back_btn.click(
                  fn=lambda idx: navigate(idx, -1),
                  inputs=[current_index],
-                 outputs=[filename, image, current_index, progress],
+                 outputs=[
+                     filename,
+                     image,
+                     pred_label,
+                     pred_conf,
+                     category,
+                     current_index,
+                     progress,
+                     pred_plot,
+                 ],
              )
 
              next_btn.click(
                  fn=lambda idx: navigate(idx, 1),
                  inputs=[current_index],
-                 outputs=[filename, image, current_index, progress],
+                 outputs=[
+                     filename,
+                     image,
+                     pred_label,
+                     pred_conf,
+                     category,
+                     current_index,
+                     progress,
+                     pred_plot,
+                 ],
              )
 
              submit_btn.click(
                  fn=save_and_next,
                  inputs=[current_index, category],
-                 outputs=[filename, image, current_index, progress],
+                 outputs=[
+                     filename,
+                     image,
+                     pred_label,
+                     pred_conf,
+                     category,
+                     current_index,
+                     progress,
+                     pred_plot,
+                 ],
              )
 
              finish_btn.click(fn=convert_csv_to_parquet)
@@ -325,10 +613,6 @@ class ActiveLearner:
          Add samples to the training set.
          """
          new_train_set = df.copy()
-         # new_train_set.drop(columns=["pred_conf"], inplace=True)
-         # new_train_set.rename(columns={"pred_label": "label"}, inplace=True)
-
-         # len_old = len(self.train_set)
 
          logger.info(f"Adding {len(new_train_set)} samples to training set")
          self.train_set = pd.concat([self.train_set, new_train_set])
@@ -340,13 +624,3 @@ class ActiveLearner:
 
          self.train_set.to_parquet(f"{output_filename}.parquet")
          logger.info(f"Saved training set to {output_filename}.parquet")
-
-         # if len(self.train_set) == len_old:
-         #     logger.warning("No new samples added to training set")
-
-         # elif len_old + len(new_train_set) < len(self.train_set):
-         #     logger.warning("Some samples were duplicates and removed from training set")
-
-         # else:
-         #     logger.info("All new samples added to training set")
-         #     logger.info(f"Training set now has {len(self.train_set)} samples")
{active_vision-0.0.5.dist-info → active_vision-0.1.1.dist-info}/METADATA RENAMED
@@ -1,10 +1,11 @@
  Metadata-Version: 2.2
  Name: active-vision
- Version: 0.0.5
+ Version: 0.1.1
  Summary: Active learning for edge vision.
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
+ Requires-Dist: accelerate>=1.2.1
  Requires-Dist: datasets>=3.2.0
  Requires-Dist: fastai>=2.7.18
  Requires-Dist: gradio>=5.12.0
@@ -13,6 +14,8 @@ Requires-Dist: ipywidgets>=8.1.5
  Requires-Dist: loguru>=0.7.3
  Requires-Dist: seaborn>=0.13.2
  Requires-Dist: timm>=1.0.13
+ Requires-Dist: transformers>=4.48.0
+ Requires-Dist: xinfer>=0.3.2
 
  ![Python Version](https://img.shields.io/badge/python-3.10%2B-blue?style=for-the-badge)
  ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg?style=for-the-badge)
@@ -68,17 +71,18 @@ cd active-vision
  pip install -e .
  ```
 
- I recommend using [uv](https://docs.astral.sh/uv/) to set up a virtual environment and install the package. You can also use other virtual env of your choice.
-
- If you're using uv:
-
- ```bash
- uv venv
- uv sync
- ```
- Once the virtual environment is created, you can install the package using pip.
 
  > [!TIP]
+ > I recommend using [uv](https://docs.astral.sh/uv/) to set up a virtual environment and install the package. You can also use other virtual env of your choice.
+ >
+ > If you're using uv:
+ >
+ > ```bash
+ > uv venv
+ > uv sync
+ > ```
+ > Once the virtual environment is created, you can install the package using pip.
+ >
  > If you're using uv add a `uv` before the pip install command to install into your virtual environment. Eg:
  > ```bash
  > uv pip install active-vision
@@ -117,12 +121,16 @@ pred_df = al.predict(filepaths)
  # Sample low confidence predictions from unlabeled set
  uncertain_df = al.sample_uncertain(pred_df, num_samples=10)
 
- # Launch a Gradio UI to label the low confidence samples
+ # Launch a Gradio UI to label the low confidence samples, save the labeled samples to a file
  al.label(uncertain_df, output_filename="uncertain")
  ```
 
  ![Gradio UI](https://raw.githubusercontent.com/dnth/active-vision/main/assets/labeling_ui.png)
 
+ In the UI, you can optionally run zero-shot inference on the image. This will use a VLM to predict the label of the image. There are a dozen VLM models supported in the [x.infer project](https://github.com/dnth/x.infer).
+
+ ![Zero-Shot Inference](https://raw.githubusercontent.com/dnth/active-vision/main/assets/zero_shot_ui.png)
+
  Once complete, the labeled samples will be saved into a new df.
  We can now add the newly labeled data to the training set.
 
@@ -167,12 +175,12 @@ The active learning loop is a iterative process and can keep going until you hit
  For this dataset, I decided to stop the active learning loop at 275 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard.
 
 
- | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
- |-----------------|---------------------|--------------|----------------------|----------------|--------|
- | 9469 | 94.90% | 80 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
- | 9469 | 95.11% | 200 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
- | 275 | 99.33% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/05_retrain_larger.ipynb) |
- | 275 | 93.40% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/04_relabel_loop.ipynb) |
+ | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+ |----------------:|--------------------:|-------------:|----------------------|:---------------:|--------|
+ | 9469 | 94.90% | 80 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
+ | 9469 | 95.11% | 200 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
+ | 275 | 99.33% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/05_retrain_larger.ipynb) |
+ | 275 | 93.40% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/04_relabel_loop.ipynb) |
 
  ### Dog Food
  - num classes: 2
@@ -182,11 +190,11 @@ To start the active learning loop, I labeled 20 images (10 images from each clas
 
  I decided to stop the active learning loop at 160 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
 
- | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
- |-----------------|---------------------|--------------|-------|----------------|--------|
- | 2100 | 99.70% | ? | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/abhishek/autotrain-dog-vs-food) |
- | 160 | 100.00% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/02_train.ipynb) |
- | 160 | 97.60% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/01_label.ipynb) |
+ | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+ |----------------:|--------------------:|-------------:|----------------------|:---------------:|--------|
+ | 2100 | 99.70% | ? | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/abhishek/autotrain-dog-vs-food) |
+ | 160 | 100.00% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/02_train.ipynb) |
+ | 160 | 97.60% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/01_label.ipynb) |
 
  ### Oxford-IIIT Pet
  - num classes: 37
@@ -196,13 +204,27 @@ To start the active learning loop, I labeled 370 images (10 images from each cla
 
  I decided to stop the active learning loop at 612 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
 
- | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
- |-----------------|---------------------|--------------|-------|----------------|--------|
- | 3680 | 95.40% | 5 | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/walterg777/vit-base-oxford-iiit-pets) |
- | 612 | 90.26% | 11 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/02_train.ipynb) |
- | 612 | 91.38% | 11 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/03_train_vit.ipynb) |
+ | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+ |----------------:|--------------------:|-------------:|----------------------|:---------------:|--------|
+ | 3680 | 95.40% | 5 | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/walterg777/vit-base-oxford-iiit-pets) |
+ | 612 | 90.26% | 11 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/02_train.ipynb) |
+ | 612 | 91.38% | 11 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/03_train_vit.ipynb) |
+
+ ### Eurosat RGB
+ - num classes: 10
+ - num images: 16100
+
+ To start the active learning loop, I labeled 100 images (10 images from each class) and iteratively labeled the most informative images until I hit 1188 labeled images.
+
+ I decided to stop the active learning loop at 1188 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
 
 
+ | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+ |----------------:|--------------------:|-------------:|----------------------|:---------------:|--------|
+ | 16100 | 98.55% | 6 | vit-base-patch16-224 | ❌ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/03_train_all.ipynb) |
+ | 1188 | 94.59% | 6 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/02_train.ipynb) |
+ | 1188 | 96.57% | 13 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/02_train.ipynb) |
+
 
  ## ➿ Workflow
  This section describes a more detailed workflow for active learning. There are two workflows for active learning that we can use depending on the availability of labeled data.
@@ -270,55 +292,21 @@ graph TD
 
 
 
- <!-- ## Methodology
- To test out the workflows we will use the [imagenette dataset](https://huggingface.co/datasets/frgfm/imagenette). But this will be applicable to any dataset.
-
- Imagenette is a subset of the ImageNet dataset with 10 classes. We will use this dataset to test out the workflows. Additionally, Imagenette has an existing leaderboard which we can use to evaluate the performance of the models.
-
- ### Step 1: Download the dataset
- Download the imagenette dataset. The imagenette dataset has a train and validation split. Since the leaderboard is based on the validation set, we will evalutate the performance of our model on the validation set to make it easier to compare to the leaderboard.
-
- We will treat the imagenette train set as a unlabeled set and iteratively sample from it while monitoring the performance on the validation set. Ideally we will be able to get to a point where the performance on the validation set is close to the leaderboard with minimal number of labeled images.
+ ## 🧱 Sampling Approaches
 
- I've processed the imagenette dataset and uploaded it to the hub. You can download it from [here](https://huggingface.co/datasets/dnth/active-learning-imagenette).
+ Recommendation 1:
+ - 10% randomly selected from unlabeled items.
+ - 80% selected from the lowest confidence items.
+ - 10% selected as outliers.
 
- To load the dataset, you can use the following code:
- ```python
- from datasets import load_dataset
-
- unlabeled_dataset = load_dataset("dnth/active-learning-imagenette", "unlabeled")
- eval_dataset = load_dataset("dnth/active-learning-imagenette", "evaluation")
- ```
+ Recommendation 2:
 
- ### Step 2: Initial Sampling
- Label an initial dataset of 10 images from each class. This will give us a small proxy dataset to train our model on. The sampling will be done randomly. There are more intelligent sampling strategies but we will start with random sampling.
+ - Sample 100 predicted images at 10–20% confidence.
+ - Sample 100 predicted images at 20–30% confidence.
+ - Sample 100 predicted images at 30–40% confidence, and so on.
 
- ### Step 3: Training the proxy model
- Train a proxy model on the initial dataset. The proxy model will be a small model that is easy to train and deploy. We will use the fastai framework to train the model. We will use the resnet18 architecture as a starting point. Once training is complete, compute the accuracy of the proxy model on the validation set and compare it to the leaderboard.
 
- > [!TIP]
- > With the initial model we got 91.24% accuracy on the validation set. See the [notebook](./nbs/01_initial_sampling.ipynb) for more details.
- > | Train Epochs | Number of Images | Validation Accuracy | Source |
- > |--------------|-----------------|----------------------|------------------|
- > | 10 | 100 | 91.24% | Initial sampling [notebook](./nbs/01_initial_sampling.ipynb) |
- > | 80 | 9469 | 94.90% | fastai |
- > | 200 | 9469 | 95.11% | fastai |
+ Uncertainty and diversity sampling are most effective when combined. For instance, you could first sample the most uncertain items using an uncertainty sampling method, then apply a diversity sampling method such as clustering to select a diverse set from the uncertain items.
 
+ Ultimately, the right ratios can depend on the specific task and dataset.
 
-
- ### Step 4: Inference on the unlabeled dataset
- Run inference on the unlabeled dataset (the remaining imagenette train set) and evaluate the performance of the proxy model.
-
- ### Step 5: Active learning
- Use active learning to select the most informative images to label from the unlabeled set. Pick the top 10 images from the unlabeled set that the proxy model is least confident about and label them.
-
- ### Step 6: Repeat
- Repeat step 3 - 5 until the performance on the validation set is close to the leaderboard. Note the number of labeled images vs the performance on the validation set. Ideally we want to get to a point where the performance on the validation set is close to the leaderboard with minimal number of labeled images.
-
-
- After the first iteration we got 94.57% accuracy on the validation set. See the [notebook](./nbs/03_retrain_model.ipynb) for more details.
-
- > [!TIP]
- > | Train Epochs | Number of Images | Validation Accuracy | Source |
- > |--------------|-----------------|----------------------|------------------|
- > | 10 | 200 | 94.57% | First relabeling [notebook](./nbs/03_retrain_model.ipynb) | -->
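Note: a minimal sketch of the combined uncertainty-then-diversity approach described above, assuming `pred_df` carries a `pred_conf` column and an `embedding` column of per-image feature vectors (neither the helper nor the `embedding` column is part of active-vision):

```python
import numpy as np
from sklearn.cluster import KMeans

def uncertain_then_diverse(pred_df, num_samples: int, pool_factor: int = 5):
    # 1) Uncertainty sampling: keep a pool of the lowest-confidence items.
    pool = pred_df.nsmallest(num_samples * pool_factor, "pred_conf")
    # 2) Diversity sampling: cluster the pool and pick the item closest
    #    to each cluster centroid (duplicates possible; fine for a sketch).
    X = np.stack(pool["embedding"].to_numpy())
    centers = KMeans(n_clusters=num_samples, n_init=10).fit(X).cluster_centers_
    picks = [pool.index[np.argmin(np.linalg.norm(X - c, axis=1))] for c in centers]
    return pred_df.loc[picks]
```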
{active_vision-0.0.5.dist-info → active_vision-0.1.1.dist-info}/RECORD RENAMED
@@ -0,0 +1,7 @@
+ active_vision/__init__.py,sha256=xWa6YKvR3wF8p_D9PprKNGP3VnxjbyVpcwnPCMhhaHM,43
+ active_vision/core.py,sha256=jWzTOx3GCB2Sq5-JGgoi-ZD2teoIGTYas9StqZxXefo,24999
+ active_vision-0.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ active_vision-0.1.1.dist-info/METADATA,sha256=U8-IH0WJnPj6KPBsfsxcW4GZCTDY0KFxrqz7migcnro,15454
+ active_vision-0.1.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ active_vision-0.1.1.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
+ active_vision-0.1.1.dist-info/RECORD,,
@@ -1,7 +0,0 @@
- active_vision/__init__.py,sha256=u-7eEtxmLFoQfY0fM9JSs_lWb4e1c7WxR3cC619BTXE,43
- active_vision/core.py,sha256=mKS-ZZunjPgXuavm_J4oYiO9lm6UNRjFEzIn4kNfdVA,13421
- active_vision-0.0.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- active_vision-0.0.5.dist-info/METADATA,sha256=mSFB-DeJ43roTwswTp3oHcG3CIyKnO-7ZCqaYbw26eQ,15846
- active_vision-0.0.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- active_vision-0.0.5.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
- active_vision-0.0.5.dist-info/RECORD,,