active-vision 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
active_vision/__init__.py CHANGED
@@ -1,3 +1,3 @@
- __version__ = "0.1.0"
+ __version__ = "0.2.0"

  from .core import *
active_vision/core.py CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
  from loguru import logger
  from fastai.vision.all import *
  import torch
- import torch.nn.functional as F
+ import numpy as np

  import warnings
  from typing import Callable
@@ -142,7 +142,8 @@ class ActiveLearner:
  {
  "filepath": filepaths,
  "pred_label": [self.learn.dls.vocab[i] for i in cls_preds.numpy()],
- "pred_conf": torch.max(F.softmax(preds, dim=1), dim=1)[0].numpy(),
+ "pred_conf": torch.max(preds, dim=1)[0].numpy(),
+ "pred_raw": preds.numpy().tolist(),
  }
  )
  return self.pred_df
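Note on this hunk: `pred_conf` is now taken directly from `preds` (the explicit `F.softmax` call is gone), and the full probability vector is kept as `pred_raw` for the new sampling strategies. A stand-alone sketch of how the two columns relate, assuming `preds` already holds per-class probabilities (as fastai's `Learner.get_preds` typically returns for classification):

```python
# Illustrative sketch only (not part of the package).
import torch

preds = torch.tensor([[0.10, 0.70, 0.20],   # hypothetical per-class probabilities
                      [0.45, 0.40, 0.15]])  # for two images

pred_conf = torch.max(preds, dim=1)[0].numpy()  # top probability per row -> [0.70, 0.45]
pred_raw = preds.numpy().tolist()               # full vectors, reused by margin/ratio/entropy scoring
```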
@@ -189,37 +190,61 @@ class ActiveLearner:
  df = df[~df["filepath"].isin(self.train_set["filepath"])].copy()

  if strategy == "least-confidence":
- logger.info(f"Getting top {num_samples} low confidence samples")
-
+ logger.info(
+ f"Using least confidence strategy to get top {num_samples} samples"
+ )
  df.loc[:, "uncertainty_score"] = 1 - (df["pred_conf"]) / (
  self.num_classes - (self.num_classes - 1)
  )

- # Sort by descending uncertainty score
- uncertain_df = df.sort_values(by="uncertainty_score", ascending=False).head(
- num_samples
+ elif strategy == "margin-of-confidence":
+ logger.info(
+ f"Using margin of confidence strategy to get top {num_samples} samples"
  )
- return uncertain_df
+ if len(df["pred_raw"].iloc[0]) < 2:
+ logger.error("pred_raw has less than 2 elements")
+ raise ValueError("pred_raw has less than 2 elements")

- # TODO: Implement margin of confidence strategy
- elif strategy == "margin-of-confidence":
- logger.error("Margin of confidence strategy not implemented")
- raise NotImplementedError("Margin of confidence strategy not implemented")
+ # Calculate uncertainty score as 1 - (difference between top two predictions)
+ df.loc[:, "uncertainty_score"] = df["pred_raw"].apply(
+ lambda x: 1 - (np.sort(x)[-1] - np.sort(x)[-2])
+ )

- # TODO: Implement ratio of confidence strategy
  elif strategy == "ratio-of-confidence":
- logger.error("Ratio of confidence strategy not implemented")
- raise NotImplementedError("Ratio of confidence strategy not implemented")
+ logger.info(
+ f"Using ratio of confidence strategy to get top {num_samples} samples"
+ )
+ if len(df["pred_raw"].iloc[0]) < 2:
+ logger.error("pred_raw has less than 2 elements")
+ raise ValueError("pred_raw has less than 2 elements")
+
+ # Calculate uncertainty score as ratio of top two predictions
+ df.loc[:, "uncertainty_score"] = df["pred_raw"].apply(
+ lambda x: np.sort(x)[-2] / np.sort(x)[-1]
+ )

- # TODO: Implement entropy strategy
  elif strategy == "entropy":
- logger.error("Entropy strategy not implemented")
- raise NotImplementedError("Entropy strategy not implemented")
+ logger.info(f"Using entropy strategy to get top {num_samples} samples")
+
+ # Calculate uncertainty score as entropy of the prediction
+ df.loc[:, "uncertainty_score"] = df["pred_raw"].apply(
+ lambda x: -np.sum(x * np.log2(x))
+ )
+
+ # Normalize the uncertainty score to be between 0 and 1 by dividing by log2 of the number of classes
+ df.loc[:, "uncertainty_score"] = df["uncertainty_score"] / np.log2(
+ self.num_classes
+ )

  else:
  logger.error(f"Unknown strategy: {strategy}")
  raise ValueError(f"Unknown strategy: {strategy}")

+ df = df[
+ ["filepath", "pred_label", "pred_conf", "uncertainty_score", "pred_raw"]
+ ]
+ return df.sort_values(by="uncertainty_score", ascending=False).head(num_samples)
+
  def sample_diverse(self, df: pd.DataFrame, num_samples: int):
  """
  Sample top `num_samples` diverse samples. Returns a df with filepaths and predicted labels, and confidence scores.
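The four strategies above all reduce one row of `pred_raw` to a single `uncertainty_score`. A stand-alone sketch of the same arithmetic on a toy probability vector (illustrative values only; note the least-confidence divisor `num_classes - (num_classes - 1)` always equals 1):

```python
# Illustrative sketch of the scoring rules implemented in sample_uncertain above.
import numpy as np

p = np.array([0.5, 0.3, 0.2])  # hypothetical probabilities for one image
num_classes = len(p)

least_confidence = 1 - p.max() / (num_classes - (num_classes - 1))  # 1 - top probability = 0.5
margin = 1 - (np.sort(p)[-1] - np.sort(p)[-2])                      # 1 - (0.5 - 0.3) = 0.8
ratio = np.sort(p)[-2] / np.sort(p)[-1]                             # 0.3 / 0.5 = 0.6
entropy = -np.sum(p * np.log2(p)) / np.log2(num_classes)            # normalized to [0, 1]
```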
@@ -258,7 +283,7 @@ class ActiveLearner:
  return;
  }

- if (e.key === "ArrowUp" || e.key === "Enter") {
+ if (e.key === "ArrowUp") {
  document.getElementById("submit_btn").click();
  } else if (e.key === "ArrowRight") {
  document.getElementById("next_btn").click();
@@ -275,107 +300,149 @@ class ActiveLearner:
  filepaths = df["filepath"].tolist()

  with gr.Blocks(head=shortcut_js) as demo:
- current_index = gr.State(value=0)
-
- image = gr.Image(
- type="filepath", label="Image", value=filepaths[0], height=500
- )
-
- with gr.Row():
- filename = gr.Textbox(
- label="Filename", value=filepaths[0], interactive=False
- )
+ with gr.Tabs():
+ with gr.Tab("Labeling"):
+ current_index = gr.State(value=0)
+
+ with gr.Row(min_height=500):
+ image = gr.Image(
+ type="filepath",
+ label="Image",
+ value=filepaths[0],
+ height=500,
+ )

- pred_label = gr.Textbox(
- label="Predicted Label",
- value=df["pred_label"].iloc[0]
- if "pred_label" in df.columns
- else "",
- interactive=False,
- )
- pred_conf = gr.Textbox(
- label="Confidence",
- value=f"{df['pred_conf'].iloc[0]:.2%}"
- if "pred_conf" in df.columns
- else "",
- interactive=False,
- )
+ # Add bar plot with top 5 predictions
+ with gr.Column():
+ pred_plot = gr.BarPlot(
+ x="probability",
+ y="class",
+ title="Top 5 Predictions",
+ x_lim=[0, 1],
+ value=None
+ if "pred_raw" not in df.columns
+ else pd.DataFrame(
+ {
+ "class": self.class_names,
+ "probability": df["pred_raw"].iloc[0],
+ }
+ ).nlargest(5, "probability"),
+ )
+
+ filename = gr.Textbox(
+ label="Filename", value=filepaths[0], interactive=False
+ )
+
+ pred_label = gr.Textbox(
+ label="Predicted Label",
+ value=df["pred_label"].iloc[0]
+ if "pred_label" in df.columns
+ else "",
+ interactive=False,
+ )
+ pred_conf = gr.Textbox(
+ label="Confidence",
+ value=f"{df['pred_conf'].iloc[0]:.2%}"
+ if "pred_conf" in df.columns
+ else "",
+ interactive=False,
+ )
+
+ category = gr.Radio(
+ choices=self.class_names,
+ label="Select Category",
+ value=df["pred_label"].iloc[0]
+ if "pred_label" in df.columns
+ else None,
+ )

- category = gr.Radio(
- choices=self.class_names,
- label="Select Category",
- value=df["pred_label"].iloc[0] if "pred_label" in df.columns else None,
- )
+ with gr.Row():
+ back_btn = gr.Button("← Previous", elem_id="back_btn")
+ submit_btn = gr.Button(
+ "Submit ",
+ variant="primary",
+ elem_id="submit_btn",
+ )
+ next_btn = gr.Button("Next →", elem_id="next_btn")
+
+ progress = gr.Slider(
+ minimum=0,
+ maximum=len(filepaths) - 1,
+ value=0,
+ step=1,
+ label="Progress",
+ interactive=True,
+ )

- with gr.Row():
- back_btn = gr.Button("← Previous", elem_id="back_btn")
- submit_btn = gr.Button(
- "Submit (↑/Enter)",
- variant="primary",
- elem_id="submit_btn",
- interactive=False,
- )
- next_btn = gr.Button("Next →", elem_id="next_btn")
-
- progress = gr.Slider(
- minimum=0,
- maximum=len(filepaths) - 1,
- value=0,
- label="Progress",
- interactive=False,
- )
+ # Add event handler for slider changes
+ progress.change(
+ fn=lambda idx: navigate(idx, 0),
+ inputs=[progress],
+ outputs=[
+ filename,
+ image,
+ pred_label,
+ pred_conf,
+ category,
+ current_index,
+ progress,
+ pred_plot,
+ ],
+ )

- finish_btn = gr.Button("Finish Labeling", variant="primary")
+ finish_btn = gr.Button("Finish Labeling", variant="primary")

- with gr.Accordion("Zero-Shot Inference", open=False) as zero_shot_accordion:
- gr.Markdown("""
- Uses a VLM to predict the label of the image.
- """)
+ with gr.Tab("Zero-Shot Inference"):
+ gr.Markdown("""
+ Uses a VLM to predict the label of the image.
+ """)

- import xinfer
- from xinfer.model_registry import model_registry
- from xinfer.types import ModelInputOutput
+ import xinfer
+ from xinfer.model_registry import model_registry
+ from xinfer.types import ModelInputOutput

- # Get models and filter for image-to-text models
- all_models = model_registry.list_models()
- model_list = [
- model.id
- for model in all_models
- if model.input_output == ModelInputOutput.IMAGE_TEXT_TO_TEXT
- ]
+ # Get models and filter for image-to-text models
+ all_models = model_registry.list_models()
+ model_list = [
+ model.id
+ for model in all_models
+ if model.input_output == ModelInputOutput.IMAGE_TEXT_TO_TEXT
+ ]

- with gr.Row():
  with gr.Row():
- model_dropdown = gr.Dropdown(
- choices=model_list,
- label="Select a model",
- value="vikhyatk/moondream2",
- )
- device_dropdown = gr.Dropdown(
- choices=["cuda", "cpu"],
- label="Device",
- value="cuda" if torch.cuda.is_available() else "cpu",
+ with gr.Row():
+ model_dropdown = gr.Dropdown(
+ choices=model_list,
+ label="Select a model",
+ value="vikhyatk/moondream2",
+ )
+ device_dropdown = gr.Dropdown(
+ choices=["cuda", "cpu"],
+ label="Device",
+ value="cuda" if torch.cuda.is_available() else "cpu",
+ )
+ dtype_dropdown = gr.Dropdown(
+ choices=["float32", "float16", "bfloat16"],
+ label="Data Type",
+ value="float16"
+ if torch.cuda.is_available()
+ else "float32",
+ )
+
+ with gr.Column():
+ prompt_textbox = gr.Textbox(
+ label="Prompt",
+ lines=5,
+ value=f"Classify the image into one of the following categories: {self.class_names}. Answer with the category name only.",
+ interactive=True,
  )
- dtype_dropdown = gr.Dropdown(
- choices=["float32", "float16", "bfloat16"],
- label="Data Type",
- value="float16" if torch.cuda.is_available() else "float32",
- )
-
- with gr.Column():
- prompt_textbox = gr.Textbox(
- label="Prompt",
- lines=3,
- value=f"Classify the image into one of the following categories: {self.class_names}",
- interactive=True,
- )
- inference_btn = gr.Button("Run Inference", variant="primary")
+ inference_btn = gr.Button("Run Inference", variant="primary")

- result_textbox = gr.Textbox(
- label="Result",
- lines=3,
- interactive=False,
- )
+ result_textbox = gr.Textbox(
+ label="Result",
+ lines=3,
+ interactive=False,
+ )


  def run_zero_shot_inference(prompt, model, device, dtype, current_filename):
@@ -407,6 +474,16 @@ class ActiveLearner:
  next_idx = current_idx + direction

  if 0 <= next_idx < len(filepaths):
+ plot_data = (
+ None
+ if "pred_raw" not in df.columns
+ else pd.DataFrame(
+ {
+ "class": self.class_names,
+ "probability": df["pred_raw"].iloc[next_idx],
+ }
+ ).nlargest(5, "probability")
+ )
  return (
  filepaths[next_idx],
  filepaths[next_idx],
@@ -421,7 +498,18 @@ class ActiveLearner:
  else None,
  next_idx,
  next_idx,
+ plot_data,
  )
+ plot_data = (
+ None
+ if "pred_raw" not in df.columns
+ else pd.DataFrame(
+ {
+ "class": self.class_names,
+ "probability": df["pred_raw"].iloc[current_idx],
+ }
+ ).nlargest(5, "probability")
+ )
  return (
  filepaths[current_idx],
  filepaths[current_idx],
@@ -436,6 +524,7 @@ class ActiveLearner:
  else None,
  current_idx,
  current_idx,
+ plot_data,
  )

  def save_and_next(current_idx, selected_category):
  def save_and_next(current_idx, selected_category):
@@ -443,6 +532,16 @@ class ActiveLearner:
443
532
  current_idx = int(current_idx)
444
533
 
445
534
  if selected_category is None:
535
+ plot_data = (
536
+ None
537
+ if "pred_raw" not in df.columns
538
+ else pd.DataFrame(
539
+ {
540
+ "class": self.class_names,
541
+ "probability": df["pred_raw"].iloc[current_idx],
542
+ }
543
+ ).nlargest(5, "probability")
544
+ )
446
545
  return (
447
546
  filepaths[current_idx],
448
547
  filepaths[current_idx],
@@ -457,6 +556,7 @@ class ActiveLearner:
  else None,
  current_idx,
  current_idx,
+ plot_data,
  )

  # Save the current annotation
@@ -466,6 +566,16 @@ class ActiveLearner:
  # Move to next image if not at the end
  next_idx = current_idx + 1
  if next_idx >= len(filepaths):
+ plot_data = (
+ None
+ if "pred_raw" not in df.columns
+ else pd.DataFrame(
+ {
+ "class": self.class_names,
+ "probability": df["pred_raw"].iloc[current_idx],
+ }
+ ).nlargest(5, "probability")
+ )
  return (
  filepaths[current_idx],
  filepaths[current_idx],
@@ -480,7 +590,19 @@ class ActiveLearner:
  else None,
  current_idx,
  current_idx,
+ plot_data,
  )
+
+ plot_data = (
+ None
+ if "pred_raw" not in df.columns
+ else pd.DataFrame(
+ {
+ "class": self.class_names,
+ "probability": df["pred_raw"].iloc[next_idx],
+ }
+ ).nlargest(5, "probability")
+ )
  return (
  filepaths[next_idx],
  filepaths[next_idx],
@@ -495,6 +617,7 @@ class ActiveLearner:
  else None,
  next_idx,
  next_idx,
+ plot_data,
  )

  def convert_csv_to_parquet():
@@ -519,6 +642,7 @@ class ActiveLearner:
  category,
  current_index,
  progress,
+ pred_plot,
  ],
  )

@@ -533,6 +657,7 @@ class ActiveLearner:
  category,
  current_index,
  progress,
+ pred_plot,
  ],
  )

@@ -547,6 +672,7 @@ class ActiveLearner:
  category,
  current_index,
  progress,
+ pred_plot,
  ],
  )

@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: active-vision
- Version: 0.1.0
+ Version: 0.2.0
  Summary: Active learning for edge vision.
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
@@ -17,10 +17,10 @@ Requires-Dist: timm>=1.0.13
  Requires-Dist: transformers>=4.48.0
  Requires-Dist: xinfer>=0.3.2

- ![Python Version](https://img.shields.io/badge/python-3.10%2B-blue?style=for-the-badge)
- ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg?style=for-the-badge)
- [![PyPI](https://img.shields.io/pypi/v/active-vision?style=for-the-badge)](https://pypi.org/project/active-vision/)
- ![Downloads](https://img.shields.io/pepy/dt/active-vision?style=for-the-badge&logo=pypi&logoColor=white&label=Downloads&color=purple)
+ [![Python Version](https://img.shields.io/badge/python-3.10%2B-blue?style=for-the-badge&logo=python&logoColor=white)](https://pypi.org/project/active-vision/)
+ [![PyPI](https://img.shields.io/pypi/v/active-vision?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/active-vision/)
+ [![Downloads](https://img.shields.io/pepy/dt/active-vision?style=for-the-badge&logo=pypi&logoColor=white&label=Downloads&color=purple)](https://pypi.org/project/active-vision/)
+ [![License](https://img.shields.io/badge/License-Apache%202.0-green.svg?style=for-the-badge&logo=apache&logoColor=white)](https://github.com/dnth/active-vision/blob/main/LICENSE)

  <p align="center">
  <img src="https://raw.githubusercontent.com/dnth/active-vision/main/assets/logo.png" alt="active-vision">
@@ -47,9 +47,9 @@ The goal of this project is to create a framework for the active learning loop f

  Uncertainty Sampling:
  - [X] Least confidence
- - [ ] Margin of confidence
- - [ ] Ratio of confidence
- - [ ] Entropy
+ - [X] Margin of confidence
+ - [X] Ratio of confidence
+ - [X] Entropy

  Diverse Sampling:
  - [X] Random sampling
@@ -71,17 +71,18 @@ cd active-vision
  pip install -e .
  ```

- I recommend using [uv](https://docs.astral.sh/uv/) to set up a virtual environment and install the package. You can also use other virtual env of your choice.
-
- If you're using uv:
-
- ```bash
- uv venv
- uv sync
- ```
- Once the virtual environment is created, you can install the package using pip.

  > [!TIP]
+ > I recommend using [uv](https://docs.astral.sh/uv/) to set up a virtual environment and install the package. You can also use any other virtual environment of your choice.
+ >
+ > If you're using uv:
+ >
+ > ```bash
+ > uv venv
+ > uv sync
+ > ```
+ > Once the virtual environment is created, you can install the package using pip.
+ >
  > If you're using uv, add `uv` before the pip install command to install into your virtual environment. E.g.:
  > ```bash
  > uv pip install active-vision
@@ -120,12 +121,16 @@ pred_df = al.predict(filepaths)
  # Sample low confidence predictions from unlabeled set
  uncertain_df = al.sample_uncertain(pred_df, num_samples=10)

- # Launch a Gradio UI to label the low confidence samples
+ # Launch a Gradio UI to label the low confidence samples and save the labeled samples to a file
  al.label(uncertain_df, output_filename="uncertain")
  ```

  ![Gradio UI](https://raw.githubusercontent.com/dnth/active-vision/main/assets/labeling_ui.png)

+ In the UI, you can optionally run zero-shot inference on the image. This will use a VLM to predict the label of the image. There are a dozen VLMs supported in the [x.infer project](https://github.com/dnth/x.infer).
+
+ ![Zero-Shot Inference](https://raw.githubusercontent.com/dnth/active-vision/main/assets/zero_shot_ui.png)
+
  Once complete, the labeled samples will be saved into a new df.
  We can now add the newly labeled data to the training set.
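Since 0.2.0 implements all four uncertainty strategies, the sampling step can presumably be steered through the `strategy` parameter seen in the `core.py` hunk above. A hedged usage sketch (parameter name and accepted values are taken from that hunk, not from the README):

```python
# Usage sketch only: pick an uncertainty strategy explicitly when sampling.
uncertain_df = al.sample_uncertain(pred_df, num_samples=10, strategy="entropy")
# Other values handled in core.py: "least-confidence", "margin-of-confidence", "ratio-of-confidence"
```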
@@ -167,15 +172,15 @@ The active learning loop is an iterative process and can keep going until you hit
  - You hit a budget.
  - Other criteria.

- For this dataset,I decided to stop the active learning loop at 275 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard.
+ For this dataset, I decided to stop the active learning loop at 275 labeled images because the performance on the evaluation set exceeds the top performing model on the leaderboard.


- | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
- |-----------------|---------------------|--------------|----------------------|----------------|--------|
- | 9469 | 94.90% | 80 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
- | 9469 | 95.11% | 200 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
- | 275 | 99.33% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/05_retrain_larger.ipynb) |
- | 275 | 93.40% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/04_relabel_loop.ipynb) |
+ | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+ |----------------: |--------------------: |-------------: |---------------------- |:---------------: |------------------------------------------------------------------------------------- |
+ | 9469 | 94.90% | 80 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
+ | 9469 | 95.11% | 200 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
+ | 275 | 99.33% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/05_retrain_larger.ipynb) |
+ | 275 | 93.40% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/04_relabel_loop.ipynb) |

  ### Dog Food
  - num classes: 2
@@ -185,11 +190,11 @@ To start the active learning loop, I labeled 20 images (10 images from each clas

  I decided to stop the active learning loop at 160 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.

- | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
- |-----------------|---------------------|--------------|-------|----------------|--------|
- | 2100 | 99.70% | ? | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/abhishek/autotrain-dog-vs-food) |
- | 160 | 100.00% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/02_train.ipynb) |
- | 160 | 97.60% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/01_label.ipynb) |
+ | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+ |----------------: |--------------------: |-------------: |---------------------- |:---------------: |--------------------------------------------------------------------------------------------- |
+ | 2100 | 99.70% | ? | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/abhishek/autotrain-dog-vs-food) |
+ | 160 | 100.00% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/02_train.ipynb) |
+ | 160 | 97.60% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/01_label.ipynb) |

  ### Oxford-IIIT Pet
  - num classes: 37
@@ -199,13 +204,27 @@ To start the active learning loop, I labeled 370 images (10 images from each cla

  I decided to stop the active learning loop at 612 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.

- | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
- |-----------------|---------------------|--------------|-------|----------------|--------|
- | 3680 | 95.40% | 5 | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/walterg777/vit-base-oxford-iiit-pets) |
- | 612 | 90.26% | 11 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/02_train.ipynb) |
- | 612 | 91.38% | 11 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/03_train_vit.ipynb) |
+ | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+ |----------------: |--------------------: |-------------: |---------------------- |:---------------: |------------------------------------------------------------------------------------------------- |
+ | 3680 | 95.40% | 5 | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/walterg777/vit-base-oxford-iiit-pets) |
+ | 612 | 90.26% | 11 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/02_train.ipynb) |
+ | 612 | 91.38% | 11 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/03_train_vit.ipynb) |
+
+ ### Eurosat RGB
+ - num classes: 10
+ - num images: 16100
+
+ To start the active learning loop, I labeled 100 images (10 images from each class) and iteratively labeled the most informative images until I hit 1188 labeled images.
+
+ I decided to stop the active learning loop at 1188 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard.


+ | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+ |----------------: |--------------------: |-------------: |---------------------- |:---------------: |-------------------------------------------------------------------------------------------- |
+ | 16100 | 98.55% | 6 | vit-base-patch16-224 | ❌ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/03_train_all.ipynb) |
+ | 1188 | 94.59% | 6 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/02_train.ipynb) |
+ | 1188 | 96.57% | 13 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/02_train.ipynb) |
+

  ## ➿ Workflow
  This section describes a more detailed workflow for active learning. There are two workflows for active learning that we can use depending on the availability of labeled data.
@@ -273,55 +292,21 @@ graph TD



- <!-- ## Methodology
- To test out the workflows we will use the [imagenette dataset](https://huggingface.co/datasets/frgfm/imagenette). But this will be applicable to any dataset.
-
- Imagenette is a subset of the ImageNet dataset with 10 classes. We will use this dataset to test out the workflows. Additionally, Imagenette has an existing leaderboard which we can use to evaluate the performance of the models.
-
- ### Step 1: Download the dataset
- Download the imagenette dataset. The imagenette dataset has a train and validation split. Since the leaderboard is based on the validation set, we will evalutate the performance of our model on the validation set to make it easier to compare to the leaderboard.
-
- We will treat the imagenette train set as a unlabeled set and iteratively sample from it while monitoring the performance on the validation set. Ideally we will be able to get to a point where the performance on the validation set is close to the leaderboard with minimal number of labeled images.
+ ## 🧱 Sampling Approaches

- I've processed the imagenette dataset and uploaded it to the hub. You can download it from [here](https://huggingface.co/datasets/dnth/active-learning-imagenette).
+ Recommendation 1:
+ - 10% randomly selected from unlabeled items.
+ - 80% selected from the lowest confidence items.
+ - 10% selected as outliers.

- To load the dataset, you can use the following code:
- ```python
- from datasets import load_dataset
-
- unlabeled_dataset = load_dataset("dnth/active-learning-imagenette", "unlabeled")
- eval_dataset = load_dataset("dnth/active-learning-imagenette", "evaluation")
- ```
+ Recommendation 2:

- ### Step 2: Initial Sampling
- Label an initial dataset of 10 images from each class. This will give us a small proxy dataset to train our model on. The sampling will be done randomly. There are more intelligent sampling strategies but we will start with random sampling.
+ - Sample 100 predicted images at 10–20% confidence.
+ - Sample 100 predicted images at 20–30% confidence.
+ - Sample 100 predicted images at 30–40% confidence, and so on.

- ### Step 3: Training the proxy model
- Train a proxy model on the initial dataset. The proxy model will be a small model that is easy to train and deploy. We will use the fastai framework to train the model. We will use the resnet18 architecture as a starting point. Once training is complete, compute the accuracy of the proxy model on the validation set and compare it to the leaderboard.

- > [!TIP]
- > With the initial model we got 91.24% accuracy on the validation set. See the [notebook](./nbs/01_initial_sampling.ipynb) for more details.
- > | Train Epochs | Number of Images | Validation Accuracy | Source |
- > |--------------|-----------------|----------------------|------------------|
- > | 10 | 100 | 91.24% | Initial sampling [notebook](./nbs/01_initial_sampling.ipynb) |
- > | 80 | 9469 | 94.90% | fastai |
- > | 200 | 9469 | 95.11% | fastai |
+ Uncertainty and diversity sampling are most effective when combined. For instance, you could first sample the most uncertain items using an uncertainty sampling method, then apply a diversity sampling method such as clustering to select a diverse set from the uncertain items.

+ Ultimately, the right ratios can depend on the specific task and dataset.

-
- ### Step 4: Inference on the unlabeled dataset
- Run inference on the unlabeled dataset (the remaining imagenette train set) and evaluate the performance of the proxy model.
-
- ### Step 5: Active learning
- Use active learning to select the most informative images to label from the unlabeled set. Pick the top 10 images from the unlabeled set that the proxy model is least confident about and label them.
-
- ### Step 6: Repeat
- Repeat step 3 - 5 until the performance on the validation set is close to the leaderboard. Note the number of labeled images vs the performance on the validation set. Ideally we want to get to a point where the performance on the validation set is close to the leaderboard with minimal number of labeled images.
-
-
- After the first iteration we got 94.57% accuracy on the validation set. See the [notebook](./nbs/03_retrain_model.ipynb) for more details.
-
- > [!TIP]
- > | Train Epochs | Number of Images | Validation Accuracy | Source |
- > |--------------|-----------------|----------------------|------------------|
- > | 10 | 200 | 94.57% | First relabeling [notebook](./nbs/03_retrain_model.ipynb) | -->
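A minimal pandas sketch of the combined approach described above: keep a pool of the most uncertain predictions, then spread the final picks across predicted labels as a cheap stand-in for a clustering-based diversity step. This is illustrative only and not the package's API; the column names mirror the `sample_uncertain` output in `core.py`.

```python
# Illustrative sketch: combine uncertainty and diversity sampling.
import pandas as pd

def sample_combined(pred_df: pd.DataFrame, num_samples: int, pool_factor: int = 5) -> pd.DataFrame:
    # 1) Uncertainty step: keep a pool of the most uncertain items.
    pool = pred_df.sort_values("uncertainty_score", ascending=False).head(num_samples * pool_factor)

    # 2) Diversity step (cheap proxy): take a few items from every predicted label
    #    so that a single ambiguous class does not dominate the labeling batch.
    per_label = max(1, num_samples // max(1, pool["pred_label"].nunique()))
    picks = pool.groupby("pred_label").head(per_label).head(num_samples)
    return picks
```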
@@ -0,0 +1,7 @@
+ active_vision/__init__.py,sha256=SxR6MPyULKlvx-86S3NIk46Tz1xlN-g_vI_aW3LitG4,43
+ active_vision/core.py,sha256=4Nl8e3isinIlzcD6bCbG9TTGiuG0PQkKNUIvnAsbaTY,27373
+ active_vision-0.2.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ active_vision-0.2.0.dist-info/METADATA,sha256=3XvDTC1Cnxd3rIUUXyY8MwTgKGcnncN9D2VvKnkw1jQ,15675
+ active_vision-0.2.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ active_vision-0.2.0.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
+ active_vision-0.2.0.dist-info/RECORD,,
@@ -1,7 +0,0 @@
- active_vision/__init__.py,sha256=dDQijes3C7zAUc_08TyblLSP6Lk0PcPPI8PYgEliKCI,43
- active_vision/core.py,sha256=D_ve-nMv2EWSaQCOBTggleo-1op8JHXchk0QLicGDqg,21715
- active_vision-0.1.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- active_vision-0.1.0.dist-info/METADATA,sha256=aA793OK3PGKnKVchMQthXl1H14xcBh_kq9tAO9o6jf0,15944
- active_vision-0.1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- active_vision-0.1.0.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
- active_vision-0.1.0.dist-info/RECORD,,