active_vision-0.4.1-py3-none-any.whl → active_vision-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
active_vision/__init__.py CHANGED
@@ -1,3 +1,3 @@
-__version__ = "0.4.1"
+__version__ = "0.4.2"
 
 from .core import *
active_vision/core.py CHANGED
@@ -20,8 +20,11 @@ from fastai.vision.all import (
     valley,
     vision_learner,
 )
+from itables import show
 from loguru import logger
 
+from .utils import get_base64_image
+
 warnings.filterwarnings("ignore", category=FutureWarning)
 pd.set_option("display.max_colwidth", 50)
 
@@ -102,6 +105,9 @@ class ActiveLearner:
         self.valid_set = self.learn.dls.valid_ds.items
         self.class_names = self.dls.vocab
         self.num_classes = self.dls.c
+
+        logger.info(f"Training set size: {len(self.train_set)}")
+        logger.info(f"Validation set size: {len(self.valid_set)}")
         logger.info("Done. Ready to train.")
 
     def load_dataset(
@@ -209,7 +215,9 @@ class ActiveLearner:
         logger.info(f"Learning rate: {lr} with one-cycle learning rate scheduler")
         self.learn.fine_tune(epochs, lr, freeze_epochs=head_tuning_epochs)
 
-    def predict(self, filepaths: list[str], batch_size: int = 16):
+    def predict(
+        self, filepaths: list[str], batch_size: int = 16, interactive: bool = False
+    ):
         """
         Run inference on an unlabeled dataset. Returns a df with filepaths and predicted labels, and confidence scores.
         """
@@ -257,47 +265,95 @@ class ActiveLearner:
             lambda x: [round(e, 4) for e in x]
         )
 
+        if interactive:
+            logger.info("Rendering interactive table")
+            interactive_pred_df = self.pred_df.copy()
+            interactive_pred_df["image"] = interactive_pred_df["filepath"].apply(
+                get_base64_image
+            )
+            interactive_pred_df = interactive_pred_df[
+                ["image", "filepath", "pred_label", "pred_conf", "logits", "embeddings"]
+            ]
+
+            show(
+                interactive_pred_df,
+                columnDefs=[{"width": "200px", "targets": "_all"}],
+                style="width:1200px",
+                autoWidth=False,
+            )
+
         return self.pred_df
 
     def evaluate(
-        self, df: pd.DataFrame, filepath_col: str, label_col: str, batch_size: int = 16
+        self,
+        df: pd.DataFrame,
+        filepath_col: str,
+        label_col: str,
+        batch_size: int = 16,
+        interactive: bool = False,
     ):
         """
         Evaluate on a labeled dataset. Returns a score.
         """
         self.eval_set = df.copy()
 
-        filepaths = self.eval_set[filepath_col].tolist()
-        labels = self.eval_set[label_col].tolist()
-        test_dl = self.dls.test_dl(filepaths, bs=batch_size)
-        preds, _, cls_preds = self.learn.get_preds(dl=test_dl, with_decoded=True)
+        test_dl = self.dls.test_dl(self.eval_set, bs=batch_size, with_labels=True)
+        probs, targs, cls_preds, loss = self.learn.get_preds(
+            dl=test_dl, with_decoded=True, with_loss=True
+        )
 
-        self.eval_df = pd.DataFrame(
+        eval_df = pd.DataFrame(
             {
-                "filepath": filepaths,
-                "label": labels,
+                "filepath": self.eval_set[filepath_col].tolist(),
+                "label": self.eval_set[label_col].tolist(),
                 "pred_label": [self.learn.dls.vocab[i] for i in cls_preds.numpy()],
+                "pred_conf": torch.max(F.softmax(probs, dim=1), dim=1)[0].numpy(),
+                "loss": loss.numpy().tolist(),
             }
         )
 
-        accuracy = float((self.eval_df["label"] == self.eval_df["pred_label"]).mean())
+        accuracy = float((eval_df["label"] == eval_df["pred_label"]).mean())
         self.eval_accuracy = accuracy
         logger.info(f"Accuracy: {accuracy:.2%}")
-        return accuracy
+
+        if interactive:
+            logger.info("Rendering interactive table")
+
+            interactive_eval_df = eval_df.copy()
+            interactive_eval_df["image"] = interactive_eval_df["filepath"].apply(
+                get_base64_image
+            )
+            interactive_eval_df = interactive_eval_df[
+                ["image", "filepath", "label", "pred_label", "pred_conf", "loss"]
+            ]
+
+            show(
+                interactive_eval_df,
+                columnDefs=[{"width": "200px", "targets": "_all"}],
+                style="width:1200px",
+                autoWidth=False,
+            )
+
+        return eval_df
 
     def sample_uncertain(
-        self, df: pd.DataFrame, num_samples: int, strategy: str = "least-confidence"
+        self, df: pd.DataFrame, num_samples: int, strategy: str = "least-confidence", interactive: bool = False
     ):
         """
         Sample top `num_samples` low confidence samples. Returns a df with filepaths and predicted labels, and confidence scores.
 
+        Args:
+            df: DataFrame with predictions
+            num_samples: Number of samples to select
+            strategy: Sampling strategy to use
+            interactive: Whether to display an interactive table of results
+
         Strategies:
-        - least-confidence: Get top `num_samples` low confidence samples.
-        - margin-of-confidence: Get top `num_samples` samples with the smallest margin between the top two predictions.
-        - ratio-of-confidence: Get top `num_samples` samples with the highest ratio between the top two predictions.
-        - entropy: Get top `num_samples` samples with the highest entropy.
+            - least-confidence: Get top `num_samples` low confidence samples.
+            - margin-of-confidence: Get top `num_samples` samples with the smallest margin between the top two predictions.
+            - ratio-of-confidence: Get top `num_samples` samples with the highest ratio between the top two predictions.
+            - entropy: Get top `num_samples` samples with the highest entropy.
         """
-
         # Remove samples that are already in the training set
         df = df[~df["filepath"].isin(self.dataset["filepath"])].copy()
 
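The new `interactive` flag added above renders an itables view with base64 thumbnails; note that `evaluate` now returns the per-item DataFrame rather than a bare accuracy score. A hedged usage sketch (not part of the diff): `al` is assumed to be an already-trained `ActiveLearner`, and the file paths and labels below are illustrative.

```python
import pandas as pd

# Hedged sketch of the new interactive tables in active-vision 0.4.2.
# Constructing and training `al` (an ActiveLearner) is outside this diff.
unlabeled = ["images/0001.jpg", "images/0002.jpg"]  # illustrative paths
labeled_df = pd.DataFrame({"filepath": unlabeled, "label": ["cat", "dog"]})

pred_df = al.predict(unlabeled, batch_size=16, interactive=True)
eval_df = al.evaluate(labeled_df, filepath_col="filepath", label_col="label", interactive=True)
picks = al.sample_uncertain(pred_df, num_samples=50, strategy="least-confidence", interactive=True)
```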
@@ -366,20 +422,41 @@ class ActiveLearner:
         ]
 
         df["score"] = df["score"].round(4)
+        result_df = df.sort_values(by="score", ascending=False).head(num_samples)
+
+        if interactive:
+            logger.info("Rendering interactive table")
+            interactive_df = result_df.copy()
+            interactive_df["image"] = interactive_df["filepath"].apply(get_base64_image)
+            interactive_df = interactive_df[
+                ["image", "filepath", "strategy", "score", "pred_label", "pred_conf"]
+            ]
 
-        return df.sort_values(by="score", ascending=False).head(num_samples)
+            show(
+                interactive_df,
+                columnDefs=[{"width": "200px", "targets": "_all"}],
+                style="width:1200px",
+                autoWidth=False,
+            )
+
+        return result_df
 
     def sample_diverse(
-        self, df: pd.DataFrame, num_samples: int, strategy: str = "model-based-outlier"
+        self, df: pd.DataFrame, num_samples: int, strategy: str = "model-based-outlier", interactive: bool = False
    ):
         """
         Sample top `num_samples` diverse samples. Returns a df with filepaths and predicted labels, and confidence scores.
 
-        Strategies:
-        - model-based-outlier: Get top `num_samples` samples with lowest activation of the model's last layer.
-        - cluster-based: Get top `num_samples` samples with the highest distance to the nearest neighbor.
-        - representative: Get top `num_samples` samples with the highest distance to the centroid of the training set.
+        Args:
+            df: DataFrame with predictions
+            num_samples: Number of samples to select
+            strategy: Sampling strategy to use
+            interactive: Whether to display an interactive table of results
 
+        Strategies:
+            - model-based-outlier: Get top `num_samples` samples with lowest activation of the model's last layer.
+            - cluster-based: Get top `num_samples` samples with the highest distance to the nearest neighbor.
+            - representative: Get top `num_samples` samples with the highest distance to the centroid of the training set.
         """
         # Remove samples that are already in the training set
         df = df[~df["filepath"].isin(self.dataset["filepath"])].copy()
@@ -444,19 +521,39 @@ class ActiveLearner:
             ]
 
             df["score"] = df["score"].round(4)
+            result_df = df.sort_values(by="score", ascending=False).head(num_samples)
+
+            if interactive:
+                logger.info("Rendering interactive table")
+                interactive_df = result_df.copy()
+                interactive_df["image"] = interactive_df["filepath"].apply(get_base64_image)
+                interactive_df = interactive_df[
+                    ["image", "filepath", "strategy", "score", "pred_label", "pred_conf"]
+                ]
 
-            # Sort by score ascending higher rank = more outlier-like compared to the validation set
-            return df.sort_values(by="score", ascending=False).head(num_samples)
+                show(
+                    interactive_df,
+                    columnDefs=[{"width": "200px", "targets": "_all"}],
+                    style="width:1200px",
+                    autoWidth=False,
+                )
+
+            return result_df
 
         else:
             logger.error(f"Unknown strategy: {strategy}")
             raise ValueError(f"Unknown strategy: {strategy}")
 
-    def sample_random(self, df: pd.DataFrame, num_samples: int, seed: int = None):
+    def sample_random(self, df: pd.DataFrame, num_samples: int, seed: int = None, interactive: bool = False):
         """
         Sample `num_samples` random samples. Returns a df with filepaths and predicted labels, and confidence scores.
-        """
 
+        Args:
+            df: DataFrame with predictions
+            num_samples: Number of samples to select
+            seed: Random seed for reproducibility
+            interactive: Whether to display an interactive table of results
+        """
         logger.info(f"Sampling {num_samples} random samples")
         df = df[~df["filepath"].isin(self.dataset["filepath"])].copy()
         df["strategy"] = "random"
@@ -464,9 +561,26 @@ class ActiveLearner:
 
         if seed is not None:
             logger.info(f"Using seed: {seed}")
-        return df.sample(n=num_samples, random_state=seed)
+        result_df = df.sample(n=num_samples, random_state=seed)
+
+        if interactive:
+            logger.info("Rendering interactive table")
+            interactive_df = result_df.copy()
+            interactive_df["image"] = interactive_df["filepath"].apply(get_base64_image)
+            interactive_df = interactive_df[
+                ["image", "filepath", "strategy", "score", "pred_label", "pred_conf"]
+            ]
+
+            show(
+                interactive_df,
+                columnDefs=[{"width": "200px", "targets": "_all"}],
+                style="width:1200px",
+                autoWidth=False,
+            )
+
+        return result_df
 
-    def sample_combination(self, df: pd.DataFrame, num_samples: int, combination: dict):
+    def sample_combination(self, df: pd.DataFrame, num_samples: int, combination: dict, interactive: bool = False):
         """
         Sample samples based on a combination of strategies.
 
@@ -491,6 +605,7 @@ class ActiveLearner:
                 - representative
             Other:
                 - random
+            interactive: Whether to display an interactive table of results
 
         Returns:
             DataFrame containing the combined samples
@@ -499,9 +614,7 @@ class ActiveLearner:
 
         # Validate total proportions sum to 1
         if not np.isclose(sum(combination.values()), 1.0):
-            raise ValueError(
-                f"Proportions must sum to 1, got {sum(combination.values())}"
-            )
+            raise ValueError(f"Proportions must sum to 1, got {sum(combination.values())}")
 
         # Calculate samples per strategy and handle rounding
         samples_per_strategy = {
@@ -528,14 +641,14 @@ class ActiveLearner:
                 "entropy",
             ]:
                 strategy_df = self.sample_uncertain(
-                    df=df, num_samples=n_samples, strategy=strategy
+                    df=df, num_samples=n_samples, strategy=strategy, interactive=False
                 )
             elif strategy in ["model-based-outlier", "cluster-based", "representative"]:
                 strategy_df = self.sample_diverse(
-                    df=df, num_samples=n_samples, strategy=strategy
+                    df=df, num_samples=n_samples, strategy=strategy, interactive=False
                 )
             elif strategy == "random":
-                strategy_df = self.sample_random(df=df, num_samples=n_samples)
+                strategy_df = self.sample_random(df=df, num_samples=n_samples, interactive=False)
             else:
                 raise ValueError(f"Unknown strategy: {strategy}")
 
@@ -543,7 +656,24 @@ class ActiveLearner:
             # Remove selected samples from the pool to avoid duplicates
             df = df[~df["filepath"].isin(strategy_df["filepath"])]
 
-        return pd.concat(sampled_dfs, ignore_index=True)
+        combined_df = pd.concat(sampled_dfs, ignore_index=True)
+
+        if interactive:
+            logger.info("Rendering interactive table")
+            interactive_df = combined_df.copy()
+            interactive_df["image"] = interactive_df["filepath"].apply(get_base64_image)
+            interactive_df = interactive_df[
+                ["image", "filepath", "strategy", "score", "pred_label", "pred_conf"]
+            ]
+
+            show(
+                interactive_df,
+                columnDefs=[{"width": "200px", "targets": "_all"}],
+                style="width:1200px",
+                autoWidth=False,
+            )
+
+        return combined_df
 
     def summary(self, filename: str = None, show: bool = True):
         results_df = pd.DataFrame(
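As the hunk above shows, `sample_combination` now delegates to the individual samplers with `interactive=False`, so only the combined table is rendered. A hedged sketch of calling it (the proportions below are illustrative and must sum to 1):

```python
# Hedged sketch: combine samplers by proportion (values must sum to 1.0).
# Pass interactive=True to render one itables view of the final selection.
combined = al.sample_combination(
    pred_df,
    num_samples=100,
    combination={
        "least-confidence": 0.8,     # 80 of 100 samples
        "model-based-outlier": 0.1,  # 10 of 100 samples
        "random": 0.1,               # 10 of 100 samples
    },
    interactive=True,
)
```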
@@ -567,13 +697,22 @@ class ActiveLearner:
         if filename is None:
             # Generate filename with timestamp, accuracy and dataset size
             from datetime import datetime
+
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            accuracy_str = f"{self.eval_accuracy:.2%}" if self.eval_accuracy is not None else "no_eval"
+            accuracy_str = (
+                f"{self.eval_accuracy:.2%}"
+                if self.eval_accuracy is not None
+                else "no_eval"
+            )
             dataset_size = len(self.train_set) + len(self.valid_set)
-            filename = f"{self.name}_{timestamp}_acc_{accuracy_str}_n_{dataset_size}.parquet"
+            filename = f"{self.name}_{timestamp}_acc_{accuracy_str}_n_{dataset_size}_results.parquet"
         elif not filename.endswith(".parquet"):
             filename = f"{filename}.parquet"
 
+        logger.info("Saving learner to pickle file")
+        learner_name = filename.replace("_results.parquet", "_learner.pkl")
+        self.learn.export(learner_name)
+
         results_df.to_parquet(filename)
         logger.info(f"Saved results to {filename}")
         if show:
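With this change, `summary()` exports the fastai `Learner` alongside the parquet results. A hedged sketch of reading both back; the timestamped filenames below merely illustrate the f-string pattern in the hunk above:

```python
import pandas as pd
from fastai.vision.all import load_learner

# Illustrative filenames following the pattern
# {name}_{timestamp}_acc_{accuracy_str}_n_{dataset_size}_results.parquet
results_df = pd.read_parquet("cycle-1_20250101_120000_acc_95.31%_n_1188_results.parquet")
learn = load_learner("cycle-1_20250101_120000_acc_95.31%_n_1188_learner.pkl")
```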
@@ -597,12 +736,14 @@ class ActiveLearner:
                     return;
                 }
 
-                if (e.key === "ArrowUp") {
+                if (e.key === "ArrowUp" || e.key === "w") {
                     document.getElementById("submit_btn").click();
-                } else if (e.key === "ArrowRight") {
+                } else if (e.key === "ArrowRight" || e.key === "d") {
                     document.getElementById("next_btn").click();
-                } else if (e.key === "ArrowLeft") {
+                } else if (e.key === "ArrowLeft" || e.key === "a") {
                     document.getElementById("back_btn").click();
+                } else if (e.key === "ArrowDown" || e.key === "s") {
+                    document.getElementById("finish_btn").click();
                 }
             }
             document.addEventListener('keydown', shortcuts, false);
@@ -710,6 +851,9 @@ class ActiveLearner:
                 interactive=True,
             )
 
+            # Add elem_id to the finish button so the keyboard shortcut can target it
+            finish_btn = gr.Button("Finish Labeling", variant="primary", elem_id="finish_btn")
+
             # Add event handler for slider changes
             progress.change(
                 fn=lambda idx: navigate(idx, 0),
@@ -728,8 +872,6 @@ class ActiveLearner:
             ],
         )
 
-        finish_btn = gr.Button("Finish Labeling", variant="primary")
-
         with gr.Tab("Zero-Shot Inference"):
             gr.Markdown("""
             Uses a VLM to predict the label of the image.
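Taken together, the two hunks above move `finish_btn` earlier in the layout and give it an `elem_id`, which is what the added ArrowDown/`s` shortcut uses to find the button in the DOM. A minimal, self-contained sketch of that Gradio pattern (label and id taken from the diff):

```python
import gradio as gr

with gr.Blocks() as demo:
    # elem_id gives the rendered button a stable DOM id, so page JavaScript
    # can trigger it via document.getElementById("finish_btn").click()
    finish_btn = gr.Button("Finish Labeling", variant="primary", elem_id="finish_btn")
```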
active_vision/utils.py ADDED
@@ -0,0 +1,24 @@
+import base64
+from io import BytesIO
+
+from loguru import logger
+from PIL import Image
+
+
+def get_base64_image(filepath, width=200):
+    try:
+        with Image.open(filepath) as img:
+            # Convert to RGB if needed
+            if img.mode != "RGB":
+                img = img.convert("RGB")
+
+            aspect_ratio = img.height / img.width
+            height = int(width * aspect_ratio)
+            img = img.resize((width, height), Image.Resampling.LANCZOS)
+            buffered = BytesIO()
+            img.save(buffered, format="JPEG")
+            img_str = base64.b64encode(buffered.getvalue()).decode()
+            return f'<img src="data:image/jpeg;base64,{img_str}" width="{width}" alt="Sample Image">'
+    except Exception as e:
+        logger.warning(f"Failed to encode image {filepath}: {e}")
+        return None
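The new helper returns an HTML `<img>` tag wrapping a base64-encoded JPEG thumbnail (or `None` on failure), which itables renders inline. A hedged sketch of using it outside `ActiveLearner`, mirroring what core.py does for its interactive tables (paths are illustrative):

```python
import pandas as pd
from itables import show

from active_vision.utils import get_base64_image

# Embed thumbnails in any filepath-bearing DataFrame; illustrative paths.
df = pd.DataFrame({"filepath": ["images/cat.jpg", "images/dog.jpg"]})
df["image"] = df["filepath"].apply(get_base64_image)  # <img …> tag or None

show(df, columnDefs=[{"width": "200px", "targets": "_all"}], style="width:1200px", autoWidth=False)
```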
active_vision-0.4.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: active-vision
-Version: 0.4.1
+Version: 0.4.2
 Summary: Active learning for computer vision.
 Project-URL: Homepage, https://github.com/dnth/active-vision
 Project-URL: Bug Tracker, https://github.com/dnth/active-vision/issues
@@ -12,6 +12,7 @@ Requires-Dist: fastai>=2.7.18
 Requires-Dist: gradio>=5.12.0
 Requires-Dist: ipykernel>=6.29.5
 Requires-Dist: ipywidgets>=8.1.5
+Requires-Dist: itables>=2.2.4
 Requires-Dist: loguru>=0.7.3
 Requires-Dist: seaborn>=0.13.2
 Requires-Dist: timm>=1.0.13
@@ -59,7 +60,7 @@ Description-Content-Type: text/markdown
     <br />
     <a href="https://dnth.github.io/active-vision" target="_blank" rel="noopener noreferrer"><strong>Explore the docs »</strong></a>
     <br />
-    <a href="#️-quickstart" target="_blank" rel="noopener noreferrer">Quickstart</a>
+    <a href="#-quickstart" target="_blank" rel="noopener noreferrer">Quickstart</a>
     ·
    <a href="https://github.com/dnth/active-vision/issues/new?assignees=&labels=Feature+Request&projects=&template=feature_request.md" target="_blank" rel="noopener noreferrer">Feature Request</a>
     ·
@@ -109,7 +110,12 @@ Get a release from PyPI
 pip install active-vision
 ```
 
-Install from source
+Install bleeding edge from source
+```bash
+pip install git+https://github.com/dnth/active-vision.git
+```
+
+Local install
 ```bash
 git clone https://github.com/dnth/active-vision.git
 cd active-vision
@@ -135,8 +141,8 @@ pip install -e .
 
 ## 🚀 Quickstart
 
-[![Open In Colab][colab_badge]](https://colab.research.google.com/github/dnth/active-vision/blob/main/nbs/imagenette/quickstart.ipynb)
-[![Open In Kaggle][kaggle_badge]](https://kaggle.com/kernels/welcome?src=https://github.com/dnth/active-vision/blob/main/nbs/imagenette/quickstart.ipynb)
+[![Open In Colab][colab_badge]](https://colab.research.google.com/github/dnth/active-vision/blob/main/docs/quickstart.ipynb)
+[![Open In Kaggle][kaggle_badge]](https://kaggle.com/kernels/welcome?src=https://github.com/dnth/active-vision/blob/main/docs/quickstart.ipynb)
 
 The following are code snippets for the active learning loop in active-vision. I recommend running the quickstart notebook in Colab or Kaggle to see the full workflow.
 
@@ -275,7 +281,34 @@ I decided to stop the active learning loop at 1188 labeled images because the pe
 | 1188            | 96.57%              | 13           | vit-base-patch16-224 | ✓               | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/02_train.ipynb) |
 
 
-## ➿ Workflow
+### Beans
+- num classes: 3
+- num images: 1034
+
+| #Labeled Images | Evaluation Accuracy | Train Epochs | Model                 | Active Learning | Source                                                                                  |
+|----------------:|--------------------:|-------------:|-----------------------|:---------------:|------------------------------------------------------------------------------------------|
+| 380             | 95.31%              | 13           | vit_small_patch16_224 | ✓               | [Link](https://github.com/dnth/active-vision/blob/main/nbs/beans/active_learning.ipynb)  |
+| 1034            | 98.43%              | 13           | vit_small_patch16_224 | ❌              | [Link](https://github.com/dnth/active-vision/blob/main/nbs/beans/train_all.ipynb)        |
+
+
+## 🧱 Sampling Approaches
+
+Uncertainty and diversity sampling are most effective when combined. Some recommendations:
+
+- Least Confidence Sampling with Cluster-Based Sampling: This approach first selects a large sample of the most uncertain items using least confidence sampling, and then applies cluster-based sampling to ensure diversity within that selection. This method helps to select data points that are both uncertain and representative of different clusters in the data.
+
+- Uncertainty Sampling with Model-Based Outliers: This strategy combines uncertainty sampling to find items near the decision boundary with model-based outlier detection to identify items with features that are relatively unknown to the current model. This approach aims to maximize the model's current confusion by selecting items that are both uncertain and different from the current training data.
+
+- Uncertainty Sampling with Model-Based Outliers and Clustering: To address the issue of the previous approach potentially oversampling similar items, clustering can be applied after using uncertainty sampling with model-based outliers to ensure diversity.
+
+- Representative Sampling with Cluster-Based Sampling: This method addresses the limitation of basic representative sampling by independently clustering both the training data and the unlabeled data. It then identifies clusters that are most representative of the unlabeled data and oversamples from those clusters. This leads to a more diverse set of items compared to representative sampling alone.
+
+- Sampling from the Highest-Entropy Cluster: This method combines clustering with uncertainty by selecting the cluster with the highest average uncertainty (using entropy). This method aims to sample data points from the cluster that straddles the decision boundary most closely.
+
+- Combining Active Learning Scores: Rather than filtering the output of one sampling strategy with another, this approach combines the scores from different sampling strategies and ranks items based on an aggregate score. This allows for a more nuanced approach to selecting items.
+
+
+## ➿ Workflows
 This section describes a more detailed workflow for active learning. There are two workflows for active learning that we can use depending on the availability of labeled data.
 
 ### With unlabeled data
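To make the first recommendation in the new Sampling Approaches section above concrete, here is a hedged sketch (not part of the package): oversample with least-confidence sampling, then keep one item per embedding cluster. It assumes `pred_df` comes from `ActiveLearner.predict()` and still carries its `embeddings` column, and that scikit-learn is available.

```python
import numpy as np
from sklearn.cluster import KMeans

# Least-confidence pool first, then cluster the pool's embeddings and keep
# one representative per cluster for diversity.
pool = al.sample_uncertain(pred_df, num_samples=200, strategy="least-confidence")
emb = np.vstack(pool["embeddings"].to_numpy())
clusters = KMeans(n_clusters=20, n_init="auto").fit_predict(emb)
diverse = pool.groupby(clusters).head(1)  # rows are score-sorted, so this keeps the most uncertain item per cluster
```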
@@ -340,22 +373,6 @@ graph TD
 ```
 
 
+## 📚 Acknowledgements
 
-## 🧱 Sampling Approaches
-
-Recommendation 1:
-- 10% randomly selected from unlabeled items.
-- 80% selected from the lowest confidence items.
-- 10% selected as outliers.
-
-Recommendation 2:
-
-- Sample 100 predicted images at 10–20% confidence.
-- Sample 100 predicted images at 20–30% confidence.
-- Sample 100 predicted images at 30–40% confidence, and so on.
-
-
-Uncertainty and diversity sampling are most effective when combined. For instance, you could first sample the most uncertain items using an uncertainty sampling method, then apply a diversity sampling method such as clustering to select a diverse set from the uncertain items.
-
-Ultimately, the right ratios can depend on the specific task and dataset.
-
+This project is inspired by the book [Human-in-the-Loop Machine Learning by Robert Monarch](https://www.manning.com/books/human-in-the-loop-machine-learning).
active_vision-0.4.2.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+active_vision/__init__.py,sha256=WHlXpfYbX7Qp1P6kfuEm60U1xl8_A6fQPVJDSLjojYo,43
+active_vision/core.py,sha256=51fskr1E2QLwgxuKzv75yxqH73ktIxEPovxqN7LWibM,47081
+active_vision/utils.py,sha256=IV8RbxCX2Owk0-fF0-LK53YJr8erHQjYb6gSrj9oACo,841
+active_vision-0.4.2.dist-info/METADATA,sha256=v_fUTAj-2vn33eQJH3aafPAMw3KINJe-u9XK-2Nojyk,19812
+active_vision-0.4.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+active_vision-0.4.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+active_vision-0.4.2.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- active_vision/__init__.py,sha256=vauWDAlrr6fiIylIKSzErXOEopRtTsBk8G4hC9418M0,43
2
- active_vision/core.py,sha256=ZDRylM3KsoLxy9qA9bld4WxzcKcyCwH8IJ1cFxtz5mE,41607
3
- active_vision-0.4.1.dist-info/METADATA,sha256=LpgLc_E7jJVXxUHrIPv-1RZq_CEE3enyb0O2PDZMrJM,17262
4
- active_vision-0.4.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
5
- active_vision-0.4.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
6
- active_vision-0.4.1.dist-info/RECORD,,