active-vision 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
active_vision/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
- __version__ = "0.1.1"
1
+ __version__ = "0.2.0"
2
2
 
3
3
  from .core import *
active_vision/core.py CHANGED
@@ -2,6 +2,7 @@ import pandas as pd
2
2
  from loguru import logger
3
3
  from fastai.vision.all import *
4
4
  import torch
5
+ import numpy as np
5
6
 
6
7
  import warnings
7
8
  from typing import Callable
@@ -189,37 +190,61 @@ class ActiveLearner:
189
190
  df = df[~df["filepath"].isin(self.train_set["filepath"])].copy()
190
191
 
191
192
  if strategy == "least-confidence":
192
- logger.info(f"Getting top {num_samples} low confidence samples")
193
-
193
+ logger.info(
194
+ f"Using least confidence strategy to get top {num_samples} samples"
195
+ )
194
196
  df.loc[:, "uncertainty_score"] = 1 - (df["pred_conf"]) / (
195
197
  self.num_classes - (self.num_classes - 1)
196
198
  )
197
199
 
198
- # Sort by descending uncertainty score
199
- uncertain_df = df.sort_values(by="uncertainty_score", ascending=False).head(
200
- num_samples
200
+ elif strategy == "margin-of-confidence":
201
+ logger.info(
202
+ f"Using margin of confidence strategy to get top {num_samples} samples"
201
203
  )
202
- return uncertain_df
204
+ if len(df["pred_raw"].iloc[0]) < 2:
205
+ logger.error("pred_raw has less than 2 elements")
206
+ raise ValueError("pred_raw has less than 2 elements")
203
207
 
204
- # TODO: Implement margin of confidence strategy
205
- elif strategy == "margin-of-confidence":
206
- logger.error("Margin of confidence strategy not implemented")
207
- raise NotImplementedError("Margin of confidence strategy not implemented")
208
+ # Calculate uncertainty score as 1 - (difference between top two predictions)
209
+ df.loc[:, "uncertainty_score"] = df["pred_raw"].apply(
210
+ lambda x: 1 - (np.sort(x)[-1] - np.sort(x)[-2])
211
+ )
208
212
 
209
- # TODO: Implement ratio of confidence strategy
210
213
  elif strategy == "ratio-of-confidence":
211
- logger.error("Ratio of confidence strategy not implemented")
212
- raise NotImplementedError("Ratio of confidence strategy not implemented")
214
+ logger.info(
215
+ f"Using ratio of confidence strategy to get top {num_samples} samples"
216
+ )
217
+ if len(df["pred_raw"].iloc[0]) < 2:
218
+ logger.error("pred_raw has less than 2 elements")
219
+ raise ValueError("pred_raw has less than 2 elements")
220
+
221
+ # Calculate uncertainty score as the ratio of the second-highest to the highest predicted probability (closer to 1 = more uncertain)
222
+ df.loc[:, "uncertainty_score"] = df["pred_raw"].apply(
223
+ lambda x: np.sort(x)[-2] / np.sort(x)[-1]
224
+ )
213
225
 
214
- # TODO: Implement entropy strategy
215
226
  elif strategy == "entropy":
216
- logger.error("Entropy strategy not implemented")
217
- raise NotImplementedError("Entropy strategy not implemented")
227
+ logger.info(f"Using entropy strategy to get top {num_samples} samples")
228
+
229
+ # Calculate uncertainty score as entropy of the prediction
230
+ df.loc[:, "uncertainty_score"] = df["pred_raw"].apply(
231
+ lambda x: -np.sum(x * np.log2(x))
232
+ )
233
+
234
+ # Normalize the uncertainty score to be between 0 and 1 by dividing by log2 of the number of classes
235
+ df.loc[:, "uncertainty_score"] = df["uncertainty_score"] / np.log2(
236
+ self.num_classes
237
+ )
218
238
 
219
239
  else:
220
240
  logger.error(f"Unknown strategy: {strategy}")
221
241
  raise ValueError(f"Unknown strategy: {strategy}")
222
242
 
243
+ df = df[
244
+ ["filepath", "pred_label", "pred_conf", "uncertainty_score", "pred_raw"]
245
+ ]
246
+ return df.sort_values(by="uncertainty_score", ascending=False).head(num_samples)
247
+
223
248
  def sample_diverse(self, df: pd.DataFrame, num_samples: int):
224
249
  """
225
250
  Sample top `num_samples` diverse samples. Returns a df with filepaths and predicted labels, and confidence scores.
@@ -258,7 +283,7 @@ class ActiveLearner:
258
283
  return;
259
284
  }
260
285
 
261
- if (e.key === "ArrowUp" || e.key === "Enter") {
286
+ if (e.key === "ArrowUp") {
262
287
  document.getElementById("submit_btn").click();
263
288
  } else if (e.key === "ArrowRight") {
264
289
  document.getElementById("next_btn").click();
@@ -284,7 +309,7 @@ class ActiveLearner:
284
309
  type="filepath",
285
310
  label="Image",
286
311
  value=filepaths[0],
287
- height=500
312
+ height=500,
288
313
  )
289
314
 
290
315
  # Add bar plot with top 5 predictions
@@ -334,7 +359,7 @@ class ActiveLearner:
334
359
  with gr.Row():
335
360
  back_btn = gr.Button("← Previous", elem_id="back_btn")
336
361
  submit_btn = gr.Button(
337
- "Submit (↑/Enter)",
362
+ "Submit ",
338
363
  variant="primary",
339
364
  elem_id="submit_btn",
340
365
  )
@@ -344,8 +369,25 @@ class ActiveLearner:
344
369
  minimum=0,
345
370
  maximum=len(filepaths) - 1,
346
371
  value=0,
372
+ step=1,
347
373
  label="Progress",
348
- interactive=False,
374
+ interactive=True,
375
+ )
376
+
377
+ # Add event handler for slider changes
378
+ progress.change(
379
+ fn=lambda idx: navigate(idx, 0),
380
+ inputs=[progress],
381
+ outputs=[
382
+ filename,
383
+ image,
384
+ pred_label,
385
+ pred_conf,
386
+ category,
387
+ current_index,
388
+ progress,
389
+ pred_plot,
390
+ ],
349
391
  )
350
392
 
351
393
  finish_btn = gr.Button("Finish Labeling", variant="primary")
@@ -490,18 +532,28 @@ class ActiveLearner:
490
532
  current_idx = int(current_idx)
491
533
 
492
534
  if selected_category is None:
493
- plot_data = None if "pred_raw" not in df.columns else pd.DataFrame(
494
- {
495
- "class": self.class_names,
496
- "probability": df["pred_raw"].iloc[current_idx],
497
- }
498
- ).nlargest(5, "probability")
535
+ plot_data = (
536
+ None
537
+ if "pred_raw" not in df.columns
538
+ else pd.DataFrame(
539
+ {
540
+ "class": self.class_names,
541
+ "probability": df["pred_raw"].iloc[current_idx],
542
+ }
543
+ ).nlargest(5, "probability")
544
+ )
499
545
  return (
500
546
  filepaths[current_idx],
501
547
  filepaths[current_idx],
502
- df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else "",
503
- f"{df['pred_conf'].iloc[current_idx]:.2%}" if "pred_conf" in df.columns else "",
504
- df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else None,
548
+ df["pred_label"].iloc[current_idx]
549
+ if "pred_label" in df.columns
550
+ else "",
551
+ f"{df['pred_conf'].iloc[current_idx]:.2%}"
552
+ if "pred_conf" in df.columns
553
+ else "",
554
+ df["pred_label"].iloc[current_idx]
555
+ if "pred_label" in df.columns
556
+ else None,
505
557
  current_idx,
506
558
  current_idx,
507
559
  plot_data,
@@ -514,35 +566,55 @@ class ActiveLearner:
514
566
  # Move to next image if not at the end
515
567
  next_idx = current_idx + 1
516
568
  if next_idx >= len(filepaths):
517
- plot_data = None if "pred_raw" not in df.columns else pd.DataFrame(
518
- {
519
- "class": self.class_names,
520
- "probability": df["pred_raw"].iloc[current_idx],
521
- }
522
- ).nlargest(5, "probability")
569
+ plot_data = (
570
+ None
571
+ if "pred_raw" not in df.columns
572
+ else pd.DataFrame(
573
+ {
574
+ "class": self.class_names,
575
+ "probability": df["pred_raw"].iloc[current_idx],
576
+ }
577
+ ).nlargest(5, "probability")
578
+ )
523
579
  return (
524
580
  filepaths[current_idx],
525
581
  filepaths[current_idx],
526
- df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else "",
527
- f"{df['pred_conf'].iloc[current_idx]:.2%}" if "pred_conf" in df.columns else "",
528
- df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else None,
582
+ df["pred_label"].iloc[current_idx]
583
+ if "pred_label" in df.columns
584
+ else "",
585
+ f"{df['pred_conf'].iloc[current_idx]:.2%}"
586
+ if "pred_conf" in df.columns
587
+ else "",
588
+ df["pred_label"].iloc[current_idx]
589
+ if "pred_label" in df.columns
590
+ else None,
529
591
  current_idx,
530
592
  current_idx,
531
593
  plot_data,
532
594
  )
533
595
 
534
- plot_data = None if "pred_raw" not in df.columns else pd.DataFrame(
535
- {
536
- "class": self.class_names,
537
- "probability": df["pred_raw"].iloc[next_idx],
538
- }
539
- ).nlargest(5, "probability")
596
+ plot_data = (
597
+ None
598
+ if "pred_raw" not in df.columns
599
+ else pd.DataFrame(
600
+ {
601
+ "class": self.class_names,
602
+ "probability": df["pred_raw"].iloc[next_idx],
603
+ }
604
+ ).nlargest(5, "probability")
605
+ )
540
606
  return (
541
607
  filepaths[next_idx],
542
608
  filepaths[next_idx],
543
- df["pred_label"].iloc[next_idx] if "pred_label" in df.columns else "",
544
- f"{df['pred_conf'].iloc[next_idx]:.2%}" if "pred_conf" in df.columns else "",
545
- df["pred_label"].iloc[next_idx] if "pred_label" in df.columns else None,
609
+ df["pred_label"].iloc[next_idx]
610
+ if "pred_label" in df.columns
611
+ else "",
612
+ f"{df['pred_conf'].iloc[next_idx]:.2%}"
613
+ if "pred_conf" in df.columns
614
+ else "",
615
+ df["pred_label"].iloc[next_idx]
616
+ if "pred_label" in df.columns
617
+ else None,
546
618
  next_idx,
547
619
  next_idx,
548
620
  plot_data,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: active-vision
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: Active learning for edge vision.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -17,10 +17,10 @@ Requires-Dist: timm>=1.0.13
17
17
  Requires-Dist: transformers>=4.48.0
18
18
  Requires-Dist: xinfer>=0.3.2
19
19
 
20
- ![Python Version](https://img.shields.io/badge/python-3.10%2B-blue?style=for-the-badge)
21
- ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg?style=for-the-badge)
22
- [![PyPI](https://img.shields.io/pypi/v/active-vision?style=for-the-badge)](https://pypi.org/project/active-vision/)
23
- ![Downloads](https://img.shields.io/pepy/dt/active-vision?style=for-the-badge&logo=pypi&logoColor=white&label=Downloads&color=purple)
20
+ [![Python Version](https://img.shields.io/badge/python-3.10%2B-blue?style=for-the-badge&logo=python&logoColor=white)](https://pypi.org/project/active-vision/)
21
+ [![PyPI](https://img.shields.io/pypi/v/active-vision?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/active-vision/)
22
+ [![Downloads](https://img.shields.io/pepy/dt/active-vision?style=for-the-badge&logo=pypi&logoColor=white&label=Downloads&color=purple)](https://pypi.org/project/active-vision/)
23
+ [![License](https://img.shields.io/badge/License-Apache%202.0-green.svg?style=for-the-badge&logo=apache&logoColor=white)](https://github.com/dnth/active-vision/blob/main/LICENSE)
24
24
 
25
25
  <p align="center">
26
26
  <img src="https://raw.githubusercontent.com/dnth/active-vision/main/assets/logo.png" alt="active-vision">
@@ -47,9 +47,9 @@ The goal of this project is to create a framework for the active learning loop f
47
47
 
48
48
  Uncertainty Sampling:
49
49
  - [X] Least confidence
50
- - [ ] Margin of confidence
51
- - [ ] Ratio of confidence
52
- - [ ] Entropy
50
+ - [X] Margin of confidence
51
+ - [X] Ratio of confidence
52
+ - [X] Entropy
53
53
 
54
54
  Diverse Sampling:
55
55
  - [X] Random sampling
@@ -172,7 +172,7 @@ The active learning loop is an iterative process and can keep going until you hit
172
172
  - You hit a budget.
173
173
  - Other criteria.
174
174
 
175
- For this dataset,I decided to stop the active learning loop at 275 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard.
175
+ For this dataset, I decided to stop the active learning loop at 275 labeled images because the performance on the evaluation set exceeds the top performing model on the leaderboard.
176
176
 
177
177
 
178
178
  | #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
@@ -0,0 +1,7 @@
1
+ active_vision/__init__.py,sha256=SxR6MPyULKlvx-86S3NIk46Tz1xlN-g_vI_aW3LitG4,43
2
+ active_vision/core.py,sha256=4Nl8e3isinIlzcD6bCbG9TTGiuG0PQkKNUIvnAsbaTY,27373
3
+ active_vision-0.2.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
4
+ active_vision-0.2.0.dist-info/METADATA,sha256=3XvDTC1Cnxd3rIUUXyY8MwTgKGcnncN9D2VvKnkw1jQ,15675
5
+ active_vision-0.2.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
+ active_vision-0.2.0.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
7
+ active_vision-0.2.0.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- active_vision/__init__.py,sha256=xWa6YKvR3wF8p_D9PprKNGP3VnxjbyVpcwnPCMhhaHM,43
2
- active_vision/core.py,sha256=jWzTOx3GCB2Sq5-JGgoi-ZD2teoIGTYas9StqZxXefo,24999
3
- active_vision-0.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
4
- active_vision-0.1.1.dist-info/METADATA,sha256=U8-IH0WJnPj6KPBsfsxcW4GZCTDY0KFxrqz7migcnro,15454
5
- active_vision-0.1.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
- active_vision-0.1.1.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
7
- active_vision-0.1.1.dist-info/RECORD,,