active-vision 0.0.5__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- active_vision/__init__.py +1 -1
- active_vision/core.py +348 -74
- {active_vision-0.0.5.dist-info → active_vision-0.1.1.dist-info}/METADATA +60 -72
- active_vision-0.1.1.dist-info/RECORD +7 -0
- active_vision-0.0.5.dist-info/RECORD +0 -7
- {active_vision-0.0.5.dist-info → active_vision-0.1.1.dist-info}/LICENSE +0 -0
- {active_vision-0.0.5.dist-info → active_vision-0.1.1.dist-info}/WHEEL +0 -0
- {active_vision-0.0.5.dist-info → active_vision-0.1.1.dist-info}/top_level.txt +0 -0
active_vision/__init__.py
CHANGED
active_vision/core.py
CHANGED
@@ -1,19 +1,7 @@
 import pandas as pd
 from loguru import logger
-from fastai.
-from fastai.vision.all import (
-    ImageDataLoaders,
-    aug_transforms,
-    Resize,
-    vision_learner,
-    accuracy,
-    valley,
-    slide,
-    minimum,
-    steep,
-)
+from fastai.vision.all import *
 import torch
-import torch.nn.functional as F
 
 import warnings
 from typing import Callable
@@ -22,7 +10,28 @@ warnings.filterwarnings("ignore", category=FutureWarning)
 
 
 class ActiveLearner:
-
+    """
+    Active Learning framework for computer vision tasks.
+
+    Attributes:
+        Model Related:
+            model: The base model architecture (str or Callable)
+            learn: fastai Learner object for training
+            lrs: Learning rate finder results
+
+        Data Related:
+            train_set (pd.DataFrame): Training dataset
+            eval_set (pd.DataFrame): Evaluation dataset with ground truth labels
+            dls: fastai DataLoaders object
+            class_names: List of class names from the dataset
+            num_classes (int): Number of classes in the dataset
+
+        Prediction Related:
+            pred_df (pd.DataFrame): Predictions on a dataframe
+            eval_df (pd.DataFrame): Predictions on evaluation data
+    """
+
+    def __init__(self, model_name: str | Callable):
         self.model = self.load_model(model_name)
 
     def load_model(self, model_name: str | Callable):
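The docstring above is now the main documentation for the class, so a quick orientation sketch may help; the import path and model name below are illustrative assumptions, not taken from the diff:

```python
# Hypothetical usage sketch: ActiveLearner is defined in active_vision/core.py.
# The model name can be any fastai/timm architecture string or a callable.
from active_vision.core import ActiveLearner

al = ActiveLearner("resnet18")
```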
@@ -43,6 +52,7 @@ class ActiveLearner:
         batch_size: int = 16,
         image_size: int = 224,
         batch_tfms: Callable = None,
+        learner_path: str = None,
     ):
         logger.info(f"Loading dataset from {filepath_col} and {label_col}")
         self.train_set = df.copy()
@@ -58,22 +68,66 @@ class ActiveLearner:
             item_tfms=Resize(image_size),
             batch_tfms=batch_tfms,
         )
-
-
+
+        if learner_path:
+            logger.info(f"Loading learner from {learner_path}")
+            gpu_available = torch.cuda.is_available()
+            if gpu_available:
+                logger.info(f"Loading learner on GPU.")
+            else:
+                logger.info(f"Loading learner on CPU.")
+
+            self.learn = load_learner(learner_path, cpu=not gpu_available)
+        else:
+            logger.info("Creating learner")
+            self.learn = vision_learner(
+                self.dls, self.model, metrics=accuracy
+            ).to_fp16()
+
         self.class_names = self.dls.vocab
+        self.num_classes = self.dls.c
         logger.info("Done. Ready to train.")
 
-    def show_batch(
-        self
+    def show_batch(
+        self,
+        num_samples: int = 9,
+        unique: bool = False,
+        num_rows: int = None,
+        num_cols: int = None,
+    ):
+        """
+        Show a batch of images from the dataset.
+
+        Args:
+            num_samples: Number of samples to show.
+            unique: Whether to show unique samples.
+            num_rows: Number of rows in the grid.
+            num_cols: Number of columns in the grid.
+        """
+        self.dls.show_batch(
+            max_n=num_samples, unique=unique, nrows=num_rows, ncols=num_cols
+        )
 
     def lr_find(self):
         logger.info("Finding optimal learning rate")
         self.lrs = self.learn.lr_find(suggest_funcs=(minimum, steep, valley, slide))
         logger.info(f"Optimal learning rate: {self.lrs.valley}")
 
-    def train(self, epochs: int, lr: float):
-
-
+    def train(self, epochs: int, lr: float, head_tuning_epochs: int = 1):
+        """
+        Train the model.
+
+        Args:
+            epochs: Number of epochs to train for.
+            lr: Learning rate.
+            head_tuning_epochs: Number of epochs to train the head.
+        """
+        logger.info(f"Training head for {head_tuning_epochs} epochs")
+        logger.info(f"Training model end-to-end for {epochs} epochs")
+        logger.info(f"Learning rate: {lr} with one-cycle learning rate scheduler")
+        self.learn.fine_tune(
+            epochs, lr, freeze_epochs=head_tuning_epochs, cbs=[ShowGraphCallback()]
+        )
 
     def predict(self, filepaths: list[str], batch_size: int = 16):
         """
@@ -87,7 +141,8 @@ class ActiveLearner:
             {
                 "filepath": filepaths,
                 "pred_label": [self.learn.dls.vocab[i] for i in cls_preds.numpy()],
-                "pred_conf": torch.max(
+                "pred_conf": torch.max(preds, dim=1)[0].numpy(),
+                "pred_raw": preds.numpy().tolist(),
             }
         )
         return self.pred_df
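`pred_df` now keeps both the winning probability (`pred_conf`) and the full probability vector (`pred_raw`), which the labeling UI later uses for its top-5 bar plot. A small sketch of what the two new expressions produce, with made-up probabilities for one image:

```python
import torch

preds = torch.tensor([[0.1, 0.7, 0.2]])   # softmax output for one image
torch.max(preds, dim=1)[0].numpy()        # array([0.7], dtype=float32) -> pred_conf
preds.numpy().tolist()                    # [[0.1, 0.7, 0.2]]           -> pred_raw
```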
@@ -131,11 +186,17 @@ class ActiveLearner:
         """
 
         # Remove samples that is already in the training set
-        df = df[~df["filepath"].isin(self.train_set["filepath"])]
+        df = df[~df["filepath"].isin(self.train_set["filepath"])].copy()
 
         if strategy == "least-confidence":
             logger.info(f"Getting top {num_samples} low confidence samples")
-
+
+            df.loc[:, "uncertainty_score"] = 1 - (df["pred_conf"]) / (
+                self.num_classes - (self.num_classes - 1)
+            )
+
+            # Sort by descending uncertainty score
+            uncertain_df = df.sort_values(by="uncertainty_score", ascending=False).head(
                 num_samples
             )
             return uncertain_df
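Note on the added formula: the denominator `self.num_classes - (self.num_classes - 1)` always evaluates to 1, so the score reduces to `1 - pred_conf` regardless of the number of classes; a quick check:

```python
num_classes, pred_conf = 10, 0.9
uncertainty = 1 - pred_conf / (num_classes - (num_classes - 1))
assert uncertainty == 1 - pred_conf  # denominator is 1, so score = 1 - confidence
```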
@@ -197,15 +258,15 @@ class ActiveLearner:
                 return;
             }
 
-            if (e.key.
+            if (e.key === "ArrowUp" || e.key === "Enter") {
                 document.getElementById("submit_btn").click();
-            } else if (e.key
+            } else if (e.key === "ArrowRight") {
                 document.getElementById("next_btn").click();
-            } else if (e.key
+            } else if (e.key === "ArrowLeft") {
                 document.getElementById("back_btn").click();
             }
         }
-        document.addEventListener('
+        document.addEventListener('keydown', shortcuts, false);
         </script>
         """
 
@@ -214,36 +275,149 @@ class ActiveLearner:
         filepaths = df["filepath"].tolist()
 
         with gr.Blocks(head=shortcut_js) as demo:
-
+            with gr.Tabs():
+                with gr.Tab("Labeling"):
+                    current_index = gr.State(value=0)
+
+                    with gr.Row(min_height=500):
+                        image = gr.Image(
+                            type="filepath",
+                            label="Image",
+                            value=filepaths[0],
+                            height=500
+                        )
+
+                        # Add bar plot with top 5 predictions
+                        with gr.Column():
+                            pred_plot = gr.BarPlot(
+                                x="probability",
+                                y="class",
+                                title="Top 5 Predictions",
+                                x_lim=[0, 1],
+                                value=None
+                                if "pred_raw" not in df.columns
+                                else pd.DataFrame(
+                                    {
+                                        "class": self.class_names,
+                                        "probability": df["pred_raw"].iloc[0],
+                                    }
+                                ).nlargest(5, "probability"),
+                            )
+
+                            filename = gr.Textbox(
+                                label="Filename", value=filepaths[0], interactive=False
+                            )
+
+                            pred_label = gr.Textbox(
+                                label="Predicted Label",
+                                value=df["pred_label"].iloc[0]
+                                if "pred_label" in df.columns
+                                else "",
+                                interactive=False,
+                            )
+                            pred_conf = gr.Textbox(
+                                label="Confidence",
+                                value=f"{df['pred_conf'].iloc[0]:.2%}"
+                                if "pred_conf" in df.columns
+                                else "",
+                                interactive=False,
+                            )
+
+                    category = gr.Radio(
+                        choices=self.class_names,
+                        label="Select Category",
+                        value=df["pred_label"].iloc[0]
+                        if "pred_label" in df.columns
+                        else None,
+                    )
 
-
-
-
+                    with gr.Row():
+                        back_btn = gr.Button("← Previous", elem_id="back_btn")
+                        submit_btn = gr.Button(
+                            "Submit (↑/Enter)",
+                            variant="primary",
+                            elem_id="submit_btn",
+                        )
+                        next_btn = gr.Button("Next →", elem_id="next_btn")
+
+                    progress = gr.Slider(
+                        minimum=0,
+                        maximum=len(filepaths) - 1,
+                        value=0,
+                        label="Progress",
+                        interactive=False,
+                    )
 
-
-
+                    finish_btn = gr.Button("Finish Labeling", variant="primary")
+
+                with gr.Tab("Zero-Shot Inference"):
+                    gr.Markdown("""
+                    Uses a VLM to predict the label of the image.
+                    """)
+
+                    import xinfer
+                    from xinfer.model_registry import model_registry
+                    from xinfer.types import ModelInputOutput
+
+                    # Get models and filter for image-to-text models
+                    all_models = model_registry.list_models()
+                    model_list = [
+                        model.id
+                        for model in all_models
+                        if model.input_output == ModelInputOutput.IMAGE_TEXT_TO_TEXT
+                    ]
+
+                    with gr.Row():
+                        with gr.Row():
+                            model_dropdown = gr.Dropdown(
+                                choices=model_list,
+                                label="Select a model",
+                                value="vikhyatk/moondream2",
+                            )
+                            device_dropdown = gr.Dropdown(
+                                choices=["cuda", "cpu"],
+                                label="Device",
+                                value="cuda" if torch.cuda.is_available() else "cpu",
+                            )
+                            dtype_dropdown = gr.Dropdown(
+                                choices=["float32", "float16", "bfloat16"],
+                                label="Data Type",
+                                value="float16"
+                                if torch.cuda.is_available()
+                                else "float32",
+                            )
+
+                    with gr.Column():
+                        prompt_textbox = gr.Textbox(
+                            label="Prompt",
+                            lines=5,
+                            value=f"Classify the image into one of the following categories: {self.class_names}. Answer with the category name only.",
+                            interactive=True,
+                        )
+                        inference_btn = gr.Button("Run Inference", variant="primary")
+
+                        result_textbox = gr.Textbox(
+                            label="Result",
+                            lines=3,
+                            interactive=False,
+                        )
+
+                    def run_zero_shot_inference(prompt, model, device, dtype, current_filename):
+                        model = xinfer.create_model(model, device=device, dtype=dtype)
+                        result = model.infer(current_filename, prompt).text
+                        return result
+
+                    inference_btn.click(
+                        fn=run_zero_shot_inference,
+                        inputs=[
+                            prompt_textbox,
+                            model_dropdown,
+                            device_dropdown,
+                            dtype_dropdown,
+                            filename,
+                        ],
+                        outputs=[result_textbox],
                     )
-            category = gr.Radio(choices=self.class_names, label="Select Category")
-
-            with gr.Row():
-                back_btn = gr.Button("← Previous (A)", elem_id="back_btn")
-                submit_btn = gr.Button(
-                    "Submit (W)",
-                    variant="primary",
-                    elem_id="submit_btn",
-                    interactive=False,
-                )
-                next_btn = gr.Button("Next → (D)", elem_id="next_btn")
-
-            progress = gr.Slider(
-                minimum=0,
-                maximum=len(filepaths) - 1,
-                value=0,
-                label="Progress",
-                interactive=False,
-            )
-
-            finish_btn = gr.Button("Finish Labeling", variant="primary")
 
             def update_submit_btn(choice):
                 return gr.Button(interactive=choice is not None)
@@ -253,23 +427,84 @@ class ActiveLearner:
             )
 
             def navigate(current_idx, direction):
+                # Convert current_idx to int before arithmetic
+                current_idx = int(current_idx)
                 next_idx = current_idx + direction
+
                 if 0 <= next_idx < len(filepaths):
-
+                    plot_data = (
+                        None
+                        if "pred_raw" not in df.columns
+                        else pd.DataFrame(
+                            {
+                                "class": self.class_names,
+                                "probability": df["pred_raw"].iloc[next_idx],
+                            }
+                        ).nlargest(5, "probability")
+                    )
+                    return (
+                        filepaths[next_idx],
+                        filepaths[next_idx],
+                        df["pred_label"].iloc[next_idx]
+                        if "pred_label" in df.columns
+                        else "",
+                        f"{df['pred_conf'].iloc[next_idx]:.2%}"
+                        if "pred_conf" in df.columns
+                        else "",
+                        df["pred_label"].iloc[next_idx]
+                        if "pred_label" in df.columns
+                        else None,
+                        next_idx,
+                        next_idx,
+                        plot_data,
+                    )
+                plot_data = (
+                    None
+                    if "pred_raw" not in df.columns
+                    else pd.DataFrame(
+                        {
+                            "class": self.class_names,
+                            "probability": df["pred_raw"].iloc[current_idx],
+                        }
+                    ).nlargest(5, "probability")
+                )
                 return (
                     filepaths[current_idx],
                     filepaths[current_idx],
+                    df["pred_label"].iloc[current_idx]
+                    if "pred_label" in df.columns
+                    else "",
+                    f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                    if "pred_conf" in df.columns
+                    else "",
+                    df["pred_label"].iloc[current_idx]
+                    if "pred_label" in df.columns
+                    else None,
                     current_idx,
                     current_idx,
+                    plot_data,
                 )
 
             def save_and_next(current_idx, selected_category):
+                # Convert current_idx to int before arithmetic
+                current_idx = int(current_idx)
+
                 if selected_category is None:
+                    plot_data = None if "pred_raw" not in df.columns else pd.DataFrame(
+                        {
+                            "class": self.class_names,
+                            "probability": df["pred_raw"].iloc[current_idx],
+                        }
+                    ).nlargest(5, "probability")
                     return (
                         filepaths[current_idx],
                         filepaths[current_idx],
+                        df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else "",
+                        f"{df['pred_conf'].iloc[current_idx]:.2%}" if "pred_conf" in df.columns else "",
+                        df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else None,
                         current_idx,
                         current_idx,
+                        plot_data,
                     )
 
                 # Save the current annotation
@@ -279,13 +514,39 @@ class ActiveLearner:
                 # Move to next image if not at the end
                 next_idx = current_idx + 1
                 if next_idx >= len(filepaths):
+                    plot_data = None if "pred_raw" not in df.columns else pd.DataFrame(
+                        {
+                            "class": self.class_names,
+                            "probability": df["pred_raw"].iloc[current_idx],
+                        }
+                    ).nlargest(5, "probability")
                     return (
                         filepaths[current_idx],
                         filepaths[current_idx],
+                        df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else "",
+                        f"{df['pred_conf'].iloc[current_idx]:.2%}" if "pred_conf" in df.columns else "",
+                        df["pred_label"].iloc[current_idx] if "pred_label" in df.columns else None,
                         current_idx,
                         current_idx,
+                        plot_data,
                     )
-
+
+                plot_data = None if "pred_raw" not in df.columns else pd.DataFrame(
+                    {
+                        "class": self.class_names,
+                        "probability": df["pred_raw"].iloc[next_idx],
+                    }
+                ).nlargest(5, "probability")
+                return (
+                    filepaths[next_idx],
+                    filepaths[next_idx],
+                    df["pred_label"].iloc[next_idx] if "pred_label" in df.columns else "",
+                    f"{df['pred_conf'].iloc[next_idx]:.2%}" if "pred_conf" in df.columns else "",
+                    df["pred_label"].iloc[next_idx] if "pred_label" in df.columns else None,
+                    next_idx,
+                    next_idx,
+                    plot_data,
+                )
 
             def convert_csv_to_parquet():
                 try:
@@ -301,19 +562,46 @@ class ActiveLearner:
             back_btn.click(
                 fn=lambda idx: navigate(idx, -1),
                 inputs=[current_index],
-                outputs=[
+                outputs=[
+                    filename,
+                    image,
+                    pred_label,
+                    pred_conf,
+                    category,
+                    current_index,
+                    progress,
+                    pred_plot,
+                ],
             )
 
             next_btn.click(
                 fn=lambda idx: navigate(idx, 1),
                 inputs=[current_index],
-                outputs=[
+                outputs=[
+                    filename,
+                    image,
+                    pred_label,
+                    pred_conf,
+                    category,
+                    current_index,
+                    progress,
+                    pred_plot,
+                ],
             )
 
             submit_btn.click(
                 fn=save_and_next,
                 inputs=[current_index, category],
-                outputs=[
+                outputs=[
+                    filename,
+                    image,
+                    pred_label,
+                    pred_conf,
+                    category,
+                    current_index,
+                    progress,
+                    pred_plot,
+                ],
             )
 
             finish_btn.click(fn=convert_csv_to_parquet)
@@ -325,10 +613,6 @@ class ActiveLearner:
         Add samples to the training set.
         """
         new_train_set = df.copy()
-        # new_train_set.drop(columns=["pred_conf"], inplace=True)
-        # new_train_set.rename(columns={"pred_label": "label"}, inplace=True)
-
-        # len_old = len(self.train_set)
 
         logger.info(f"Adding {len(new_train_set)} samples to training set")
         self.train_set = pd.concat([self.train_set, new_train_set])
@@ -340,13 +624,3 @@ class ActiveLearner:
 
         self.train_set.to_parquet(f"{output_filename}.parquet")
         logger.info(f"Saved training set to {output_filename}.parquet")
-
-        # if len(self.train_set) == len_old:
-        #     logger.warning("No new samples added to training set")
-
-        # elif len_old + len(new_train_set) < len(self.train_set):
-        #     logger.warning("Some samples were duplicates and removed from training set")
-
-        # else:
-        #     logger.info("All new samples added to training set")
-        #     logger.info(f"Training set now has {len(self.train_set)} samples")
{active_vision-0.0.5.dist-info → active_vision-0.1.1.dist-info}/METADATA
CHANGED
@@ -1,10 +1,11 @@
 Metadata-Version: 2.2
 Name: active-vision
-Version: 0.
+Version: 0.1.1
 Summary: Active learning for edge vision.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: accelerate>=1.2.1
 Requires-Dist: datasets>=3.2.0
 Requires-Dist: fastai>=2.7.18
 Requires-Dist: gradio>=5.12.0
@@ -13,6 +14,8 @@ Requires-Dist: ipywidgets>=8.1.5
 Requires-Dist: loguru>=0.7.3
 Requires-Dist: seaborn>=0.13.2
 Requires-Dist: timm>=1.0.13
+Requires-Dist: transformers>=4.48.0
+Requires-Dist: xinfer>=0.3.2
 
 
 
@@ -68,17 +71,18 @@ cd active-vision
 pip install -e .
 ```
 
-I recommend using [uv](https://docs.astral.sh/uv/) to set up a virtual environment and install the package. You can also use other virtual env of your choice.
-
-If you're using uv:
-
-```bash
-uv venv
-uv sync
-```
-Once the virtual environment is created, you can install the package using pip.
 
 > [!TIP]
+> I recommend using [uv](https://docs.astral.sh/uv/) to set up a virtual environment and install the package. You can also use other virtual env of your choice.
+>
+> If you're using uv:
+>
+> ```bash
+> uv venv
+> uv sync
+> ```
+> Once the virtual environment is created, you can install the package using pip.
+>
 > If you're using uv add a `uv` before the pip install command to install into your virtual environment. Eg:
 > ```bash
 > uv pip install active-vision
@@ -117,12 +121,16 @@ pred_df = al.predict(filepaths)
 # Sample low confidence predictions from unlabeled set
 uncertain_df = al.sample_uncertain(pred_df, num_samples=10)
 
-# Launch a Gradio UI to label the low confidence samples
+# Launch a Gradio UI to label the low confidence samples and save the labeled samples to a file
 al.label(uncertain_df, output_filename="uncertain")
 ```
 
 
 
+In the UI, you can optionally run zero-shot inference on the image. This will use a VLM to predict the label of the image. There are a dozen VLM models supported in the [x.infer project](https://github.com/dnth/x.infer).
+
+
+
 Once complete, the labeled samples will be saved into a new df.
 We can now add the newly labeled data to the training set.
 
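For reference, a standalone sketch of the same zero-shot call the UI makes (it mirrors the `xinfer.create_model` / `model.infer` calls added in `core.py`; the image path and category list here are illustrative):

```python
import xinfer

# Same model id the UI defaults to; device/dtype match the UI dropdowns.
model = xinfer.create_model("vikhyatk/moondream2", device="cuda", dtype="float16")
prompt = "Classify the image into one of the following categories: ['cat', 'dog']. Answer with the category name only."
print(model.infer("image.jpg", prompt).text)
```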
@@ -167,12 +175,12 @@ The active learning loop is an iterative process and can keep going until you hit
 For this dataset, I decided to stop the active learning loop at 275 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard.
 
 
-| #Labeled Images
-|
-| 9469
-| 9469
-| 275
-| 275
+| #Labeled Images | Evaluation Accuracy | Train Epochs | Model                | Active Learning | Source |
+|----------------:|--------------------:|-------------:|----------------------|:---------------:|--------|
+| 9469 | 94.90% | 80 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
+| 9469 | 95.11% | 200 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
+| 275 | 99.33% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/05_retrain_larger.ipynb) |
+| 275 | 93.40% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/04_relabel_loop.ipynb) |
 
 ### Dog Food
 - num classes: 2
@@ -182,11 +190,11 @@ To start the active learning loop, I labeled 20 images (10 images from each clas
 
 I decided to stop the active learning loop at 160 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
 
-| #Labeled Images
-|
-| 2100
-| 160
-| 160
+| #Labeled Images | Evaluation Accuracy | Train Epochs | Model                | Active Learning | Source |
+|----------------:|--------------------:|-------------:|----------------------|:---------------:|--------|
+| 2100 | 99.70% | ? | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/abhishek/autotrain-dog-vs-food) |
+| 160 | 100.00% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/02_train.ipynb) |
+| 160 | 97.60% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/01_label.ipynb) |
 
 ### Oxford-IIIT Pet
 - num classes: 37
@@ -196,13 +204,27 @@ To start the active learning loop, I labeled 370 images (10 images from each cla
 
 I decided to stop the active learning loop at 612 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
 
-| #Labeled Images
-|
-| 3680
-| 612
-| 612
+| #Labeled Images | Evaluation Accuracy | Train Epochs | Model                | Active Learning | Source |
+|----------------:|--------------------:|-------------:|----------------------|:---------------:|--------|
+| 3680 | 95.40% | 5 | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/walterg777/vit-base-oxford-iiit-pets) |
+| 612 | 90.26% | 11 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/02_train.ipynb) |
+| 612 | 91.38% | 11 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/03_train_vit.ipynb) |
+
+### Eurosat RGB
+- num classes: 10
+- num images: 16100
+
+To start the active learning loop, I labeled 100 images (10 images from each class) and iteratively labeled the most informative images until I hit 1188 labeled images.
+
+I decided to stop the active learning loop at 1188 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
 
 
+| #Labeled Images | Evaluation Accuracy | Train Epochs | Model                | Active Learning | Source |
+|----------------:|--------------------:|-------------:|----------------------|:---------------:|--------|
+| 16100 | 98.55% | 6 | vit-base-patch16-224 | ❌ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/03_train_all.ipynb) |
+| 1188 | 94.59% | 6 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/02_train.ipynb) |
+| 1188 | 96.57% | 13 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/eurosat_rgb/02_train.ipynb) |
+
 
 ## ➿ Workflow
 This section describes a more detailed workflow for active learning. There are two workflows for active learning that we can use depending on the availability of labeled data.
@@ -270,55 +292,21 @@ graph TD
 
 
 
-
-To test out the workflows we will use the [imagenette dataset](https://huggingface.co/datasets/frgfm/imagenette). But this will be applicable to any dataset.
-
-Imagenette is a subset of the ImageNet dataset with 10 classes. We will use this dataset to test out the workflows. Additionally, Imagenette has an existing leaderboard which we can use to evaluate the performance of the models.
-
-### Step 1: Download the dataset
-Download the imagenette dataset. The imagenette dataset has a train and validation split. Since the leaderboard is based on the validation set, we will evalutate the performance of our model on the validation set to make it easier to compare to the leaderboard.
-
-We will treat the imagenette train set as a unlabeled set and iteratively sample from it while monitoring the performance on the validation set. Ideally we will be able to get to a point where the performance on the validation set is close to the leaderboard with minimal number of labeled images.
+## 🧱 Sampling Approaches
 
-
+Recommendation 1:
+- 10% randomly selected from unlabeled items.
+- 80% selected from the lowest confidence items.
+- 10% selected as outliers.
 
-
-```python
-from datasets import load_dataset
-
-unlabeled_dataset = load_dataset("dnth/active-learning-imagenette", "unlabeled")
-eval_dataset = load_dataset("dnth/active-learning-imagenette", "evaluation")
-```
+Recommendation 2:
 
-
-
+- Sample 100 predicted images at 10–20% confidence.
+- Sample 100 predicted images at 20–30% confidence.
+- Sample 100 predicted images at 30–40% confidence, and so on.
 
-### Step 3: Training the proxy model
-Train a proxy model on the initial dataset. The proxy model will be a small model that is easy to train and deploy. We will use the fastai framework to train the model. We will use the resnet18 architecture as a starting point. Once training is complete, compute the accuracy of the proxy model on the validation set and compare it to the leaderboard.
 
-
-> With the initial model we got 91.24% accuracy on the validation set. See the [notebook](./nbs/01_initial_sampling.ipynb) for more details.
-> | Train Epochs | Number of Images | Validation Accuracy | Source |
-> |--------------|-----------------|----------------------|------------------|
-> | 10 | 100 | 91.24% | Initial sampling [notebook](./nbs/01_initial_sampling.ipynb) |
-> | 80 | 9469 | 94.90% | fastai |
-> | 200 | 9469 | 95.11% | fastai |
+Uncertainty and diversity sampling are most effective when combined. For instance, you could first sample the most uncertain items using an uncertainty sampling method, then apply a diversity sampling method such as clustering to select a diverse set from the uncertain items.
 
+Ultimately, the right ratios can depend on the specific task and dataset.
 
-
-### Step 4: Inference on the unlabeled dataset
-Run inference on the unlabeled dataset (the remaining imagenette train set) and evaluate the performance of the proxy model.
-
-### Step 5: Active learning
-Use active learning to select the most informative images to label from the unlabeled set. Pick the top 10 images from the unlabeled set that the proxy model is least confident about and label them.
-
-### Step 6: Repeat
-Repeat step 3 - 5 until the performance on the validation set is close to the leaderboard. Note the number of labeled images vs the performance on the validation set. Ideally we want to get to a point where the performance on the validation set is close to the leaderboard with minimal number of labeled images.
-
-
-After the first iteration we got 94.57% accuracy on the validation set. See the [notebook](./nbs/03_retrain_model.ipynb) for more details.
-
-> [!TIP]
-> | Train Epochs | Number of Images | Validation Accuracy | Source |
-> |--------------|-----------------|----------------------|------------------|
-> | 10 | 200 | 94.57% | First relabeling [notebook](./nbs/03_retrain_model.ipynb) | -->
active_vision-0.1.1.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+active_vision/__init__.py,sha256=xWa6YKvR3wF8p_D9PprKNGP3VnxjbyVpcwnPCMhhaHM,43
+active_vision/core.py,sha256=jWzTOx3GCB2Sq5-JGgoi-ZD2teoIGTYas9StqZxXefo,24999
+active_vision-0.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+active_vision-0.1.1.dist-info/METADATA,sha256=U8-IH0WJnPj6KPBsfsxcW4GZCTDY0KFxrqz7migcnro,15454
+active_vision-0.1.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+active_vision-0.1.1.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
+active_vision-0.1.1.dist-info/RECORD,,
active_vision-0.0.5.dist-info/RECORD
REMOVED
@@ -1,7 +0,0 @@
-active_vision/__init__.py,sha256=u-7eEtxmLFoQfY0fM9JSs_lWb4e1c7WxR3cC619BTXE,43
-active_vision/core.py,sha256=mKS-ZZunjPgXuavm_J4oYiO9lm6UNRjFEzIn4kNfdVA,13421
-active_vision-0.0.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-active_vision-0.0.5.dist-info/METADATA,sha256=mSFB-DeJ43roTwswTp3oHcG3CIyKnO-7ZCqaYbw26eQ,15846
-active_vision-0.0.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-active_vision-0.0.5.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
-active_vision-0.0.5.dist-info/RECORD,,
{active_vision-0.0.5.dist-info → active_vision-0.1.1.dist-info}/LICENSE
File without changes
{active_vision-0.0.5.dist-info → active_vision-0.1.1.dist-info}/WHEEL
File without changes
{active_vision-0.0.5.dist-info → active_vision-0.1.1.dist-info}/top_level.txt
File without changes