active-vision 0.0.4__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- active_vision/__init__.py +1 -1
- active_vision/core.py +343 -62
- {active_vision-0.0.4.dist-info → active_vision-0.1.0.dist-info}/METADATA +111 -19
- active_vision-0.1.0.dist-info/RECORD +7 -0
- active_vision-0.0.4.dist-info/RECORD +0 -7
- {active_vision-0.0.4.dist-info → active_vision-0.1.0.dist-info}/LICENSE +0 -0
- {active_vision-0.0.4.dist-info → active_vision-0.1.0.dist-info}/WHEEL +0 -0
- {active_vision-0.0.4.dist-info → active_vision-0.1.0.dist-info}/top_level.txt +0 -0
active_vision/__init__.py
CHANGED
active_vision/core.py
CHANGED
@@ -1,37 +1,48 @@
 import pandas as pd
 from loguru import logger
-from fastai.vision.
-from fastai.callback.all import ShowGraphCallback
-from fastai.vision.all import (
-    ImageDataLoaders,
-    aug_transforms,
-    Resize,
-    vision_learner,
-    accuracy,
-    valley,
-    slide,
-    minimum,
-    steep,
-)
+from fastai.vision.all import *
 import torch
 import torch.nn.functional as F

 import warnings
+from typing import Callable

 warnings.filterwarnings("ignore", category=FutureWarning)


 class ActiveLearner:
-
+    """
+    Active Learning framework for computer vision tasks.
+
+    Attributes:
+        Model Related:
+            model: The base model architecture (str or Callable)
+            learn: fastai Learner object for training
+            lrs: Learning rate finder results
+
+        Data Related:
+            train_set (pd.DataFrame): Training dataset
+            eval_set (pd.DataFrame): Evaluation dataset with ground truth labels
+            dls: fastai DataLoaders object
+            class_names: List of class names from the dataset
+            num_classes (int): Number of classes in the dataset
+
+        Prediction Related:
+            pred_df (pd.DataFrame): Predictions on a dataframe
+            eval_df (pd.DataFrame): Predictions on evaluation data
+    """
+
+    def __init__(self, model_name: str | Callable):
         self.model = self.load_model(model_name)

-    def load_model(self, model_name: str):
-
-
-
-
-
-
+    def load_model(self, model_name: str | Callable):
+        if isinstance(model_name, Callable):
+            logger.info(f"Loading fastai model {model_name.__name__}")
+            return model_name
+
+        if isinstance(model_name, str):
+            logger.info(f"Loading timm model {model_name}")
+            return model_name

     def load_dataset(
         self,
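The widened `model_name: str | Callable` signature changes how models are specified: a string is logged as a timm model, a callable as a fastai model. A minimal sketch of the two call styles, assuming `ActiveLearner` is re-exported from the package root (the export itself is not shown in this diff):

```python
from active_vision import ActiveLearner  # assumes a package-root re-export
from fastai.vision.all import resnet18   # fastai re-exports torchvision architectures

al_timm = ActiveLearner("convnext_small_in22k")  # str -> logged as a timm model
al_fastai = ActiveLearner(resnet18)              # Callable -> logged as a fastai model
```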
@@ -41,6 +52,8 @@ class ActiveLearner:
         valid_pct: float = 0.2,
         batch_size: int = 16,
         image_size: int = 224,
+        batch_tfms: Callable = None,
+        learner_path: str = None,
     ):
         logger.info(f"Loading dataset from {filepath_col} and {label_col}")
         self.train_set = df.copy()
@@ -54,21 +67,68 @@
             label_col=label_col,
             bs=batch_size,
             item_tfms=Resize(image_size),
-            batch_tfms=
+            batch_tfms=batch_tfms,
         )
-
-
+
+        if learner_path:
+            logger.info(f"Loading learner from {learner_path}")
+            gpu_available = torch.cuda.is_available()
+            if gpu_available:
+                logger.info(f"Loading learner on GPU.")
+            else:
+                logger.info(f"Loading learner on CPU.")
+
+            self.learn = load_learner(learner_path, cpu=not gpu_available)
+        else:
+            logger.info("Creating learner")
+            self.learn = vision_learner(
+                self.dls, self.model, metrics=accuracy
+            ).to_fp16()
+
         self.class_names = self.dls.vocab
+        self.num_classes = self.dls.c
         logger.info("Done. Ready to train.")

+    def show_batch(
+        self,
+        num_samples: int = 9,
+        unique: bool = False,
+        num_rows: int = None,
+        num_cols: int = None,
+    ):
+        """
+        Show a batch of images from the dataset.
+
+        Args:
+            num_samples: Number of samples to show.
+            unique: Whether to show unique samples.
+            num_rows: Number of rows in the grid.
+            num_cols: Number of columns in the grid.
+        """
+        self.dls.show_batch(
+            max_n=num_samples, unique=unique, nrows=num_rows, ncols=num_cols
+        )
+
     def lr_find(self):
         logger.info("Finding optimal learning rate")
         self.lrs = self.learn.lr_find(suggest_funcs=(minimum, steep, valley, slide))
         logger.info(f"Optimal learning rate: {self.lrs.valley}")

-    def train(self, epochs: int, lr: float):
-
-
+    def train(self, epochs: int, lr: float, head_tuning_epochs: int = 1):
+        """
+        Train the model.
+
+        Args:
+            epochs: Number of epochs to train for.
+            lr: Learning rate.
+            head_tuning_epochs: Number of epochs to train the head.
+        """
+        logger.info(f"Training head for {head_tuning_epochs} epochs")
+        logger.info(f"Training model end-to-end for {epochs} epochs")
+        logger.info(f"Learning rate: {lr} with one-cycle learning rate scheduler")
+        self.learn.fine_tune(
+            epochs, lr, freeze_epochs=head_tuning_epochs, cbs=[ShowGraphCallback()]
+        )

     def predict(self, filepaths: list[str], batch_size: int = 16):
         """
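Taken together, the new `batch_tfms` and `learner_path` parameters let a session either build a fresh fp16 `vision_learner` or resume from an exported learner. A hedged usage sketch, given an existing `ActiveLearner` instance `al`; the dataframe, column names, and `export.pkl` path are illustrative, not from the diff:

```python
import pandas as pd
from fastai.vision.all import aug_transforms

df = pd.read_parquet("initial_samples.parquet")  # assumed columns: filepath, label

# Fresh run: builds a vision_learner with the chosen model in fp16
al.load_dataset(df, filepath_col="filepath", label_col="label",
                batch_size=16, image_size=224, batch_tfms=aug_transforms())

# Resumed run: reloads an exported learner on GPU if available, else CPU
al.load_dataset(df, filepath_col="filepath", label_col="label",
                learner_path="export.pkl")

# fine_tune: head_tuning_epochs frozen epochs, then `epochs` end-to-end
al.train(epochs=3, lr=5e-3, head_tuning_epochs=1)
```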
@@ -112,13 +172,75 @@
         logger.info(f"Accuracy: {accuracy:.2%}")
         return accuracy

-    def sample_uncertain(
+    def sample_uncertain(
+        self, df: pd.DataFrame, num_samples: int, strategy: str = "least-confidence"
+    ):
         """
         Sample top `num_samples` low confidence samples. Returns a df with filepaths and predicted labels, and confidence scores.
+
+        Strategies:
+        - least-confidence: Get top `num_samples` low confidence samples.
+        - margin-of-confidence: Get top `num_samples` samples with the smallest margin between the top two predictions.
+        - ratio-of-confidence: Get top `num_samples` samples with the highest ratio between the top two predictions.
+        - entropy: Get top `num_samples` samples with the highest entropy.
+        """
+
+        # Remove samples that are already in the training set
+        df = df[~df["filepath"].isin(self.train_set["filepath"])].copy()
+
+        if strategy == "least-confidence":
+            logger.info(f"Getting top {num_samples} low confidence samples")
+
+            df.loc[:, "uncertainty_score"] = 1 - (df["pred_conf"]) / (
+                self.num_classes - (self.num_classes - 1)
+            )
+
+            # Sort by descending uncertainty score
+            uncertain_df = df.sort_values(by="uncertainty_score", ascending=False).head(
+                num_samples
+            )
+            return uncertain_df
+
+        # TODO: Implement margin of confidence strategy
+        elif strategy == "margin-of-confidence":
+            logger.error("Margin of confidence strategy not implemented")
+            raise NotImplementedError("Margin of confidence strategy not implemented")
+
+        # TODO: Implement ratio of confidence strategy
+        elif strategy == "ratio-of-confidence":
+            logger.error("Ratio of confidence strategy not implemented")
+            raise NotImplementedError("Ratio of confidence strategy not implemented")
+
+        # TODO: Implement entropy strategy
+        elif strategy == "entropy":
+            logger.error("Entropy strategy not implemented")
+            raise NotImplementedError("Entropy strategy not implemented")
+
+        else:
+            logger.error(f"Unknown strategy: {strategy}")
+            raise ValueError(f"Unknown strategy: {strategy}")
+
+    def sample_diverse(self, df: pd.DataFrame, num_samples: int):
+        """
+        Sample top `num_samples` diverse samples. Returns a df with filepaths and predicted labels, and confidence scores.
+
+        Strategies:
+        - model-based-outlier: Get top `num_samples` samples with lowest activation of the model's last layer.
+        - cluster-based: Get top `num_samples` samples with the highest distance to the nearest neighbor.
+        - representative: Get top `num_samples` samples with the highest distance to the centroid of the training set.
+        """
+        logger.error("Diverse sampling strategy not implemented")
+        raise NotImplementedError("Diverse sampling strategy not implemented")
+
+    def sample_random(self, df: pd.DataFrame, num_samples: int, seed: int = None):
+        """
+        Sample `num_samples` random samples. Returns a df with filepaths and predicted labels, and confidence scores.
         """
-
-
-
+
+        logger.info(f"Sampling {num_samples} random samples")
+        if seed is not None:
+            logger.info(f"Using seed: {seed}")
+        return df.sample(n=num_samples, random_state=seed)

     def label(self, df: pd.DataFrame, output_filename: str = "labeled"):
         """
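Note that the shipped least-confidence denominator, `self.num_classes - (self.num_classes - 1)`, always evaluates to 1, so the score reduces to `1 - pred_conf`. The other three strategies still raise `NotImplementedError`; for reference, the textbook formulations they describe need the full probability vector rather than the stored top-1 `pred_conf`. The sketch below is an illustration of those standard formulas, not code from the package:

```python
import torch
import torch.nn.functional as F

def uncertainty_scores(logits: torch.Tensor, strategy: str) -> torch.Tensor:
    """Classic uncertainty scores over (N, C) logits; higher means more informative."""
    probs = F.softmax(logits, dim=1)
    top2 = probs.topk(2, dim=1).values  # columns: top-1 and top-2 probabilities
    if strategy == "least-confidence":
        return 1 - top2[:, 0]
    if strategy == "margin-of-confidence":
        return 1 - (top2[:, 0] - top2[:, 1])   # smallest margin scores highest
    if strategy == "ratio-of-confidence":
        return top2[:, 1] / top2[:, 0]         # ratio near 1 means the top two compete
    if strategy == "entropy":
        return -(probs * probs.clamp_min(1e-12).log()).sum(dim=1)
    raise ValueError(f"Unknown strategy: {strategy}")
```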
@@ -136,15 +258,15 @@
                 return;
             }

-            if (e.key.
+            if (e.key === "ArrowUp" || e.key === "Enter") {
                 document.getElementById("submit_btn").click();
-            } else if (e.key
+            } else if (e.key === "ArrowRight") {
                 document.getElementById("next_btn").click();
-            } else if (e.key
+            } else if (e.key === "ArrowLeft") {
                 document.getElementById("back_btn").click();
             }
         }
-        document.addEventListener('
+        document.addEventListener('keydown', shortcuts, false);
     </script>
     """

@@ -155,24 +277,45 @@
         with gr.Blocks(head=shortcut_js) as demo:
             current_index = gr.State(value=0)

-            filename = gr.Textbox(
-                label="Filename", value=filepaths[0], interactive=False
-            )
-
             image = gr.Image(
                 type="filepath", label="Image", value=filepaths[0], height=500
             )
-            category = gr.Radio(choices=self.class_names, label="Select Category")

             with gr.Row():
-
+                filename = gr.Textbox(
+                    label="Filename", value=filepaths[0], interactive=False
+                )
+
+                pred_label = gr.Textbox(
+                    label="Predicted Label",
+                    value=df["pred_label"].iloc[0]
+                    if "pred_label" in df.columns
+                    else "",
+                    interactive=False,
+                )
+                pred_conf = gr.Textbox(
+                    label="Confidence",
+                    value=f"{df['pred_conf'].iloc[0]:.2%}"
+                    if "pred_conf" in df.columns
+                    else "",
+                    interactive=False,
+                )
+
+            category = gr.Radio(
+                choices=self.class_names,
+                label="Select Category",
+                value=df["pred_label"].iloc[0] if "pred_label" in df.columns else None,
+            )
+
+            with gr.Row():
+                back_btn = gr.Button("← Previous", elem_id="back_btn")
                 submit_btn = gr.Button(
-                    "Submit (
+                    "Submit (↑/Enter)",
                     variant="primary",
                     elem_id="submit_btn",
                     interactive=False,
                 )
-                next_btn = gr.Button("Next →
+                next_btn = gr.Button("Next →", elem_id="next_btn")

             progress = gr.Slider(
                 minimum=0,
@@ -184,6 +327,73 @@

             finish_btn = gr.Button("Finish Labeling", variant="primary")

+            with gr.Accordion("Zero-Shot Inference", open=False) as zero_shot_accordion:
+                gr.Markdown("""
+                Uses a VLM to predict the label of the image.
+                """)
+
+                import xinfer
+                from xinfer.model_registry import model_registry
+                from xinfer.types import ModelInputOutput
+
+                # Get models and filter for image-to-text models
+                all_models = model_registry.list_models()
+                model_list = [
+                    model.id
+                    for model in all_models
+                    if model.input_output == ModelInputOutput.IMAGE_TEXT_TO_TEXT
+                ]
+
+                with gr.Row():
+                    with gr.Row():
+                        model_dropdown = gr.Dropdown(
+                            choices=model_list,
+                            label="Select a model",
+                            value="vikhyatk/moondream2",
+                        )
+                        device_dropdown = gr.Dropdown(
+                            choices=["cuda", "cpu"],
+                            label="Device",
+                            value="cuda" if torch.cuda.is_available() else "cpu",
+                        )
+                        dtype_dropdown = gr.Dropdown(
+                            choices=["float32", "float16", "bfloat16"],
+                            label="Data Type",
+                            value="float16" if torch.cuda.is_available() else "float32",
+                        )
+
+                with gr.Column():
+                    prompt_textbox = gr.Textbox(
+                        label="Prompt",
+                        lines=3,
+                        value=f"Classify the image into one of the following categories: {self.class_names}",
+                        interactive=True,
+                    )
+                    inference_btn = gr.Button("Run Inference", variant="primary")
+
+                    result_textbox = gr.Textbox(
+                        label="Result",
+                        lines=3,
+                        interactive=False,
+                    )
+
+                def run_zero_shot_inference(prompt, model, device, dtype, current_filename):
+                    model = xinfer.create_model(model, device=device, dtype=dtype)
+                    result = model.infer(current_filename, prompt).text
+                    return result
+
+                inference_btn.click(
+                    fn=run_zero_shot_inference,
+                    inputs=[
+                        prompt_textbox,
+                        model_dropdown,
+                        device_dropdown,
+                        dtype_dropdown,
+                        filename,
+                    ],
+                    outputs=[result_textbox],
+                )
+
             def update_submit_btn(choice):
                 return gr.Button(interactive=choice is not None)

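Outside the Gradio accordion, the zero-shot path boils down to the two `xinfer` calls in `run_zero_shot_inference` above. A standalone sketch with an illustrative image path and prompt; the model id, device, dtype, and `.text` attribute are taken from the diff:

```python
import xinfer

# Mirrors the accordion's CPU fallback defaults
model = xinfer.create_model("vikhyatk/moondream2", device="cpu", dtype="float32")
result = model.infer("path/to/image.jpg", "Classify the image into one of: ['cat', 'dog']")
print(result.text)
```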
@@ -192,21 +402,59 @@
             )

             def navigate(current_idx, direction):
+                # Convert current_idx to int before arithmetic
+                current_idx = int(current_idx)
                 next_idx = current_idx + direction
+
                 if 0 <= next_idx < len(filepaths):
-                    return
+                    return (
+                        filepaths[next_idx],
+                        filepaths[next_idx],
+                        df["pred_label"].iloc[next_idx]
+                        if "pred_label" in df.columns
+                        else "",
+                        f"{df['pred_conf'].iloc[next_idx]:.2%}"
+                        if "pred_conf" in df.columns
+                        else "",
+                        df["pred_label"].iloc[next_idx]
+                        if "pred_label" in df.columns
+                        else None,
+                        next_idx,
+                        next_idx,
+                    )
                 return (
                     filepaths[current_idx],
                     filepaths[current_idx],
+                    df["pred_label"].iloc[current_idx]
+                    if "pred_label" in df.columns
+                    else "",
+                    f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                    if "pred_conf" in df.columns
+                    else "",
+                    df["pred_label"].iloc[current_idx]
+                    if "pred_label" in df.columns
+                    else None,
                     current_idx,
                     current_idx,
                 )

             def save_and_next(current_idx, selected_category):
+                # Convert current_idx to int before arithmetic
+                current_idx = int(current_idx)
+
                 if selected_category is None:
                     return (
                         filepaths[current_idx],
                         filepaths[current_idx],
+                        df["pred_label"].iloc[current_idx]
+                        if "pred_label" in df.columns
+                        else "",
+                        f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                        if "pred_conf" in df.columns
+                        else "",
+                        df["pred_label"].iloc[current_idx]
+                        if "pred_label" in df.columns
+                        else None,
                         current_idx,
                         current_idx,
                     )
@@ -221,10 +469,33 @@
                 return (
                     filepaths[current_idx],
                     filepaths[current_idx],
+                    df["pred_label"].iloc[current_idx]
+                    if "pred_label" in df.columns
+                    else "",
+                    f"{df['pred_conf'].iloc[current_idx]:.2%}"
+                    if "pred_conf" in df.columns
+                    else "",
+                    df["pred_label"].iloc[current_idx]
+                    if "pred_label" in df.columns
+                    else None,
                     current_idx,
                     current_idx,
                 )
-                return
+                return (
+                    filepaths[next_idx],
+                    filepaths[next_idx],
+                    df["pred_label"].iloc[next_idx]
+                    if "pred_label" in df.columns
+                    else "",
+                    f"{df['pred_conf'].iloc[next_idx]:.2%}"
+                    if "pred_conf" in df.columns
+                    else "",
+                    df["pred_label"].iloc[next_idx]
+                    if "pred_label" in df.columns
+                    else None,
+                    next_idx,
+                    next_idx,
+                )

             def convert_csv_to_parquet():
                 try:
@@ -240,19 +511,43 @@
             back_btn.click(
                 fn=lambda idx: navigate(idx, -1),
                 inputs=[current_index],
-                outputs=[
+                outputs=[
+                    filename,
+                    image,
+                    pred_label,
+                    pred_conf,
+                    category,
+                    current_index,
+                    progress,
+                ],
             )

             next_btn.click(
                 fn=lambda idx: navigate(idx, 1),
                 inputs=[current_index],
-                outputs=[
+                outputs=[
+                    filename,
+                    image,
+                    pred_label,
+                    pred_conf,
+                    category,
+                    current_index,
+                    progress,
+                ],
             )

             submit_btn.click(
                 fn=save_and_next,
                 inputs=[current_index, category],
-                outputs=[
+                outputs=[
+                    filename,
+                    image,
+                    pred_label,
+                    pred_conf,
+                    category,
+                    current_index,
+                    progress,
+                ],
             )

             finish_btn.click(fn=convert_csv_to_parquet)
@@ -264,10 +559,6 @@
         Add samples to the training set.
         """
         new_train_set = df.copy()
-        # new_train_set.drop(columns=["pred_conf"], inplace=True)
-        # new_train_set.rename(columns={"pred_label": "label"}, inplace=True)
-
-        # len_old = len(self.train_set)

         logger.info(f"Adding {len(new_train_set)} samples to training set")
         self.train_set = pd.concat([self.train_set, new_train_set])
@@ -279,13 +570,3 @@

         self.train_set.to_parquet(f"{output_filename}.parquet")
         logger.info(f"Saved training set to {output_filename}.parquet")
-
-        # if len(self.train_set) == len_old:
-        #     logger.warning("No new samples added to training set")
-
-        # elif len_old + len(new_train_set) < len(self.train_set):
-        #     logger.warning("Some samples were duplicates and removed from training set")
-
-        # else:
-        #     logger.info("All new samples added to training set")
-        #     logger.info(f"Training set now has {len(self.train_set)} samples")
{active_vision-0.0.4.dist-info → active_vision-0.1.0.dist-info}/METADATA
CHANGED
@@ -1,10 +1,11 @@
 Metadata-Version: 2.2
 Name: active-vision
-Version: 0.0.4
+Version: 0.1.0
 Summary: Active learning for edge vision.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: accelerate>=1.2.1
 Requires-Dist: datasets>=3.2.0
 Requires-Dist: fastai>=2.7.18
 Requires-Dist: gradio>=5.12.0
@@ -13,6 +14,8 @@ Requires-Dist: ipywidgets>=8.1.5
 Requires-Dist: loguru>=0.7.3
 Requires-Dist: seaborn>=0.13.2
 Requires-Dist: timm>=1.0.13
+Requires-Dist: transformers>=4.48.0
+Requires-Dist: xinfer>=0.3.2

 
 
@@ -23,17 +26,38 @@ Requires-Dist: timm>=1.0.13
 <img src="https://raw.githubusercontent.com/dnth/active-vision/main/assets/logo.png" alt="active-vision">
 </p>

-
+The goal of this project is to create a framework for the active learning loop for computer vision. The diagram below shows a general workflow of how the active learning loop works.

-
+<p align="center">
+    <img src="https://raw.githubusercontent.com/dnth/active-vision/main/assets/data_flywheel.gif" alt="active-vision", width="700">
+</p>

-Supported tasks:
+### Supported tasks:
 - [X] Image classification
 - [ ] Object detection
 - [ ] Segmentation

+### Supported models:
+- [X] Fastai models
+- [X] Torchvision models
+- [X] Timm models
+- [ ] Hugging Face models
+
+### Supported Active Learning Strategies:
+
+Uncertainty Sampling:
+- [X] Least confidence
+- [ ] Margin of confidence
+- [ ] Ratio of confidence
+- [ ] Entropy

-
+Diverse Sampling:
+- [X] Random sampling
+- [ ] Model-based outlier
+- [ ] Cluster-based
+- [ ] Representative
+
+## 📦 Installation

 Get a release from PyPI
 ```bash
@@ -58,18 +82,18 @@ uv sync
 Once the virtual environment is created, you can install the package using pip.

 > [!TIP]
-> If you're using uv add a uv before the pip install command to install into your virtual environment. Eg:
+> If you're using uv, add a `uv` before the pip install command to install into your virtual environment. Eg:
 > ```bash
 > uv pip install active-vision
 > ```

-## Usage
+## 🛠️ Usage
 See the [notebook](./nbs/04_relabel_loop.ipynb) for a complete example.

-Be sure to prepared 3
-- [
-- [
-- [
+Be sure to prepare 3 subsets of the dataset:
+- [Initial samples](./nbs/initial_samples.parquet): A dataframe of labeled images to train an initial model. If you don't have any labeled data, you can label some images yourself.
+- [Unlabeled samples](./nbs/unlabeled_samples.parquet): A dataframe of *unlabeled* images. We will continuously sample from this set using active learning strategies.
+- [Evaluation samples](./nbs/evaluation_samples.parquet): A dataframe of *labeled* images. We will use this set to evaluate the performance of the model. This is the test set, DO NOT use it for active learning. Split this out in the beginning.

 As a toy example I created the above 3 datasets from the imagenette dataset.

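One pass of the loop over these three subsets looks roughly like the sketch below. Method names follow the `core.py` diff above; that `predict` returns the prediction dataframe, and the exact column names, are assumptions:

```python
import pandas as pd
from active_vision import ActiveLearner  # assumes a package-root re-export

train_df = pd.read_parquet("initial_samples.parquet")
unlabeled_df = pd.read_parquet("unlabeled_samples.parquet")

al = ActiveLearner("resnet18")
al.load_dataset(train_df, filepath_col="filepath", label_col="label")
al.train(epochs=3, lr=5e-3)

# Score the unlabeled pool and pull the least-confident samples for review
pred_df = al.predict(unlabeled_df["filepath"].tolist())
uncertain_df = al.sample_uncertain(pred_df, num_samples=10, strategy="least-confidence")
al.label(uncertain_df, output_filename="uncertain")
```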
@@ -100,7 +124,7 @@ uncertain_df = al.sample_uncertain(pred_df, num_samples=10)
 al.label(uncertain_df, output_filename="uncertain")
 ```

-
+
 
 Once complete, the labeled samples will be saved into a new df.
 We can now add the newly labeled data to the training set.
@@ -119,11 +143,77 @@ Repeat the process until the model is good enough. Use the dataset to train a la
 >
 > But using the dataset of 200+ images, I trained a more capable model (convnext_small_in22k) and got 99.3% accuracy on the evaluation set. See the [notebook](./nbs/05_retrain_larger.ipynb) for more details.

-
-
+
+## 📊 Benchmarks
+This section contains the benchmarks I ran using the active learning loop on various datasets.
+
+Column description:
+- `#Labeled Images`: The number of labeled images used to train the model.
+- `Evaluation Accuracy`: The accuracy of the model on the evaluation set.
+- `Train Epochs`: The number of epochs used to train the model.
+- `Model`: The model used to train.
+- `Active Learning`: Whether active learning was used to train the model.
+- `Source`: The source of the results.
+
+### Imagenette
+- num classes: 10
+- num images: 9469
+
+To start the active learning loop, I labeled 100 images (10 images from each class) and iteratively relabeled the most informative images until I hit 275 labeled images.
+
+The active learning loop is an iterative process and can keep going until you hit a stopping point. You can decide your own stopping point based on your use case. It could be:
+- You ran out of data to label.
+- You hit a performance goal.
+- You hit a budget.
+- Other criteria.
+
+For this dataset, I decided to stop the active learning loop at 275 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard.
+
+
+| #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+|-----------------|---------------------|--------------|----------------------|----------------|--------|
+| 9469 | 94.90% | 80 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
+| 9469 | 95.11% | 200 | xse_resnext50 | ❌ | [Link](https://github.com/fastai/imagenette) |
+| 275 | 99.33% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/05_retrain_larger.ipynb) |
+| 275 | 93.40% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/04_relabel_loop.ipynb) |
+
+### Dog Food
+- num classes: 2
+- num images: 2100
+
+To start the active learning loop, I labeled 20 images (10 images from each class) and iteratively relabeled the most informative images until I hit 160 labeled images.
+
+I decided to stop the active learning loop at 160 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
+
+| #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+|-----------------|---------------------|--------------|-------|----------------|--------|
+| 2100 | 99.70% | ? | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/abhishek/autotrain-dog-vs-food) |
+| 160 | 100.00% | 6 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/02_train.ipynb) |
+| 160 | 97.60% | 4 | resnet18 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/dog_food_dataset/01_label.ipynb) |
+
+### Oxford-IIIT Pet
+- num classes: 37
+- num images: 3680
+
+To start the active learning loop, I labeled 370 images (10 images from each class) and iteratively relabeled the most informative images until I hit 612 labeled images.
+
+I decided to stop the active learning loop at 612 labeled images because the performance on the evaluation set is close to the top performing model on the leaderboard. You can decide your own stopping point based on your use case.
+
+| #Labeled Images | Evaluation Accuracy | Train Epochs | Model | Active Learning | Source |
+|-----------------|---------------------|--------------|-------|----------------|--------|
+| 3680 | 95.40% | 5 | vit-base-patch16-224 | ❌ | [Link](https://huggingface.co/walterg777/vit-base-oxford-iiit-pets) |
+| 612 | 90.26% | 11 | convnext_small_in22k | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/02_train.ipynb) |
+| 612 | 91.38% | 11 | vit-base-patch16-224 | ✓ | [Link](https://github.com/dnth/active-vision/blob/main/nbs/oxford_iiit_pets/03_train_vit.ipynb) |
+
+
+## ➿ Workflow
+This section describes a more detailed workflow for active learning. There are two workflows for active learning that we can use depending on the availability of labeled data.

 ### With unlabeled data
-If we have no labeled data,
+If we have no labeled data, the goal of the active learning loop is to build a reasonably good labeled dataset to train a larger model.
+
+Steps:

 1. Load a small proxy model.
 2. Label an initial dataset. If there is none, you'll have to label some images.
@@ -155,24 +245,25 @@ graph TD
 ```

 ### With labeled data
-If we have a labeled dataset,
+If we already have a labeled dataset, the goal of the active learning loop is to iteratively improve the dataset and the model by fixing the most important label errors.
+
+Steps:

 1. Load a small proxy model.
 2. Train the proxy model on the labeled dataset.
 3. Run inference on the entire labeled dataset.
-4. Get the most
+4. Get the most impactful label errors with active learning.
 5. Fix the label errors.
 6. Repeat steps 2-5 until the dataset is good enough.
 7. Save the labeled dataset.
 8. Train a larger model on the saved labeled dataset.


-
 ```mermaid
 graph TD
     A[Load a small proxy model] --> B[Train proxy model on labeled dataset]
     B --> C[Run inference on labeled dataset]
-    C --> D[Get
+    C --> D[Get label errors using active learning]
     D --> E[Fix label errors]
     E --> F{Dataset good enough?}
     F -->|No| B
@@ -181,6 +272,7 @@ graph TD
 ```

 
+
 <!-- ## Methodology
 To test out the workflows we will use the [imagenette dataset](https://huggingface.co/datasets/frgfm/imagenette). But this will be applicable to any dataset.

active_vision-0.1.0.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+active_vision/__init__.py,sha256=dDQijes3C7zAUc_08TyblLSP6Lk0PcPPI8PYgEliKCI,43
+active_vision/core.py,sha256=D_ve-nMv2EWSaQCOBTggleo-1op8JHXchk0QLicGDqg,21715
+active_vision-0.1.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+active_vision-0.1.0.dist-info/METADATA,sha256=aA793OK3PGKnKVchMQthXl1H14xcBh_kq9tAO9o6jf0,15944
+active_vision-0.1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+active_vision-0.1.0.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
+active_vision-0.1.0.dist-info/RECORD,,
active_vision-0.0.4.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-active_vision/__init__.py,sha256=XITukjUU49hPFzxCzmxqJAUWh3YE8sWQzmyZ5bVra88,43
-active_vision/core.py,sha256=0aXDI5Gpj0Spk7TSIxJf8aJDDBgZh0-jkWdYyZ1Zric,10713
-active_vision-0.0.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-active_vision-0.0.4.dist-info/METADATA,sha256=WlvtrzUy8m2nr8izUuTtysdQXO4ZjCO9vGWt2i_GMUI,10421
-active_vision-0.0.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-active_vision-0.0.4.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
-active_vision-0.0.4.dist-info/RECORD,,
{active_vision-0.0.4.dist-info → active_vision-0.1.0.dist-info}/LICENSE
File without changes
{active_vision-0.0.4.dist-info → active_vision-0.1.0.dist-info}/WHEEL
File without changes
{active_vision-0.0.4.dist-info → active_vision-0.1.0.dist-info}/top_level.txt
File without changes