active-vision 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- active_vision/__init__.py +1 -1
- active_vision/core.py +157 -15
- {active_vision-0.0.2.dist-info → active_vision-0.0.3.dist-info}/METADATA +48 -25
- active_vision-0.0.3.dist-info/RECORD +7 -0
- active_vision-0.0.2.dist-info/RECORD +0 -7
- {active_vision-0.0.2.dist-info → active_vision-0.0.3.dist-info}/LICENSE +0 -0
- {active_vision-0.0.2.dist-info → active_vision-0.0.3.dist-info}/WHEEL +0 -0
- {active_vision-0.0.2.dist-info → active_vision-0.0.3.dist-info}/top_level.txt +0 -0
active_vision/__init__.py
CHANGED
active_vision/core.py
CHANGED
@@ -87,7 +87,9 @@ class ActiveLearner:
|
|
87
87
|
)
|
88
88
|
return self.pred_df
|
89
89
|
|
90
|
-
def evaluate(
|
90
|
+
def evaluate(
|
91
|
+
self, df: pd.DataFrame, filepath_col: str, label_col: str, batch_size: int = 16
|
92
|
+
):
|
91
93
|
"""
|
92
94
|
Evaluate on a labeled dataset. Returns a score.
|
93
95
|
"""
|
@@ -114,20 +116,158 @@ class ActiveLearner:
|
|
114
116
|
"""
|
115
117
|
Sample top `num_samples` low confidence samples. Returns a df with filepaths and predicted labels, and confidence scores.
|
116
118
|
"""
|
117
|
-
|
118
|
-
|
119
|
-
).head(num_samples)
|
119
|
+
logger.info(f"Getting top {num_samples} low confidence samples")
|
120
|
+
uncertain_df = df.sort_values(by="pred_conf", ascending=True).head(num_samples)
|
120
121
|
return uncertain_df
|
121
122
|
|
122
|
-
def
|
123
|
+
def label(self, df: pd.DataFrame, output_filename: str = "labeled"):
|
124
|
+
"""
|
125
|
+
Launch a labeling interface for the user to label the samples.
|
126
|
+
Input is a df with filepaths listing the files to be labeled. Output is a df with filepaths and labels.
|
127
|
+
"""
|
128
|
+
import gradio as gr
|
129
|
+
|
130
|
+
shortcut_js = """
|
131
|
+
<script>
|
132
|
+
function shortcuts(e) {
|
133
|
+
// Only block shortcuts if we're in a text input or textarea
|
134
|
+
if (e.target.tagName.toLowerCase() === "textarea" ||
|
135
|
+
(e.target.tagName.toLowerCase() === "input" && e.target.type.toLowerCase() === "text")) {
|
136
|
+
return;
|
137
|
+
}
|
138
|
+
|
139
|
+
if (e.key.toLowerCase() == "w") {
|
140
|
+
document.getElementById("submit_btn").click();
|
141
|
+
} else if (e.key.toLowerCase() == "d") {
|
142
|
+
document.getElementById("next_btn").click();
|
143
|
+
} else if (e.key.toLowerCase() == "a") {
|
144
|
+
document.getElementById("back_btn").click();
|
145
|
+
}
|
146
|
+
}
|
147
|
+
document.addEventListener('keypress', shortcuts, false);
|
148
|
+
</script>
|
149
|
+
"""
|
150
|
+
|
151
|
+
logger.info(f"Launching labeling interface for {len(df)} samples")
|
152
|
+
|
153
|
+
filepaths = df["filepath"].tolist()
|
154
|
+
|
155
|
+
with gr.Blocks(head=shortcut_js) as demo:
|
156
|
+
current_index = gr.State(value=0)
|
157
|
+
|
158
|
+
filename = gr.Textbox(
|
159
|
+
label="Filename", value=filepaths[0], interactive=False
|
160
|
+
)
|
161
|
+
|
162
|
+
image = gr.Image(
|
163
|
+
type="filepath", label="Image", value=filepaths[0], height=500
|
164
|
+
)
|
165
|
+
category = gr.Radio(choices=self.class_names, label="Select Category")
|
166
|
+
|
167
|
+
with gr.Row():
|
168
|
+
back_btn = gr.Button("← Previous (A)", elem_id="back_btn")
|
169
|
+
submit_btn = gr.Button(
|
170
|
+
"Submit (W)",
|
171
|
+
variant="primary",
|
172
|
+
elem_id="submit_btn",
|
173
|
+
interactive=False,
|
174
|
+
)
|
175
|
+
next_btn = gr.Button("Next → (D)", elem_id="next_btn")
|
176
|
+
|
177
|
+
progress = gr.Slider(
|
178
|
+
minimum=0,
|
179
|
+
maximum=len(filepaths) - 1,
|
180
|
+
value=0,
|
181
|
+
label="Progress",
|
182
|
+
interactive=False,
|
183
|
+
)
|
184
|
+
|
185
|
+
finish_btn = gr.Button("Finish Labeling", variant="primary")
|
186
|
+
|
187
|
+
def update_submit_btn(choice):
|
188
|
+
return gr.Button(interactive=choice is not None)
|
189
|
+
|
190
|
+
category.change(
|
191
|
+
fn=update_submit_btn, inputs=[category], outputs=[submit_btn]
|
192
|
+
)
|
193
|
+
|
194
|
+
def navigate(current_idx, direction):
|
195
|
+
next_idx = current_idx + direction
|
196
|
+
if 0 <= next_idx < len(filepaths):
|
197
|
+
return filepaths[next_idx], filepaths[next_idx], next_idx, next_idx
|
198
|
+
return (
|
199
|
+
filepaths[current_idx],
|
200
|
+
filepaths[current_idx],
|
201
|
+
current_idx,
|
202
|
+
current_idx,
|
203
|
+
)
|
204
|
+
|
205
|
+
def save_and_next(current_idx, selected_category):
|
206
|
+
if selected_category is None:
|
207
|
+
return (
|
208
|
+
filepaths[current_idx],
|
209
|
+
filepaths[current_idx],
|
210
|
+
current_idx,
|
211
|
+
current_idx,
|
212
|
+
)
|
213
|
+
|
214
|
+
# Save the current annotation
|
215
|
+
with open(f"{output_filename}.csv", "a") as f:
|
216
|
+
f.write(f"{filepaths[current_idx]},{selected_category}\n")
|
217
|
+
|
218
|
+
# Move to next image if not at the end
|
219
|
+
next_idx = current_idx + 1
|
220
|
+
if next_idx >= len(filepaths):
|
221
|
+
return (
|
222
|
+
filepaths[current_idx],
|
223
|
+
filepaths[current_idx],
|
224
|
+
current_idx,
|
225
|
+
current_idx,
|
226
|
+
)
|
227
|
+
return filepaths[next_idx], filepaths[next_idx], next_idx, next_idx
|
228
|
+
|
229
|
+
def convert_csv_to_parquet():
|
230
|
+
try:
|
231
|
+
df = pd.read_csv(f"{output_filename}.csv", header=None)
|
232
|
+
df.columns = ["filepath", "label"]
|
233
|
+
df = df.drop_duplicates(subset=["filepath"], keep="last")
|
234
|
+
df.to_parquet(f"{output_filename}.parquet")
|
235
|
+
gr.Info(f"Annotation saved to {output_filename}.parquet")
|
236
|
+
except Exception as e:
|
237
|
+
logger.error(e)
|
238
|
+
return
|
239
|
+
|
240
|
+
back_btn.click(
|
241
|
+
fn=lambda idx: navigate(idx, -1),
|
242
|
+
inputs=[current_index],
|
243
|
+
outputs=[filename, image, current_index, progress],
|
244
|
+
)
|
245
|
+
|
246
|
+
next_btn.click(
|
247
|
+
fn=lambda idx: navigate(idx, 1),
|
248
|
+
inputs=[current_index],
|
249
|
+
outputs=[filename, image, current_index, progress],
|
250
|
+
)
|
251
|
+
|
252
|
+
submit_btn.click(
|
253
|
+
fn=save_and_next,
|
254
|
+
inputs=[current_index, category],
|
255
|
+
outputs=[filename, image, current_index, progress],
|
256
|
+
)
|
257
|
+
|
258
|
+
finish_btn.click(fn=convert_csv_to_parquet)
|
259
|
+
|
260
|
+
demo.launch(height=1000)
|
261
|
+
|
262
|
+
def add_to_train_set(self, df: pd.DataFrame, output_filename: str):
|
123
263
|
"""
|
124
264
|
Add samples to the training set.
|
125
265
|
"""
|
126
266
|
new_train_set = df.copy()
|
127
|
-
new_train_set.drop(columns=["pred_conf"], inplace=True)
|
128
|
-
new_train_set.rename(columns={"pred_label": "label"}, inplace=True)
|
267
|
+
# new_train_set.drop(columns=["pred_conf"], inplace=True)
|
268
|
+
# new_train_set.rename(columns={"pred_label": "label"}, inplace=True)
|
129
269
|
|
130
|
-
len_old = len(self.train_set)
|
270
|
+
# len_old = len(self.train_set)
|
131
271
|
|
132
272
|
logger.info(f"Adding {len(new_train_set)} samples to training set")
|
133
273
|
self.train_set = pd.concat([self.train_set, new_train_set])
|
@@ -137,13 +277,15 @@ class ActiveLearner:
|
|
137
277
|
)
|
138
278
|
self.train_set.reset_index(drop=True, inplace=True)
|
139
279
|
|
280
|
+
self.train_set.to_parquet(f"{output_filename}.parquet")
|
281
|
+
logger.info(f"Saved training set to {output_filename}.parquet")
|
140
282
|
|
141
|
-
if len(self.train_set) == len_old:
|
142
|
-
|
283
|
+
# if len(self.train_set) == len_old:
|
284
|
+
# logger.warning("No new samples added to training set")
|
143
285
|
|
144
|
-
elif len_old + len(new_train_set) < len(self.train_set):
|
145
|
-
|
286
|
+
# elif len_old + len(new_train_set) < len(self.train_set):
|
287
|
+
# logger.warning("Some samples were duplicates and removed from training set")
|
146
288
|
|
147
|
-
else:
|
148
|
-
|
149
|
-
|
289
|
+
# else:
|
290
|
+
# logger.info("All new samples added to training set")
|
291
|
+
# logger.info(f"Training set now has {len(self.train_set)} samples")
|
@@ -1,12 +1,13 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: active-vision
|
3
|
-
Version: 0.0.2
|
3
|
+
Version: 0.0.3
|
4
4
|
Summary: Active learning for edge vision.
|
5
5
|
Requires-Python: >=3.10
|
6
6
|
Description-Content-Type: text/markdown
|
7
7
|
License-File: LICENSE
|
8
8
|
Requires-Dist: datasets>=3.2.0
|
9
9
|
Requires-Dist: fastai>=2.7.18
|
10
|
+
Requires-Dist: gradio>=5.12.0
|
10
11
|
Requires-Dist: ipykernel>=6.29.5
|
11
12
|
Requires-Dist: ipywidgets>=8.1.5
|
12
13
|
Requires-Dist: loguru>=0.7.3
|
@@ -14,40 +15,53 @@ Requires-Dist: seaborn>=0.13.2
|
|
14
15
|
|
15
16
|
![Python Version](https://img.shields.io/badge/python-3.10%2B-blue?style=for-the-badge)
|
16
17
|
![License](https://img.shields.io/badge/License-Apache%202.0-green.svg?style=for-the-badge)
|
17
|
-
![PyPI](https://img.shields.io/pypi/v/active-vision?style=for-the-badge)
|
18
|
+
[![PyPI](https://img.shields.io/pypi/v/active-vision?style=for-the-badge)](https://pypi.org/project/active-vision/)
|
18
19
|
![Downloads](https://img.shields.io/pepy/dt/active-vision?style=for-the-badge&logo=pypi&logoColor=white&label=Downloads&color=purple)
|
19
20
|
|
20
21
|
<p align="center">
|
21
|
-
<img src="https://
|
22
|
+
<img src="https://raw.githubusercontent.com/dnth/active-vision/main/assets/logo.png" alt="active-vision">
|
22
23
|
</p>
|
23
24
|
|
24
25
|
Active learning at the edge for computer vision.
|
25
26
|
|
26
|
-
The goal of this project is to create a framework for active learning
|
27
|
+
The goal of this project is to create a framework for the active learning loop for computer vision deployed on edge devices.
|
27
28
|
|
28
|
-
##
|
29
|
+
## Installation
|
30
|
+
I recommend using [uv](https://docs.astral.sh/uv/) to set up a virtual environment and install the package. You can also use any other virtual environment tool of your choice.
|
29
31
|
|
30
|
-
|
31
|
-
- User interface: streamlit
|
32
|
-
- Database: sqlite
|
33
|
-
- Experiment tracking: wandb
|
32
|
+
If you're using uv:
|
34
33
|
|
35
|
-
|
34
|
+
```bash
|
35
|
+
uv venv
|
36
|
+
uv sync
|
37
|
+
```
|
38
|
+
Once the virtual environment is created, you can install the package using pip.
|
36
39
|
|
37
|
-
PyPI
|
40
|
+
Get a release from PyPI
|
38
41
|
```bash
|
39
42
|
pip install active-vision
|
40
43
|
```
|
41
44
|
|
42
|
-
|
45
|
+
Install from source
|
43
46
|
```bash
|
44
47
|
git clone https://github.com/dnth/active-vision.git
|
45
48
|
cd active-vision
|
46
49
|
pip install -e .
|
47
50
|
```
|
48
51
|
|
52
|
+
> [!TIP]
|
53
|
+
> If you're using uv, add `uv` before the `pip install` command to install into your virtual environment. E.g.:
|
54
|
+
> ```bash
|
55
|
+
> uv pip install active-vision
|
56
|
+
> ```
|
57
|
+
|
49
58
|
## Usage
|
50
|
-
See the [notebook](./nbs/
|
59
|
+
See the [notebook](./nbs/04_relabel_loop.ipynb) for a complete example.
|
60
|
+
|
61
|
+
Be sure to prepare 3 datasets:
|
62
|
+
- train: A dataframe of an existing labeled training dataset.
|
63
|
+
- unlabeled: A dataframe of unlabeled data which we will sample from using active learning.
|
64
|
+
- eval: A dataframe of labeled data which we will use to evaluate the performance of the model. (Optional)
|
51
65
|
|
52
66
|
```python
|
53
67
|
from active_vision import ActiveLearner
|
@@ -56,29 +70,38 @@ import pandas as pd
|
|
56
70
|
# Create an active learner instance with a model
|
57
71
|
al = ActiveLearner("resnet18")
|
58
72
|
|
59
|
-
# Load
|
73
|
+
# Load dataset
|
60
74
|
train_df = pd.read_parquet("training_samples.parquet")
|
61
|
-
al.load_dataset(
|
75
|
+
al.load_dataset(train_df, filepath_col="filepath", label_col="label")
|
62
76
|
|
63
|
-
# Train
|
77
|
+
# Train model
|
64
78
|
al.train(epochs=3, lr=1e-3)
|
65
79
|
|
66
|
-
#
|
67
|
-
|
80
|
+
# Evaluate the model on a *labeled* evaluation set
|
81
|
+
accuracy = al.evaluate(eval_df, filepath_col="filepath", label_col="label")
|
68
82
|
|
69
|
-
#
|
70
|
-
accuracy = al.evaluate(eval_df, "filepath", "label")
|
71
|
-
|
72
|
-
# Get predictions from an unlabeled set
|
83
|
+
# Get predictions from an *unlabeled* set
|
73
84
|
pred_df = al.predict(filepaths)
|
74
85
|
|
75
|
-
# Sample low confidence predictions
|
86
|
+
# Sample low confidence predictions from unlabeled set
|
76
87
|
uncertain_df = al.sample_uncertain(pred_df, num_samples=10)
|
77
88
|
|
78
|
-
#
|
79
|
-
al.
|
89
|
+
# Launch a Gradio UI to label the low confidence samples
|
90
|
+
al.label(uncertain_df, output_filename="uncertain")
|
80
91
|
```
|
81
92
|
|
93
|
+
![Gradio UI](./assets/labeling_ui.png)
|
94
|
+
|
95
|
+
Once complete, the labeled samples will be saved into a new df.
|
96
|
+
We can now add the newly labeled data to the training set.
|
97
|
+
|
98
|
+
```python
|
99
|
+
# Add newly labeled data to training set and save as a new file active_labeled
|
100
|
+
al.add_to_train_set(labeled_df, output_filename="active_labeled")
|
101
|
+
```
|
102
|
+
|
103
|
+
Repeat the process until the model is good enough. Use the dataset to train a larger model and deploy.
|
104
|
+
|
82
105
|
## Workflow
|
83
106
|
There are two workflows for active learning at the edge that we can use depending on the availability of labeled data.
|
84
107
|
|
@@ -0,0 +1,7 @@
|
|
1
|
+
active_vision/__init__.py,sha256=hZp8jB284ByY44Q5cdwTt9Zz5n4QWXnz0OexpEE9muk,43
|
2
|
+
active_vision/core.py,sha256=0aXDI5Gpj0Spk7TSIxJf8aJDDBgZh0-jkWdYyZ1Zric,10713
|
3
|
+
active_vision-0.0.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
4
|
+
active_vision-0.0.3.dist-info/METADATA,sha256=g629Kn07n4ZXOOX5cW1nPQK1IR9Mm5vW_z7RqxdwKgY,9385
|
5
|
+
active_vision-0.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
6
|
+
active_vision-0.0.3.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
|
7
|
+
active_vision-0.0.3.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
active_vision/__init__.py,sha256=5VE_DRQ_Rgbo7NlPh3-rP2pUClK48jGxPqAcptBscZ8,43
|
2
|
-
active_vision/core.py,sha256=RBVabC350wucYl7KJgIp3fc1pS9pxtG14iDb-ZyBJxI,5262
|
3
|
-
active_vision-0.0.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
4
|
-
active_vision-0.0.2.dist-info/METADATA,sha256=7_eqZJnGeIPjb4LLZ-Bqu1AMJ_h77_0bNRyS_COEv5w,8350
|
5
|
-
active_vision-0.0.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
6
|
-
active_vision-0.0.2.dist-info/top_level.txt,sha256=7qUQvccN2UU63z5S9vrgJmqK-8sFGrtpf1e9Z86nihE,14
|
7
|
-
active_vision-0.0.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|