bplusplus 2.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bplusplus-2.0.1/LICENSE +21 -0
- bplusplus-2.0.1/PKG-INFO +252 -0
- bplusplus-2.0.1/README.md +209 -0
- bplusplus-2.0.1/pyproject.toml +74 -0
- bplusplus-2.0.1/src/bplusplus/__init__.py +15 -0
- bplusplus-2.0.1/src/bplusplus/collect.py +523 -0
- bplusplus-2.0.1/src/bplusplus/inference.py +1282 -0
- bplusplus-2.0.1/src/bplusplus/insect_detector.py +376 -0
- bplusplus-2.0.1/src/bplusplus/prepare.py +682 -0
- bplusplus-2.0.1/src/bplusplus/tracker.py +261 -0
- bplusplus-2.0.1/src/bplusplus/train.py +913 -0
- bplusplus-2.0.1/src/bplusplus/validation.py +580 -0
bplusplus-2.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Titus Venverloo
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
bplusplus-2.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: bplusplus
|
|
3
|
+
Version: 2.0.1
|
|
4
|
+
Summary: A simple method to create AI models for biodiversity, with collect and prepare pipeline
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Titus Venverloo
|
|
7
|
+
Author-email: tvenver@mit.edu
|
|
8
|
+
Requires-Python: >=3.10,<4.0
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Requires-Dist: numpy (>=1.26.0,<1.26.5) ; sys_platform == "win32"
|
|
16
|
+
Requires-Dist: numpy (>=1.26.0,<1.27.0) ; sys_platform == "darwin" and platform_machine == "arm64"
|
|
17
|
+
Requires-Dist: numpy (>=1.26.0,<1.27.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
|
|
18
|
+
Requires-Dist: numpy (>=1.26.0,<1.27.0) ; sys_platform == "linux" and platform_machine == "aarch64"
|
|
19
|
+
Requires-Dist: numpy (>=1.26.0,<1.27.0) ; sys_platform == "linux" and platform_machine == "x86_64"
|
|
20
|
+
Requires-Dist: pandas (==2.1.4)
|
|
21
|
+
Requires-Dist: pillow (>=10.0.0,<12.0.0) ; sys_platform == "darwin"
|
|
22
|
+
Requires-Dist: pillow (>=10.0.0,<12.0.0) ; sys_platform == "linux"
|
|
23
|
+
Requires-Dist: pillow (>=10.0.0,<12.0.0) ; sys_platform == "win32"
|
|
24
|
+
Requires-Dist: prettytable (==3.7.0)
|
|
25
|
+
Requires-Dist: pygbif (==0.6.5)
|
|
26
|
+
Requires-Dist: pyyaml (==6.0.1)
|
|
27
|
+
Requires-Dist: requests (==2.25.1)
|
|
28
|
+
Requires-Dist: scikit-learn (>=1.3.0,<1.7.0) ; sys_platform == "linux" and platform_machine == "aarch64"
|
|
29
|
+
Requires-Dist: scikit-learn (>=1.3.0,<1.7.0) ; sys_platform == "win32"
|
|
30
|
+
Requires-Dist: scikit-learn (>=1.4.0,<1.8.0) ; sys_platform == "darwin" and platform_machine == "arm64"
|
|
31
|
+
Requires-Dist: scikit-learn (>=1.4.0,<1.8.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
|
|
32
|
+
Requires-Dist: scikit-learn (>=1.4.0,<1.8.0) ; sys_platform == "linux" and platform_machine == "x86_64"
|
|
33
|
+
Requires-Dist: tabulate (==0.9.0)
|
|
34
|
+
Requires-Dist: torch (>=2.0.0,<2.8.0) ; sys_platform == "darwin" and platform_machine == "arm64"
|
|
35
|
+
Requires-Dist: torch (>=2.0.0,<2.8.0) ; sys_platform == "linux"
|
|
36
|
+
Requires-Dist: torch (>=2.0.0,<2.8.0) ; sys_platform == "win32"
|
|
37
|
+
Requires-Dist: torch (>=2.2.0,<2.3.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
|
|
38
|
+
Requires-Dist: tqdm (==4.66.4)
|
|
39
|
+
Requires-Dist: ultralytics (==8.3.173)
|
|
40
|
+
Requires-Dist: validators (==0.33.0)
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
|
|
43
|
+
# B++ repository
|
|
44
|
+
|
|
45
|
+
[DOI](https://zenodo.org/badge/latestdoi/765250194)
|
|
46
|
+
[](https://pypi.org/project/bplusplus/)
|
|
47
|
+
[](https://pypi.org/project/bplusplus/)
|
|
48
|
+
[](https://pypi.org/project/bplusplus/)
|
|
49
|
+
[](https://pepy.tech/project/bplusplus)
|
|
50
|
+
[](https://pepy.tech/project/bplusplus)
|
|
51
|
+
[](https://pepy.tech/project/bplusplus)
|
|
52
|
+
|
|
53
|
+
This project provides a complete, end-to-end pipeline for building a custom insect classification system. The framework is designed to be **domain-agnostic**, allowing you to train a powerful detection and classification model for **any insect species** by simply providing a list of names.
|
|
54
|
+
|
|
55
|
+
Using the `bplusplus` library, this pipeline automates the entire machine learning workflow, from data collection to video inference.
|
|
56
|
+
|
|
57
|
+
## Key Features
|
|
58
|
+
|
|
59
|
+
- **Automated Data Collection**: Downloads hundreds of images for any species from the GBIF database.
|
|
60
|
+
- **Intelligent Data Preparation**: Uses a pre-trained model to automatically find, crop, and resize insects from raw images, ensuring high-quality training data.
|
|
61
|
+
- **Hierarchical Classification**: Trains a model to identify insects at three taxonomic levels: **family, genus, and species**.
|
|
62
|
+
- **Video Inference & Tracking**: Processes video files to detect, classify, and track individual insects over time, providing aggregated predictions.
|
|
63
|
+
## Pipeline Overview
|
|
64
|
+
|
|
65
|
+
The process is broken down into five main steps, all detailed in the `full_pipeline.ipynb` notebook:
|
|
66
|
+
|
|
67
|
+
1. **Collect Data**: Select your target species and fetch raw insect images from the web.
|
|
68
|
+
2. **Prepare Data**: Filter, clean, and prepare images for training.
|
|
69
|
+
3. **Train Model**: Train the hierarchical classification model.
|
|
70
|
+
4. **Validate Model**: Evaluate the performance of the trained model.
|
|
71
|
+
5. **Run Inference**: Run the full pipeline on a video file for real-world application.
|
|
72
|
+
|
|
73
|
+
## How to Use
|
|
74
|
+
|
|
75
|
+
### Prerequisites
|
|
76
|
+
|
|
77
|
+
- Python 3.10+
|
|
78
|
+
|
|
79
|
+
### Setup
|
|
80
|
+
|
|
81
|
+
1. **Create and activate a virtual environment:**
|
|
82
|
+
```bash
|
|
83
|
+
python3 -m venv venv
|
|
84
|
+
source venv/bin/activate
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
2. **Install the required packages:**
|
|
88
|
+
```bash
|
|
89
|
+
pip install bplusplus
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Running the Pipeline
|
|
93
|
+
|
|
94
|
+
The pipeline can be run step-by-step using the functions from the `bplusplus` library. While the `full_pipeline.ipynb` notebook provides a complete, executable workflow, the core functions are described below.
|
|
95
|
+
|
|
96
|
+
#### Step 1: Collect Data
|
|
97
|
+
Download images for your target species from the GBIF database. You'll need to provide a list of scientific names.
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
import bplusplus
|
|
101
|
+
from pathlib import Path
|
|
102
|
+
|
|
103
|
+
# Define species and directories
|
|
104
|
+
names = ["Vespa crabro", "Vespula vulgaris", "Dolichovespula media"]
|
|
105
|
+
GBIF_DATA_DIR = Path("./GBIF_data")
|
|
106
|
+
|
|
107
|
+
# Define search parameters
|
|
108
|
+
search = {"scientificName": names}
|
|
109
|
+
|
|
110
|
+
# Run collection
|
|
111
|
+
bplusplus.collect(
|
|
112
|
+
group_by_key=bplusplus.Group.scientificName,
|
|
113
|
+
search_parameters=search,
|
|
114
|
+
images_per_group=200, # Recommended to download more than needed
|
|
115
|
+
output_directory=GBIF_DATA_DIR,
|
|
116
|
+
num_threads=5
|
|
117
|
+
)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
#### Step 2: Prepare Data
|
|
121
|
+
Process the raw images to extract, crop, and resize insects. This step uses a pre-trained model to ensure only high-quality images are used for training.
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
PREPARED_DATA_DIR = Path("./prepared_data")
|
|
125
|
+
|
|
126
|
+
bplusplus.prepare(
|
|
127
|
+
input_directory=GBIF_DATA_DIR,
|
|
128
|
+
output_directory=PREPARED_DATA_DIR,
|
|
129
|
+
img_size=640, # Target image size for training
|
|
130
|
+
conf=0.6, # Detection confidence threshold (0-1)
|
|
131
|
+
valid=0.1, # Validation split ratio (0-1), set to 0 for no validation
|
|
132
|
+
)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
#### Step 3: Train Model
|
|
136
|
+
Train the hierarchical classification model on your prepared data. The model learns to identify family, genus, and species.
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
TRAINED_MODEL_DIR = Path("./trained_model")
|
|
140
|
+
|
|
141
|
+
bplusplus.train(
|
|
142
|
+
batch_size=4,
|
|
143
|
+
epochs=30,
|
|
144
|
+
patience=3,
|
|
145
|
+
img_size=640,
|
|
146
|
+
data_dir=PREPARED_DATA_DIR,
|
|
147
|
+
output_dir=TRAINED_MODEL_DIR,
|
|
148
|
+
species_list=names,
|
|
149
|
+
backbone="resnet50", # Choose: "resnet18", "resnet50", or "resnet101"
|
|
150
|
+
# num_workers=0, # Optional: force single-process loading (most stable)
|
|
151
|
+
# train_transforms=custom_transforms, # Optional: custom torchvision transforms
|
|
152
|
+
)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**Note:** The `num_workers` parameter controls DataLoader multiprocessing (defaults to 0 for stability). The `backbone` parameter allows you to choose between different ResNet architectures—use `resnet18` for faster training or `resnet101` for potentially better accuracy.
|
|
156
|
+
|
|
157
|
+
#### Step 4: Validate Model
|
|
158
|
+
Evaluate the trained model on a held-out validation set. This calculates precision, recall, and F1-score at all taxonomic levels.
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
HIERARCHICAL_MODEL_PATH = TRAINED_MODEL_DIR / "best_multitask.pt"
|
|
162
|
+
|
|
163
|
+
results = bplusplus.validate(
|
|
164
|
+
species_list=names,
|
|
165
|
+
validation_dir=PREPARED_DATA_DIR / "valid",
|
|
166
|
+
hierarchical_weights=HIERARCHICAL_MODEL_PATH,
|
|
167
|
+
img_size=640, # Must match training
|
|
168
|
+
batch_size=32,
|
|
169
|
+
backbone="resnet50", # Must match training
|
|
170
|
+
)
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
#### Step 5: Run Inference on Video
|
|
174
|
+
Process a video file to detect, classify, and track insects using motion-based detection. The pipeline uses Gaussian mixture model (GMM) background subtraction to detect moving insects, tracks them across frames, and classifies confirmed tracks.
|
|
175
|
+
|
|
176
|
+
**Output files generated in `output_dir`:**
|
|
177
|
+
- `{video}_annotated.mp4` - Video showing confirmed tracks with classifications
|
|
178
|
+
- `{video}_debug.mp4` - Debug video with motion mask and all detections
|
|
179
|
+
- `{video}_results.csv` - Aggregated results per confirmed track
|
|
180
|
+
- `{video}_detections.csv` - Frame-by-frame detection data
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
VIDEO_INPUT_PATH = Path("my_video.mp4")
|
|
184
|
+
OUTPUT_DIR = Path("./output")
|
|
185
|
+
HIERARCHICAL_MODEL_PATH = TRAINED_MODEL_DIR / "best_multitask.pt"
|
|
186
|
+
|
|
187
|
+
results = bplusplus.inference(
|
|
188
|
+
species_list=names,
|
|
189
|
+
hierarchical_model_path=HIERARCHICAL_MODEL_PATH,
|
|
190
|
+
video_path=VIDEO_INPUT_PATH,
|
|
191
|
+
output_dir=OUTPUT_DIR,
|
|
192
|
+
fps=None, # None = process all frames
|
|
193
|
+
backbone="resnet50", # Must match training
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
print(f"Detected {results['tracks']} tracks ({results['confirmed_tracks']} confirmed)")
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
**Custom Detection Configuration:**
|
|
200
|
+
|
|
201
|
+
For advanced control over detection parameters, provide a YAML config file:
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
results = bplusplus.inference(
|
|
205
|
+
...,
|
|
206
|
+
config="detection_config.yaml"
|
|
207
|
+
)
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
Download a template config from the [releases page](https://github.com/Tvenver/Bplusplus/releases). Parameters control cohesiveness filtering, shape filtering, tracking behavior, and path topology analysis for confirming insect-like movement.
|
|
211
|
+
|
|
212
|
+
### Customization
|
|
213
|
+
|
|
214
|
+
To train the model on your own set of insect species, you only need to change the `names` list in **Step 1**. The pipeline will automatically handle the rest.
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
# To use your own species, change the names in this list
|
|
218
|
+
names = [
|
|
219
|
+
"Vespa crabro",
|
|
220
|
+
"Vespula vulgaris",
|
|
221
|
+
"Dolichovespula media",
|
|
222
|
+
# Add your species here
|
|
223
|
+
]
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
#### Handling an "Unknown" Class
|
|
227
|
+
To train a model that can recognize an "unknown" class for insects that don't belong to your target species, add `"unknown"` to your `species_list`. You must also provide a corresponding `unknown` folder containing images of various other insects in your data directories (e.g., `prepared_data/train/unknown`).
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
# Example with an unknown class
|
|
231
|
+
names_with_unknown = [
|
|
232
|
+
"Vespa crabro",
|
|
233
|
+
"Vespula vulgaris",
|
|
234
|
+
"unknown"
|
|
235
|
+
]
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Directory Structure
|
|
239
|
+
|
|
240
|
+
The pipeline will create the following directories to store artifacts:
|
|
241
|
+
|
|
242
|
+
- `GBIF_data/`: Stores the raw images downloaded from GBIF.
|
|
243
|
+
- `prepared_data/`: Contains the cleaned, cropped, and resized images ready for training (`train/` and optionally `valid/` subdirectories).
|
|
244
|
+
- `trained_model/`: Saves the trained model weights (`best_multitask.pt`).
|
|
245
|
+
- `output/`: Inference results including annotated videos and CSV files.
|
|
246
|
+
|
|
247
|
+
# Citation
|
|
248
|
+
|
|
249
|
+
All information in this GitHub repository is available under the MIT license, as long as credit is given to the authors.
|
|
250
|
+
|
|
251
|
+
**Venverloo, T., Duarte, F., B++: Towards Real-Time Monitoring of Insect Species. MIT Senseable City Laboratory, AMS Institute.**
|
|
252
|
+
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# B++ repository
|
|
2
|
+
|
|
3
|
+
[DOI](https://zenodo.org/badge/latestdoi/765250194)
|
|
4
|
+
[](https://pypi.org/project/bplusplus/)
|
|
5
|
+
[](https://pypi.org/project/bplusplus/)
|
|
6
|
+
[](https://pypi.org/project/bplusplus/)
|
|
7
|
+
[](https://pepy.tech/project/bplusplus)
|
|
8
|
+
[](https://pepy.tech/project/bplusplus)
|
|
9
|
+
[](https://pepy.tech/project/bplusplus)
|
|
10
|
+
|
|
11
|
+
This project provides a complete, end-to-end pipeline for building a custom insect classification system. The framework is designed to be **domain-agnostic**, allowing you to train a powerful detection and classification model for **any insect species** by simply providing a list of names.
|
|
12
|
+
|
|
13
|
+
Using the `bplusplus` library, this pipeline automates the entire machine learning workflow, from data collection to video inference.
|
|
14
|
+
|
|
15
|
+
## Key Features
|
|
16
|
+
|
|
17
|
+
- **Automated Data Collection**: Downloads hundreds of images for any species from the GBIF database.
|
|
18
|
+
- **Intelligent Data Preparation**: Uses a pre-trained model to automatically find, crop, and resize insects from raw images, ensuring high-quality training data.
|
|
19
|
+
- **Hierarchical Classification**: Trains a model to identify insects at three taxonomic levels: **family, genus, and species**.
|
|
20
|
+
- **Video Inference & Tracking**: Processes video files to detect, classify, and track individual insects over time, providing aggregated predictions.
|
|
21
|
+
## Pipeline Overview
|
|
22
|
+
|
|
23
|
+
The process is broken down into five main steps, all detailed in the `full_pipeline.ipynb` notebook:
|
|
24
|
+
|
|
25
|
+
1. **Collect Data**: Select your target species and fetch raw insect images from the web.
|
|
26
|
+
2. **Prepare Data**: Filter, clean, and prepare images for training.
|
|
27
|
+
3. **Train Model**: Train the hierarchical classification model.
|
|
28
|
+
4. **Validate Model**: Evaluate the performance of the trained model.
|
|
29
|
+
5. **Run Inference**: Run the full pipeline on a video file for real-world application.
|
|
30
|
+
|
|
31
|
+
## How to Use
|
|
32
|
+
|
|
33
|
+
### Prerequisites
|
|
34
|
+
|
|
35
|
+
- Python 3.10+
|
|
36
|
+
|
|
37
|
+
### Setup
|
|
38
|
+
|
|
39
|
+
1. **Create and activate a virtual environment:**
|
|
40
|
+
```bash
|
|
41
|
+
python3 -m venv venv
|
|
42
|
+
source venv/bin/activate
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
2. **Install the required packages:**
|
|
46
|
+
```bash
|
|
47
|
+
pip install bplusplus
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Running the Pipeline
|
|
51
|
+
|
|
52
|
+
The pipeline can be run step-by-step using the functions from the `bplusplus` library. While the `full_pipeline.ipynb` notebook provides a complete, executable workflow, the core functions are described below.
|
|
53
|
+
|
|
54
|
+
#### Step 1: Collect Data
|
|
55
|
+
Download images for your target species from the GBIF database. You'll need to provide a list of scientific names.
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
import bplusplus
|
|
59
|
+
from pathlib import Path
|
|
60
|
+
|
|
61
|
+
# Define species and directories
|
|
62
|
+
names = ["Vespa crabro", "Vespula vulgaris", "Dolichovespula media"]
|
|
63
|
+
GBIF_DATA_DIR = Path("./GBIF_data")
|
|
64
|
+
|
|
65
|
+
# Define search parameters
|
|
66
|
+
search = {"scientificName": names}
|
|
67
|
+
|
|
68
|
+
# Run collection
|
|
69
|
+
bplusplus.collect(
|
|
70
|
+
group_by_key=bplusplus.Group.scientificName,
|
|
71
|
+
search_parameters=search,
|
|
72
|
+
images_per_group=200, # Recommended to download more than needed
|
|
73
|
+
output_directory=GBIF_DATA_DIR,
|
|
74
|
+
num_threads=5
|
|
75
|
+
)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
#### Step 2: Prepare Data
|
|
79
|
+
Process the raw images to extract, crop, and resize insects. This step uses a pre-trained model to ensure only high-quality images are used for training.
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
PREPARED_DATA_DIR = Path("./prepared_data")
|
|
83
|
+
|
|
84
|
+
bplusplus.prepare(
|
|
85
|
+
input_directory=GBIF_DATA_DIR,
|
|
86
|
+
output_directory=PREPARED_DATA_DIR,
|
|
87
|
+
img_size=640, # Target image size for training
|
|
88
|
+
conf=0.6, # Detection confidence threshold (0-1)
|
|
89
|
+
valid=0.1, # Validation split ratio (0-1), set to 0 for no validation
|
|
90
|
+
)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
#### Step 3: Train Model
|
|
94
|
+
Train the hierarchical classification model on your prepared data. The model learns to identify family, genus, and species.
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
TRAINED_MODEL_DIR = Path("./trained_model")
|
|
98
|
+
|
|
99
|
+
bplusplus.train(
|
|
100
|
+
batch_size=4,
|
|
101
|
+
epochs=30,
|
|
102
|
+
patience=3,
|
|
103
|
+
img_size=640,
|
|
104
|
+
data_dir=PREPARED_DATA_DIR,
|
|
105
|
+
output_dir=TRAINED_MODEL_DIR,
|
|
106
|
+
species_list=names,
|
|
107
|
+
backbone="resnet50", # Choose: "resnet18", "resnet50", or "resnet101"
|
|
108
|
+
# num_workers=0, # Optional: force single-process loading (most stable)
|
|
109
|
+
# train_transforms=custom_transforms, # Optional: custom torchvision transforms
|
|
110
|
+
)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
**Note:** The `num_workers` parameter controls DataLoader multiprocessing (defaults to 0 for stability). The `backbone` parameter allows you to choose between different ResNet architectures—use `resnet18` for faster training or `resnet101` for potentially better accuracy.
|
|
114
|
+
|
|
115
|
+
#### Step 4: Validate Model
|
|
116
|
+
Evaluate the trained model on a held-out validation set. This calculates precision, recall, and F1-score at all taxonomic levels.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
HIERARCHICAL_MODEL_PATH = TRAINED_MODEL_DIR / "best_multitask.pt"
|
|
120
|
+
|
|
121
|
+
results = bplusplus.validate(
|
|
122
|
+
species_list=names,
|
|
123
|
+
validation_dir=PREPARED_DATA_DIR / "valid",
|
|
124
|
+
hierarchical_weights=HIERARCHICAL_MODEL_PATH,
|
|
125
|
+
img_size=640, # Must match training
|
|
126
|
+
batch_size=32,
|
|
127
|
+
backbone="resnet50", # Must match training
|
|
128
|
+
)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
#### Step 5: Run Inference on Video
|
|
132
|
+
Process a video file to detect, classify, and track insects using motion-based detection. The pipeline uses Gaussian mixture model (GMM) background subtraction to detect moving insects, tracks them across frames, and classifies confirmed tracks.
|
|
133
|
+
|
|
134
|
+
**Output files generated in `output_dir`:**
|
|
135
|
+
- `{video}_annotated.mp4` - Video showing confirmed tracks with classifications
|
|
136
|
+
- `{video}_debug.mp4` - Debug video with motion mask and all detections
|
|
137
|
+
- `{video}_results.csv` - Aggregated results per confirmed track
|
|
138
|
+
- `{video}_detections.csv` - Frame-by-frame detection data
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
VIDEO_INPUT_PATH = Path("my_video.mp4")
|
|
142
|
+
OUTPUT_DIR = Path("./output")
|
|
143
|
+
HIERARCHICAL_MODEL_PATH = TRAINED_MODEL_DIR / "best_multitask.pt"
|
|
144
|
+
|
|
145
|
+
results = bplusplus.inference(
|
|
146
|
+
species_list=names,
|
|
147
|
+
hierarchical_model_path=HIERARCHICAL_MODEL_PATH,
|
|
148
|
+
video_path=VIDEO_INPUT_PATH,
|
|
149
|
+
output_dir=OUTPUT_DIR,
|
|
150
|
+
fps=None, # None = process all frames
|
|
151
|
+
backbone="resnet50", # Must match training
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
print(f"Detected {results['tracks']} tracks ({results['confirmed_tracks']} confirmed)")
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**Custom Detection Configuration:**
|
|
158
|
+
|
|
159
|
+
For advanced control over detection parameters, provide a YAML config file:
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
results = bplusplus.inference(
|
|
163
|
+
...,
|
|
164
|
+
config="detection_config.yaml"
|
|
165
|
+
)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Download a template config from the [releases page](https://github.com/Tvenver/Bplusplus/releases). Parameters control cohesiveness filtering, shape filtering, tracking behavior, and path topology analysis for confirming insect-like movement.
|
|
169
|
+
|
|
170
|
+
### Customization
|
|
171
|
+
|
|
172
|
+
To train the model on your own set of insect species, you only need to change the `names` list in **Step 1**. The pipeline will automatically handle the rest.
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
# To use your own species, change the names in this list
|
|
176
|
+
names = [
|
|
177
|
+
"Vespa crabro",
|
|
178
|
+
"Vespula vulgaris",
|
|
179
|
+
"Dolichovespula media",
|
|
180
|
+
# Add your species here
|
|
181
|
+
]
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
#### Handling an "Unknown" Class
|
|
185
|
+
To train a model that can recognize an "unknown" class for insects that don't belong to your target species, add `"unknown"` to your `species_list`. You must also provide a corresponding `unknown` folder containing images of various other insects in your data directories (e.g., `prepared_data/train/unknown`).
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
# Example with an unknown class
|
|
189
|
+
names_with_unknown = [
|
|
190
|
+
"Vespa crabro",
|
|
191
|
+
"Vespula vulgaris",
|
|
192
|
+
"unknown"
|
|
193
|
+
]
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
## Directory Structure
|
|
197
|
+
|
|
198
|
+
The pipeline will create the following directories to store artifacts:
|
|
199
|
+
|
|
200
|
+
- `GBIF_data/`: Stores the raw images downloaded from GBIF.
|
|
201
|
+
- `prepared_data/`: Contains the cleaned, cropped, and resized images ready for training (`train/` and optionally `valid/` subdirectories).
|
|
202
|
+
- `trained_model/`: Saves the trained model weights (`best_multitask.pt`).
|
|
203
|
+
- `output/`: Inference results including annotated videos and CSV files.
|
|
204
|
+
|
|
205
|
+
# Citation
|
|
206
|
+
|
|
207
|
+
All information in this GitHub repository is available under the MIT license, as long as credit is given to the authors.
|
|
208
|
+
|
|
209
|
+
**Venverloo, T., Duarte, F., B++: Towards Real-Time Monitoring of Insect Species. MIT Senseable City Laboratory, AMS Institute.**
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "bplusplus"
|
|
3
|
+
version = "2.0.1"
|
|
4
|
+
description = "A simple method to create AI models for biodiversity, with collect and prepare pipeline"
|
|
5
|
+
authors = ["Titus Venverloo <tvenver@mit.edu>", "Deniz Aydemir <deniz@aydemir.us>", "Orlando Closs <orlandocloss@pm.me>", "Ase Hatveit <aase@mit.edu>"]
|
|
6
|
+
license = "MIT"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
|
|
9
|
+
[tool.poetry.dependencies]
|
|
10
|
+
python = "^3.10"
|
|
11
|
+
requests = "2.25.1"
|
|
12
|
+
pandas = "2.1.4"
|
|
13
|
+
ultralytics = "8.3.173"
|
|
14
|
+
pyyaml = "6.0.1"
|
|
15
|
+
tqdm = "4.66.4"
|
|
16
|
+
prettytable = "3.7.0"
|
|
17
|
+
# Pillow with platform-specific compatibility
|
|
18
|
+
pillow = [
|
|
19
|
+
# Windows - stable version
|
|
20
|
+
{version = ">=10.0.0,<12.0.0", markers = "sys_platform == 'win32'"},
|
|
21
|
+
# macOS - all versions support latest
|
|
22
|
+
{version = ">=10.0.0,<12.0.0", markers = "sys_platform == 'darwin'"},
|
|
23
|
+
# Linux - most flexible
|
|
24
|
+
{version = ">=10.0.0,<12.0.0", markers = "sys_platform == 'linux'"}
|
|
25
|
+
]
|
|
26
|
+
# PyTorch with platform-specific compatibility to handle discontinued macOS Intel support
|
|
27
|
+
torch = [
|
|
28
|
+
# Windows - stable version range
|
|
29
|
+
{version = ">=2.0.0,<2.8.0", markers = "sys_platform == 'win32'"},
|
|
30
|
+
# macOS Intel - DISCONTINUED after PyTorch 2.2.2, use last supported version
|
|
31
|
+
{version = ">=2.2.0,<2.3.0", markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"},
|
|
32
|
+
# macOS ARM64 - full support, broader range
|
|
33
|
+
{version = ">=2.0.0,<2.8.0", markers = "sys_platform == 'darwin' and platform_machine == 'arm64'"},
|
|
34
|
+
# Linux - most flexible, best wheel support
|
|
35
|
+
{version = ">=2.0.0,<2.8.0", markers = "sys_platform == 'linux'"}
|
|
36
|
+
]
|
|
37
|
+
# Comprehensive environment markers for numpy compatibility across all platforms and architectures
|
|
38
|
+
# Note: pandas 2.1.4 requires numpy >=1.26.0, so we must respect that constraint
|
|
39
|
+
numpy = [
|
|
40
|
+
# Windows (all architectures) - use a narrow version range that works well with ultralytics
|
|
41
|
+
{version = ">=1.26.0,<1.26.5", markers = "sys_platform == 'win32'"},
|
|
42
|
+
# macOS ARM64 (Apple Silicon) - compatible with newer numpy
|
|
43
|
+
{version = ">=1.26.0,<1.27.0", markers = "sys_platform == 'darwin' and platform_machine == 'arm64'"},
|
|
44
|
+
# macOS x86_64 (Intel) - compatible with newer numpy
|
|
45
|
+
{version = ">=1.26.0,<1.27.0", markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"},
|
|
46
|
+
# Linux ARM64 (aarch64) - common in cloud/lab environments
|
|
47
|
+
{version = ">=1.26.0,<1.27.0", markers = "sys_platform == 'linux' and platform_machine == 'aarch64'"},
|
|
48
|
+
# Linux x86_64 - most common lab/server environment
|
|
49
|
+
{version = ">=1.26.0,<1.27.0", markers = "sys_platform == 'linux' and platform_machine == 'x86_64'"}
|
|
50
|
+
]
|
|
51
|
+
# Scikit-learn with platform-specific compatibility
|
|
52
|
+
scikit-learn = [
|
|
53
|
+
# Windows - more conservative versions for stability
|
|
54
|
+
{version = ">=1.3.0,<1.7.0", markers = "sys_platform == 'win32'"},
|
|
55
|
+
# macOS ARM64 - optimized builds available
|
|
56
|
+
{version = ">=1.4.0,<1.8.0", markers = "sys_platform == 'darwin' and platform_machine == 'arm64'"},
|
|
57
|
+
# macOS x86_64 - standard builds
|
|
58
|
+
{version = ">=1.4.0,<1.8.0", markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"},
|
|
59
|
+
# Linux ARM64 - may have limited pre-built wheels
|
|
60
|
+
{version = ">=1.3.0,<1.7.0", markers = "sys_platform == 'linux' and platform_machine == 'aarch64'"},
|
|
61
|
+
# Linux x86_64 - most stable, latest versions available
|
|
62
|
+
{version = ">=1.4.0,<1.8.0", markers = "sys_platform == 'linux' and platform_machine == 'x86_64'"}
|
|
63
|
+
]
|
|
64
|
+
pygbif = "0.6.5"
|
|
65
|
+
validators = "0.33.0"
|
|
66
|
+
tabulate = "0.9.0"
|
|
67
|
+
|
|
68
|
+
[tool.poetry.group.dev.dependencies]
|
|
69
|
+
jupyter = "^1.0.0"
|
|
70
|
+
ipykernel = "^6.29.5"
|
|
71
|
+
|
|
72
|
+
[build-system]
|
|
73
|
+
requires = ["poetry-core>=1.0.0"]
|
|
74
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
try:
|
|
2
|
+
import torch
|
|
3
|
+
import torchvision
|
|
4
|
+
except ImportError:
|
|
5
|
+
raise ImportError(
|
|
6
|
+
"PyTorch and Torchvision are not installed. "
|
|
7
|
+
"Please install them before using bplusplus by following the instructions "
|
|
8
|
+
"on the official PyTorch website: https://pytorch.org/get-started/locally/"
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
from .collect import Group, collect
|
|
12
|
+
from .prepare import prepare
|
|
13
|
+
from .train import train
|
|
14
|
+
from .inference import inference
|
|
15
|
+
from .validation import validate
|