bplusplus 1.2.1__py3-none-any.whl → 1.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bplusplus might be problematic. Click here for more details.
- bplusplus/__init__.py +3 -5
- bplusplus/collect.py +2 -0
- bplusplus/inference.py +891 -0
- bplusplus/prepare.py +429 -540
- bplusplus/{hierarchical/test.py → test.py} +99 -88
- bplusplus/tracker.py +261 -0
- bplusplus/{hierarchical/train.py → train.py} +29 -29
- bplusplus-1.2.3.dist-info/METADATA +101 -0
- bplusplus-1.2.3.dist-info/RECORD +11 -0
- {bplusplus-1.2.1.dist-info → bplusplus-1.2.3.dist-info}/WHEEL +1 -1
- bplusplus/resnet/test.py +0 -473
- bplusplus/resnet/train.py +0 -329
- bplusplus/train_validate.py +0 -11
- bplusplus-1.2.1.dist-info/METADATA +0 -252
- bplusplus-1.2.1.dist-info/RECORD +0 -12
- {bplusplus-1.2.1.dist-info → bplusplus-1.2.3.dist-info}/LICENSE +0 -0
|
@@ -14,7 +14,7 @@ import logging
|
|
|
14
14
|
from tqdm import tqdm
|
|
15
15
|
import sys
|
|
16
16
|
|
|
17
|
-
def
|
|
17
|
+
def train(batch_size=4, epochs=30, patience=3, img_size=640, data_dir='input', output_dir='./output', species_list=None):
|
|
18
18
|
"""
|
|
19
19
|
Main function to run the entire training pipeline.
|
|
20
20
|
Sets up datasets, model, training process and handles errors.
|
|
@@ -144,17 +144,17 @@ def train_multitask(batch_size=4, epochs=30, patience=3, img_size=640, data_dir=
|
|
|
144
144
|
def get_taxonomy(species_list):
|
|
145
145
|
"""
|
|
146
146
|
Retrieves taxonomic information for a list of species from GBIF API.
|
|
147
|
-
Creates a hierarchical taxonomy dictionary with
|
|
147
|
+
Creates a hierarchical taxonomy dictionary with family, genus, and species relationships.
|
|
148
148
|
"""
|
|
149
149
|
taxonomy = {1: [], 2: {}, 3: {}}
|
|
150
|
-
|
|
151
|
-
|
|
150
|
+
species_to_genus = {}
|
|
151
|
+
genus_to_family = {}
|
|
152
152
|
|
|
153
153
|
logger.info(f"Building taxonomy from GBIF for {len(species_list)} species")
|
|
154
154
|
|
|
155
155
|
print("\nTaxonomy Results:")
|
|
156
156
|
print("-" * 80)
|
|
157
|
-
print(f"{'Species':<30} {'
|
|
157
|
+
print(f"{'Species':<30} {'Family':<20} {'Genus':<20} {'Status'}")
|
|
158
158
|
print("-" * 80)
|
|
159
159
|
|
|
160
160
|
for species_name in species_list:
|
|
@@ -165,23 +165,23 @@ def get_taxonomy(species_list):
|
|
|
165
165
|
|
|
166
166
|
if data.get('status') == 'ACCEPTED' or data.get('status') == 'SYNONYM':
|
|
167
167
|
family = data.get('family')
|
|
168
|
-
|
|
168
|
+
genus = data.get('genus')
|
|
169
169
|
|
|
170
|
-
if family and
|
|
170
|
+
if family and genus:
|
|
171
171
|
status = "OK"
|
|
172
172
|
|
|
173
|
-
print(f"{species_name:<30} {
|
|
173
|
+
print(f"{species_name:<30} {family:<20} {genus:<20} {status}")
|
|
174
174
|
|
|
175
|
-
|
|
176
|
-
|
|
175
|
+
species_to_genus[species_name] = genus
|
|
176
|
+
genus_to_family[genus] = family
|
|
177
177
|
|
|
178
|
-
if
|
|
179
|
-
taxonomy[1].append(
|
|
178
|
+
if family not in taxonomy[1]:
|
|
179
|
+
taxonomy[1].append(family)
|
|
180
180
|
|
|
181
|
-
taxonomy[2][
|
|
182
|
-
taxonomy[3][species_name] =
|
|
181
|
+
taxonomy[2][genus] = family
|
|
182
|
+
taxonomy[3][species_name] = genus
|
|
183
183
|
else:
|
|
184
|
-
error_msg = f"Species '{species_name}' found in GBIF but family and
|
|
184
|
+
error_msg = f"Species '{species_name}' found in GBIF but family and genus not found, could be spelling error in species, check GBIF"
|
|
185
185
|
logger.error(error_msg)
|
|
186
186
|
print(f"{species_name:<30} {'Not found':<20} {'Not found':<20} ERROR")
|
|
187
187
|
print(f"Error: {error_msg}")
|
|
@@ -203,23 +203,23 @@ def get_taxonomy(species_list):
|
|
|
203
203
|
taxonomy[1] = sorted(list(set(taxonomy[1])))
|
|
204
204
|
print("-" * 80)
|
|
205
205
|
|
|
206
|
-
|
|
207
|
-
|
|
206
|
+
num_families = len(taxonomy[1])
|
|
207
|
+
num_genera = len(taxonomy[2])
|
|
208
208
|
num_species = len(taxonomy[3])
|
|
209
209
|
|
|
210
|
-
print("\nOrder indices:")
|
|
211
|
-
for i, order in enumerate(taxonomy[1]):
|
|
212
|
-
print(f" {i}: {order}")
|
|
213
|
-
|
|
214
210
|
print("\nFamily indices:")
|
|
215
|
-
for i, family in enumerate(taxonomy[
|
|
211
|
+
for i, family in enumerate(taxonomy[1]):
|
|
216
212
|
print(f" {i}: {family}")
|
|
217
213
|
|
|
214
|
+
print("\nGenus indices:")
|
|
215
|
+
for i, genus in enumerate(taxonomy[2].keys()):
|
|
216
|
+
print(f" {i}: {genus}")
|
|
217
|
+
|
|
218
218
|
print("\nSpecies indices:")
|
|
219
219
|
for i, species in enumerate(species_list):
|
|
220
220
|
print(f" {i}: {species}")
|
|
221
221
|
|
|
222
|
-
logger.info(f"Taxonomy built: {
|
|
222
|
+
logger.info(f"Taxonomy built: {num_families} families, {num_genera} genera, {num_species} species")
|
|
223
223
|
return taxonomy
|
|
224
224
|
|
|
225
225
|
def get_species_from_directory(train_dir):
|
|
@@ -276,15 +276,15 @@ class InsectDataset(Dataset):
|
|
|
276
276
|
self.level_to_idx = level_to_idx
|
|
277
277
|
self.samples = []
|
|
278
278
|
|
|
279
|
-
|
|
280
|
-
|
|
279
|
+
species_to_genus = {species: genus for species, genus in taxonomy[3].items()}
|
|
280
|
+
genus_to_family = {genus: family for genus, family in taxonomy[2].items()}
|
|
281
281
|
|
|
282
282
|
for species_name in os.listdir(root_dir):
|
|
283
283
|
species_path = os.path.join(root_dir, species_name)
|
|
284
284
|
if os.path.isdir(species_path):
|
|
285
|
-
if species_name in
|
|
286
|
-
|
|
287
|
-
|
|
285
|
+
if species_name in species_to_genus:
|
|
286
|
+
genus_name = species_to_genus[species_name]
|
|
287
|
+
family_name = genus_to_family[genus_name]
|
|
288
288
|
|
|
289
289
|
for img_file in os.listdir(species_path):
|
|
290
290
|
if img_file.endswith(('.jpg', '.png', '.jpeg')):
|
|
@@ -296,7 +296,7 @@ class InsectDataset(Dataset):
|
|
|
296
296
|
# Only add valid images to samples
|
|
297
297
|
self.samples.append({
|
|
298
298
|
'image_path': img_path,
|
|
299
|
-
'labels': [
|
|
299
|
+
'labels': [family_name, genus_name, species_name]
|
|
300
300
|
})
|
|
301
301
|
|
|
302
302
|
except Exception as e:
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: bplusplus
|
|
3
|
+
Version: 1.2.3
|
|
4
|
+
Summary: A simple method to create AI models for biodiversity, with collect and prepare pipeline
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Titus Venverloo
|
|
7
|
+
Author-email: tvenver@mit.edu
|
|
8
|
+
Requires-Python: >=3.9.0,<4.0.0
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Dist: numpy
|
|
17
|
+
Requires-Dist: pandas (==2.1.4)
|
|
18
|
+
Requires-Dist: pillow
|
|
19
|
+
Requires-Dist: prettytable (==3.7.0)
|
|
20
|
+
Requires-Dist: pygbif (>=0.6.4,<0.7.0)
|
|
21
|
+
Requires-Dist: pyyaml (==6.0.1)
|
|
22
|
+
Requires-Dist: requests (==2.25.1)
|
|
23
|
+
Requires-Dist: scikit-learn
|
|
24
|
+
Requires-Dist: tabulate (>=0.9.0,<0.10.0)
|
|
25
|
+
Requires-Dist: torch (>=2.5.0,<3.0.0)
|
|
26
|
+
Requires-Dist: torchvision
|
|
27
|
+
Requires-Dist: tqdm (==4.66.4)
|
|
28
|
+
Requires-Dist: ultralytics (>=8.3.0)
|
|
29
|
+
Requires-Dist: validators (>=0.33.0,<0.34.0)
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
# Domain-Agnostic Insect Classification Pipeline
|
|
33
|
+
|
|
34
|
+
This project provides a complete, end-to-end pipeline for building a custom insect classification system. The framework is designed to be **domain-agnostic**, allowing you to train a powerful detection and classification model for **any insect species** by simply providing a list of names.
|
|
35
|
+
|
|
36
|
+
Using the `Bplusplus` library, this pipeline automates the entire machine learning workflow, from data collection to video inference.
|
|
37
|
+
|
|
38
|
+
## Key Features
|
|
39
|
+
|
|
40
|
+
- **Automated Data Collection**: Downloads hundreds of images for any species from the GBIF database.
|
|
41
|
+
- **Intelligent Data Preparation**: Uses a pre-trained model to automatically find, crop, and resize insects from raw images, ensuring high-quality training data.
|
|
42
|
+
- **Hierarchical Classification**: Trains a model to identify insects at three taxonomic levels: **family, genus, and species**.
|
|
43
|
+
- **Video Inference & Tracking**: Processes video files to detect, classify, and track individual insects over time, providing aggregated predictions.
|
|
44
|
+
## Pipeline Overview
|
|
45
|
+
|
|
46
|
+
The process is broken down into six main steps, all detailed in the `full_pipeline.ipynb` notebook:
|
|
47
|
+
|
|
48
|
+
1. **Collect Data**: Select your target species and fetch raw insect images from the web.
|
|
49
|
+
2. **Prepare Data**: Filter, clean, and prepare images for training.
|
|
50
|
+
3. **Train Model**: Train the hierarchical classification model.
|
|
51
|
+
4. **Download Weights**: Fetch pre-trained weights for the detection model.
|
|
52
|
+
5. **Test Model**: Evaluate the performance of the trained model.
|
|
53
|
+
6. **Run Inference**: Run the full pipeline on a video file for real-world application.
|
|
54
|
+
|
|
55
|
+
## How to Use
|
|
56
|
+
|
|
57
|
+
### Prerequisites
|
|
58
|
+
|
|
59
|
+
- Python 3.9+
|
|
60
|
+
- `venv` for creating a virtual environment (recommended)
|
|
61
|
+
|
|
62
|
+
### Setup
|
|
63
|
+
|
|
64
|
+
1. **Create and activate a virtual environment:**
|
|
65
|
+
```bash
|
|
66
|
+
python3 -m venv venv
|
|
67
|
+
source venv/bin/activate
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
2. **Install the required packages:**
|
|
71
|
+
```bash
|
|
72
|
+
pip install bplusplus
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Running the Pipeline
|
|
76
|
+
|
|
77
|
+
The entire workflow is contained within **`full_pipeline.ipynb`**. Open it in Jupyter Notebook or JupyterLab and run the cells sequentially to execute the full pipeline.
|
|
78
|
+
|
|
79
|
+
### Customization
|
|
80
|
+
|
|
81
|
+
To train the model on different insect species, simply modify the `names` list in **Step 1** of the notebook:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
# full_pipeline.ipynb
|
|
85
|
+
|
|
86
|
+
# To use your own species, change the names in this list
|
|
87
|
+
names = [
|
|
88
|
+
"Vespa crabro", "Vespula vulgaris", "Dolichovespula media"
|
|
89
|
+
]
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
The pipeline will automatically handle the rest, from data collection to training, for your new set of species.
|
|
93
|
+
|
|
94
|
+
## Directory Structure
|
|
95
|
+
|
|
96
|
+
The pipeline will create the following directories to store artifacts:
|
|
97
|
+
|
|
98
|
+
- `GBIF_data/`: Stores the raw images downloaded from GBIF.
|
|
99
|
+
- `prepared_data/`: Contains the cleaned, cropped, and resized images ready for training.
|
|
100
|
+
- `trained_model/`: Stores the trained model weights (`best_multitask.pt`) and the pre-trained detection weights.
|
|
101
|
+
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
bplusplus/__init__.py,sha256=GLqIx6Ln3Jt_Q95zkqXglKaSF3dbw-awax4dYry3tw0,145
|
|
2
|
+
bplusplus/collect.py,sha256=lEJHXPpOo4DALBw6zemdmFuqAXZ12-BKwgesvq5ACYs,7135
|
|
3
|
+
bplusplus/inference.py,sha256=3XmwzEfVTw5OFiMbMVgiuEa-r22HvMUWHVXESZsTIzo,37708
|
|
4
|
+
bplusplus/prepare.py,sha256=pdXUVAzY030tM6f0Gf_zml8I26lS38wuvH13R2F00Do,25829
|
|
5
|
+
bplusplus/test.py,sha256=kKjrsb3iCfljtRjot_kiVB5hopMkApoW9yvMcuI2O_U,30545
|
|
6
|
+
bplusplus/tracker.py,sha256=JixV1ICGywGhVMTvkq3hrk4MLUUWDh3XJW4VLm4JdO0,11250
|
|
7
|
+
bplusplus/train.py,sha256=wkHnKbTdZAFn2voJS7gSYXU7B9UVYVYmbTJCR0tFzs4,28058
|
|
8
|
+
bplusplus-1.2.3.dist-info/LICENSE,sha256=rRkeHptDnlmviR0_WWgNT9t696eys_cjfVUU8FEO4k4,1071
|
|
9
|
+
bplusplus-1.2.3.dist-info/METADATA,sha256=IDnokwF2CEyM_3xLmlRL30k2P9NglDjdjbxC7-UZoc4,4046
|
|
10
|
+
bplusplus-1.2.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
11
|
+
bplusplus-1.2.3.dist-info/RECORD,,
|