bplusplus 1.2.2__tar.gz → 1.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bplusplus might be problematic. Click here for more details.

@@ -0,0 +1,207 @@
1
+ Metadata-Version: 2.3
2
+ Name: bplusplus
3
+ Version: 1.2.4
4
+ Summary: A simple method to create AI models for biodiversity, with collect and prepare pipeline
5
+ License: MIT
6
+ Author: Titus Venverloo
7
+ Author-email: tvenver@mit.edu
8
+ Requires-Python: >=3.10,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Requires-Dist: numpy (==1.26.4)
16
+ Requires-Dist: pandas (==2.1.4)
17
+ Requires-Dist: pillow (==11.3.0)
18
+ Requires-Dist: prettytable (==3.7.0)
19
+ Requires-Dist: pygbif (==0.6.5)
20
+ Requires-Dist: pyyaml (==6.0.1)
21
+ Requires-Dist: requests (==2.25.1)
22
+ Requires-Dist: scikit-learn (==1.7.1)
23
+ Requires-Dist: tabulate (==0.9.0)
24
+ Requires-Dist: tqdm (==4.66.4)
25
+ Requires-Dist: ultralytics (==8.3.173)
26
+ Requires-Dist: validators (==0.33.0)
27
+ Description-Content-Type: text/markdown
28
+
29
+ # B++ repository
30
+
31
+ [![DOI](https://zenodo.org/badge/765250194.svg)](https://zenodo.org/badge/latestdoi/765250194)
32
+ [![PyPi version](https://img.shields.io/pypi/v/bplusplus.svg)](https://pypi.org/project/bplusplus/)
33
+ [![Python versions](https://img.shields.io/pypi/pyversions/bplusplus.svg)](https://pypi.org/project/bplusplus/)
34
+ [![License](https://img.shields.io/pypi/l/bplusplus.svg)](https://pypi.org/project/bplusplus/)
35
+ [![Downloads](https://static.pepy.tech/badge/bplusplus)](https://pepy.tech/project/bplusplus)
36
+ [![Downloads](https://static.pepy.tech/badge/bplusplus/month)](https://pepy.tech/project/bplusplus)
37
+ [![Downloads](https://static.pepy.tech/badge/bplusplus/week)](https://pepy.tech/project/bplusplus)
38
+
39
+ This project provides a complete, end-to-end pipeline for building a custom insect classification system. The framework is designed to be **domain-agnostic**, allowing you to train a powerful detection and classification model for **any insect species** by simply providing a list of names.
40
+
41
+ Using the `bplusplus` library, this pipeline automates the entire machine learning workflow, from data collection to video inference.
42
+
43
+ ## Key Features
44
+
45
+ - **Automated Data Collection**: Downloads hundreds of images for any species from the GBIF database.
46
+ - **Intelligent Data Preparation**: Uses a pre-trained model to automatically find, crop, and resize insects from raw images, ensuring high-quality training data.
47
+ - **Hierarchical Classification**: Trains a model to identify insects at three taxonomic levels: **family, genus, and species**.
48
+ - **Video Inference & Tracking**: Processes video files to detect, classify, and track individual insects over time, providing aggregated predictions.
49
+ ## Pipeline Overview
50
+
51
+ The process is broken down into six main steps, all detailed in the `full_pipeline.ipynb` notebook:
52
+
53
+ 1. **Collect Data**: Select your target species and fetch raw insect images from the web.
54
+ 2. **Prepare Data**: Filter, clean, and prepare images for training.
55
+ 3. **Train Model**: Train the hierarchical classification model.
56
+ 4. **Download Weights**: Fetch pre-trained weights for the detection model.
57
+ 5. **Test Model**: Evaluate the performance of the trained model.
58
+ 6. **Run Inference**: Run the full pipeline on a video file for real-world application.
59
+
60
+ ## How to Use
61
+
62
+ ### Prerequisites
63
+
64
+ - Python 3.10+
65
+
66
+ ### Setup
67
+
68
+ 1. **Create and activate a virtual environment:**
69
+ ```bash
70
+ python3 -m venv venv
71
+ source venv/bin/activate
72
+ ```
73
+
74
+ 2. **Install the required packages:**
75
+ ```bash
76
+ pip install bplusplus
77
+ ```
78
+
79
+ ### Running the Pipeline
80
+
81
+ The pipeline can be run step-by-step using the functions from the `bplusplus` library. While the `full_pipeline.ipynb` notebook provides a complete, executable workflow, the core functions are described below.
82
+
83
+ #### Step 1: Collect Data
84
+ Download images for your target species from the GBIF database. You'll need to provide a list of scientific names.
85
+
86
+ ```python
87
+ import bplusplus
88
+ from pathlib import Path
89
+
90
+ # Define species and directories
91
+ names = ["Vespa crabro", "Vespula vulgaris", "Dolichovespula media"]
92
+ GBIF_DATA_DIR = Path("./GBIF_data")
93
+
94
+ # Define search parameters
95
+ search = {"scientificName": names}
96
+
97
+ # Run collection
98
+ bplusplus.collect(
99
+ group_by_key=bplusplus.Group.scientificName,
100
+ search_parameters=search,
101
+ images_per_group=200, # Recommended to download more than needed
102
+ output_directory=GBIF_DATA_DIR,
103
+ num_threads=5
104
+ )
105
+ ```
106
+
107
+ #### Step 2: Prepare Data
108
+ Process the raw images to extract, crop, and resize insects. This step uses a pre-trained model to ensure only high-quality images are used for training.
109
+
110
+ ```python
111
+ PREPARED_DATA_DIR = Path("./prepared_data")
112
+
113
+ bplusplus.prepare(
114
+ input_directory=GBIF_DATA_DIR,
115
+ output_directory=PREPARED_DATA_DIR,
116
+ img_size=640 # Target image size for training
117
+ )
118
+ ```
119
+
120
+ #### Step 3: Train Model
121
+ Train the hierarchical classification model on your prepared data. The model learns to identify family, genus, and species.
122
+
123
+ ```python
124
+ TRAINED_MODEL_DIR = Path("./trained_model")
125
+
126
+ bplusplus.train(
127
+ batch_size=4,
128
+ epochs=30,
129
+ patience=3,
130
+ img_size=640,
131
+ data_dir=PREPARED_DATA_DIR,
132
+ output_dir=TRAINED_MODEL_DIR,
133
+ species_list=names
134
+ # num_workers=0 # Optional: force single-process loading (most stable)
135
+ )
136
+ ```
137
+
138
+ **Note:** The `num_workers` parameter controls DataLoader multiprocessing (defaults to 0 for stability). You can increase it for potentially faster data loading.
139
+
140
+ #### Step 4: Download Detection Weights
141
+ The inference pipeline uses a separate, pre-trained YOLO model for initial insect detection. You need to download its weights manually.
142
+
143
+ You can download the weights file from [this link](https://github.com/Tvenver/Bplusplus/releases/download/v1.2.3/v11small-generic.pt).
144
+
145
+ Place it in the `trained_model` directory and rename it to `yolo_weights.pt` (the file downloads as `v11small-generic.pt`).
146
+
147
+ #### Step 5: Run Inference on Video
148
+ Process a video file to detect, classify, and track insects. The final output is an annotated video and a CSV file with aggregated results for each tracked insect.
149
+
150
+ ```python
151
+ VIDEO_INPUT_PATH = Path("my_video.mp4")
152
+ VIDEO_OUTPUT_PATH = Path("my_video_annotated.mp4")
153
+ HIERARCHICAL_MODEL_PATH = TRAINED_MODEL_DIR / "best_multitask.pt"
154
+ YOLO_WEIGHTS_PATH = TRAINED_MODEL_DIR / "yolo_weights.pt"
155
+
156
+ bplusplus.inference(
157
+ species_list=names,
158
+ yolo_model_path=YOLO_WEIGHTS_PATH,
159
+ hierarchical_model_path=HIERARCHICAL_MODEL_PATH,
160
+ confidence_threshold=0.35,
161
+ video_path=VIDEO_INPUT_PATH,
162
+ output_path=VIDEO_OUTPUT_PATH,
163
+ tracker_max_frames=60,
164
+ fps=15 # Optional: set processing FPS
165
+ )
166
+ ```
167
+
168
+ ### Customization
169
+
170
+ To train the model on your own set of insect species, you only need to change the `names` list in **Step 1**. The pipeline will automatically handle the rest.
171
+
172
+ ```python
173
+ # To use your own species, change the names in this list
174
+ names = [
175
+ "Vespa crabro",
176
+ "Vespula vulgaris",
177
+ "Dolichovespula media",
178
+ # Add your species here
179
+ ]
180
+ ```
181
+
182
+ #### Handling an "Unknown" Class
183
+ To train a model that can recognize an "unknown" class for insects that don't belong to your target species, add `"unknown"` to your `species_list`. You must also provide a corresponding `unknown` folder containing images of various other insects in your data directories (e.g., `prepared_data/train/unknown`).
184
+
185
+ ```python
186
+ # Example with an unknown class
187
+ names_with_unknown = [
188
+ "Vespa crabro",
189
+ "Vespula vulgaris",
190
+ "unknown"
191
+ ]
192
+ ```
193
+
194
+ ## Directory Structure
195
+
196
+ The pipeline will create the following directories to store artifacts:
197
+
198
+ - `GBIF_data/`: Stores the raw images downloaded from GBIF.
199
+ - `prepared_data/`: Contains the cleaned, cropped, and resized images ready for training.
200
+ - `trained_model/`: Saves the trained model weights (`best_multitask.pt`) and pre-trained detection weights.
201
+
202
+ # Citation
203
+
204
+ All information in this GitHub repository is available under the MIT license, as long as credit is given to the authors.
205
+
206
+ **Venverloo, T., Duarte, F., B++: Towards Real-Time Monitoring of Insect Species. MIT Senseable City Laboratory, AMS Institute.**
207
+
@@ -0,0 +1,178 @@
1
+ # B++ repository
2
+
3
+ [![DOI](https://zenodo.org/badge/765250194.svg)](https://zenodo.org/badge/latestdoi/765250194)
4
+ [![PyPi version](https://img.shields.io/pypi/v/bplusplus.svg)](https://pypi.org/project/bplusplus/)
5
+ [![Python versions](https://img.shields.io/pypi/pyversions/bplusplus.svg)](https://pypi.org/project/bplusplus/)
6
+ [![License](https://img.shields.io/pypi/l/bplusplus.svg)](https://pypi.org/project/bplusplus/)
7
+ [![Downloads](https://static.pepy.tech/badge/bplusplus)](https://pepy.tech/project/bplusplus)
8
+ [![Downloads](https://static.pepy.tech/badge/bplusplus/month)](https://pepy.tech/project/bplusplus)
9
+ [![Downloads](https://static.pepy.tech/badge/bplusplus/week)](https://pepy.tech/project/bplusplus)
10
+
11
+ This project provides a complete, end-to-end pipeline for building a custom insect classification system. The framework is designed to be **domain-agnostic**, allowing you to train a powerful detection and classification model for **any insect species** by simply providing a list of names.
12
+
13
+ Using the `bplusplus` library, this pipeline automates the entire machine learning workflow, from data collection to video inference.
14
+
15
+ ## Key Features
16
+
17
+ - **Automated Data Collection**: Downloads hundreds of images for any species from the GBIF database.
18
+ - **Intelligent Data Preparation**: Uses a pre-trained model to automatically find, crop, and resize insects from raw images, ensuring high-quality training data.
19
+ - **Hierarchical Classification**: Trains a model to identify insects at three taxonomic levels: **family, genus, and species**.
20
+ - **Video Inference & Tracking**: Processes video files to detect, classify, and track individual insects over time, providing aggregated predictions.
21
+ ## Pipeline Overview
22
+
23
+ The process is broken down into six main steps, all detailed in the `full_pipeline.ipynb` notebook:
24
+
25
+ 1. **Collect Data**: Select your target species and fetch raw insect images from the web.
26
+ 2. **Prepare Data**: Filter, clean, and prepare images for training.
27
+ 3. **Train Model**: Train the hierarchical classification model.
28
+ 4. **Download Weights**: Fetch pre-trained weights for the detection model.
29
+ 5. **Test Model**: Evaluate the performance of the trained model.
30
+ 6. **Run Inference**: Run the full pipeline on a video file for real-world application.
31
+
32
+ ## How to Use
33
+
34
+ ### Prerequisites
35
+
36
+ - Python 3.10+
37
+
38
+ ### Setup
39
+
40
+ 1. **Create and activate a virtual environment:**
41
+ ```bash
42
+ python3 -m venv venv
43
+ source venv/bin/activate
44
+ ```
45
+
46
+ 2. **Install the required packages:**
47
+ ```bash
48
+ pip install bplusplus
49
+ ```
50
+
51
+ ### Running the Pipeline
52
+
53
+ The pipeline can be run step-by-step using the functions from the `bplusplus` library. While the `full_pipeline.ipynb` notebook provides a complete, executable workflow, the core functions are described below.
54
+
55
+ #### Step 1: Collect Data
56
+ Download images for your target species from the GBIF database. You'll need to provide a list of scientific names.
57
+
58
+ ```python
59
+ import bplusplus
60
+ from pathlib import Path
61
+
62
+ # Define species and directories
63
+ names = ["Vespa crabro", "Vespula vulgaris", "Dolichovespula media"]
64
+ GBIF_DATA_DIR = Path("./GBIF_data")
65
+
66
+ # Define search parameters
67
+ search = {"scientificName": names}
68
+
69
+ # Run collection
70
+ bplusplus.collect(
71
+ group_by_key=bplusplus.Group.scientificName,
72
+ search_parameters=search,
73
+ images_per_group=200, # Recommended to download more than needed
74
+ output_directory=GBIF_DATA_DIR,
75
+ num_threads=5
76
+ )
77
+ ```
78
+
79
+ #### Step 2: Prepare Data
80
+ Process the raw images to extract, crop, and resize insects. This step uses a pre-trained model to ensure only high-quality images are used for training.
81
+
82
+ ```python
83
+ PREPARED_DATA_DIR = Path("./prepared_data")
84
+
85
+ bplusplus.prepare(
86
+ input_directory=GBIF_DATA_DIR,
87
+ output_directory=PREPARED_DATA_DIR,
88
+ img_size=640 # Target image size for training
89
+ )
90
+ ```
91
+
92
+ #### Step 3: Train Model
93
+ Train the hierarchical classification model on your prepared data. The model learns to identify family, genus, and species.
94
+
95
+ ```python
96
+ TRAINED_MODEL_DIR = Path("./trained_model")
97
+
98
+ bplusplus.train(
99
+ batch_size=4,
100
+ epochs=30,
101
+ patience=3,
102
+ img_size=640,
103
+ data_dir=PREPARED_DATA_DIR,
104
+ output_dir=TRAINED_MODEL_DIR,
105
+ species_list=names
106
+ # num_workers=0 # Optional: force single-process loading (most stable)
107
+ )
108
+ ```
109
+
110
+ **Note:** The `num_workers` parameter controls DataLoader multiprocessing (defaults to 0 for stability). You can increase it for potentially faster data loading.
111
+
112
+ #### Step 4: Download Detection Weights
113
+ The inference pipeline uses a separate, pre-trained YOLO model for initial insect detection. You need to download its weights manually.
114
+
115
+ You can download the weights file from [this link](https://github.com/Tvenver/Bplusplus/releases/download/v1.2.3/v11small-generic.pt).
116
+
117
+ Place it in the `trained_model` directory and rename it to `yolo_weights.pt` (the file downloads as `v11small-generic.pt`).
118
+
119
+ #### Step 5: Run Inference on Video
120
+ Process a video file to detect, classify, and track insects. The final output is an annotated video and a CSV file with aggregated results for each tracked insect.
121
+
122
+ ```python
123
+ VIDEO_INPUT_PATH = Path("my_video.mp4")
124
+ VIDEO_OUTPUT_PATH = Path("my_video_annotated.mp4")
125
+ HIERARCHICAL_MODEL_PATH = TRAINED_MODEL_DIR / "best_multitask.pt"
126
+ YOLO_WEIGHTS_PATH = TRAINED_MODEL_DIR / "yolo_weights.pt"
127
+
128
+ bplusplus.inference(
129
+ species_list=names,
130
+ yolo_model_path=YOLO_WEIGHTS_PATH,
131
+ hierarchical_model_path=HIERARCHICAL_MODEL_PATH,
132
+ confidence_threshold=0.35,
133
+ video_path=VIDEO_INPUT_PATH,
134
+ output_path=VIDEO_OUTPUT_PATH,
135
+ tracker_max_frames=60,
136
+ fps=15 # Optional: set processing FPS
137
+ )
138
+ ```
139
+
140
+ ### Customization
141
+
142
+ To train the model on your own set of insect species, you only need to change the `names` list in **Step 1**. The pipeline will automatically handle the rest.
143
+
144
+ ```python
145
+ # To use your own species, change the names in this list
146
+ names = [
147
+ "Vespa crabro",
148
+ "Vespula vulgaris",
149
+ "Dolichovespula media",
150
+ # Add your species here
151
+ ]
152
+ ```
153
+
154
+ #### Handling an "Unknown" Class
155
+ To train a model that can recognize an "unknown" class for insects that don't belong to your target species, add `"unknown"` to your `species_list`. You must also provide a corresponding `unknown` folder containing images of various other insects in your data directories (e.g., `prepared_data/train/unknown`).
156
+
157
+ ```python
158
+ # Example with an unknown class
159
+ names_with_unknown = [
160
+ "Vespa crabro",
161
+ "Vespula vulgaris",
162
+ "unknown"
163
+ ]
164
+ ```
165
+
166
+ ## Directory Structure
167
+
168
+ The pipeline will create the following directories to store artifacts:
169
+
170
+ - `GBIF_data/`: Stores the raw images downloaded from GBIF.
171
+ - `prepared_data/`: Contains the cleaned, cropped, and resized images ready for training.
172
+ - `trained_model/`: Saves the trained model weights (`best_multitask.pt`) and pre-trained detection weights.
173
+
174
+ # Citation
175
+
176
+ All information in this GitHub repository is available under the MIT license, as long as credit is given to the authors.
177
+
178
+ **Venverloo, T., Duarte, F., B++: Towards Real-Time Monitoring of Insect Species. MIT Senseable City Laboratory, AMS Institute.**
@@ -1,21 +1,25 @@
1
1
  [tool.poetry]
2
2
  name = "bplusplus"
3
- version = "1.2.2"
3
+ version = "1.2.4"
4
4
  description = "A simple method to create AI models for biodiversity, with collect and prepare pipeline"
5
- authors = ["Titus Venverloo <tvenver@mit.edu>", "Deniz Aydemir <deniz@aydemir.us>", "Orlando Closs <orlando.closs@wur.nl>", "Ase Hatveit <aase@mit.edu>"]
5
+ authors = ["Titus Venverloo <tvenver@mit.edu>", "Deniz Aydemir <deniz@aydemir.us>", "Orlando Closs <orlandocloss@pm.me>", "Ase Hatveit <aase@mit.edu>"]
6
6
  license = "MIT"
7
7
  readme = "README.md"
8
8
 
9
9
  [tool.poetry.dependencies]
10
- python = "^3.9.0"
10
+ python = "^3.10"
11
11
  requests = "2.25.1"
12
- ultralytics = "8.0.195"
13
- pygbif = "^0.6.4"
14
- validators = "^0.33.0"
12
+ pandas = "2.1.4"
13
+ ultralytics = "8.3.173"
14
+ pyyaml = "6.0.1"
15
+ tqdm = "4.66.4"
15
16
  prettytable = "3.7.0"
16
- scikit-learn = "^1.6.1"
17
- tabulate = "^0.9.0"
18
- torch = "2.5.0"
17
+ pillow = "11.3.0"
18
+ numpy = "1.26.4"
19
+ scikit-learn = "1.7.1"
20
+ pygbif = "0.6.5"
21
+ validators = "0.33.0"
22
+ tabulate = "0.9.0"
19
23
 
20
24
  [tool.poetry.group.dev.dependencies]
21
25
  jupyter = "^1.0.0"
@@ -0,0 +1,15 @@
1
+ try:
2
+ import torch
3
+ import torchvision
4
+ except ImportError:
5
+ raise ImportError(
6
+ "PyTorch and Torchvision are not installed. "
7
+ "Please install them before using bplusplus by following the instructions "
8
+ "on the official PyTorch website: https://pytorch.org/get-started/locally/"
9
+ )
10
+
11
+ from .collect import Group, collect
12
+ from .prepare import prepare
13
+ from .train import train
14
+ from .test import test
15
+ from .inference import inference