bplusplus 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Titus Venverloo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,252 @@
1
+ Metadata-Version: 2.3
2
+ Name: bplusplus
3
+ Version: 2.0.1
4
+ Summary: A simple method to create AI models for biodiversity, with collect and prepare pipeline
5
+ License: MIT
6
+ Author: Titus Venverloo
7
+ Author-email: tvenver@mit.edu
8
+ Requires-Python: >=3.10,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Requires-Dist: numpy (>=1.26.0,<1.26.5) ; sys_platform == "win32"
16
+ Requires-Dist: numpy (>=1.26.0,<1.27.0) ; sys_platform == "darwin" and platform_machine == "arm64"
17
+ Requires-Dist: numpy (>=1.26.0,<1.27.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
18
+ Requires-Dist: numpy (>=1.26.0,<1.27.0) ; sys_platform == "linux" and platform_machine == "aarch64"
19
+ Requires-Dist: numpy (>=1.26.0,<1.27.0) ; sys_platform == "linux" and platform_machine == "x86_64"
20
+ Requires-Dist: pandas (==2.1.4)
21
+ Requires-Dist: pillow (>=10.0.0,<12.0.0) ; sys_platform == "darwin"
22
+ Requires-Dist: pillow (>=10.0.0,<12.0.0) ; sys_platform == "linux"
23
+ Requires-Dist: pillow (>=10.0.0,<12.0.0) ; sys_platform == "win32"
24
+ Requires-Dist: prettytable (==3.7.0)
25
+ Requires-Dist: pygbif (==0.6.5)
26
+ Requires-Dist: pyyaml (==6.0.1)
27
+ Requires-Dist: requests (==2.25.1)
28
+ Requires-Dist: scikit-learn (>=1.3.0,<1.7.0) ; sys_platform == "linux" and platform_machine == "aarch64"
29
+ Requires-Dist: scikit-learn (>=1.3.0,<1.7.0) ; sys_platform == "win32"
30
+ Requires-Dist: scikit-learn (>=1.4.0,<1.8.0) ; sys_platform == "darwin" and platform_machine == "arm64"
31
+ Requires-Dist: scikit-learn (>=1.4.0,<1.8.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
32
+ Requires-Dist: scikit-learn (>=1.4.0,<1.8.0) ; sys_platform == "linux" and platform_machine == "x86_64"
33
+ Requires-Dist: tabulate (==0.9.0)
34
+ Requires-Dist: torch (>=2.0.0,<2.8.0) ; sys_platform == "darwin" and platform_machine == "arm64"
35
+ Requires-Dist: torch (>=2.0.0,<2.8.0) ; sys_platform == "linux"
36
+ Requires-Dist: torch (>=2.0.0,<2.8.0) ; sys_platform == "win32"
37
+ Requires-Dist: torch (>=2.2.0,<2.3.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
38
+ Requires-Dist: tqdm (==4.66.4)
39
+ Requires-Dist: ultralytics (==8.3.173)
40
+ Requires-Dist: validators (==0.33.0)
41
+ Description-Content-Type: text/markdown
42
+
43
+ # B++ repository
44
+
45
+ [![DOI](https://zenodo.org/badge/765250194.svg)](https://zenodo.org/badge/latestdoi/765250194)
46
+ [![PyPi version](https://img.shields.io/pypi/v/bplusplus.svg)](https://pypi.org/project/bplusplus/)
47
+ [![Python versions](https://img.shields.io/pypi/pyversions/bplusplus.svg)](https://pypi.org/project/bplusplus/)
48
+ [![License](https://img.shields.io/pypi/l/bplusplus.svg)](https://pypi.org/project/bplusplus/)
49
+ [![Downloads](https://static.pepy.tech/badge/bplusplus)](https://pepy.tech/project/bplusplus)
50
+ [![Downloads](https://static.pepy.tech/badge/bplusplus/month)](https://pepy.tech/project/bplusplus)
51
+ [![Downloads](https://static.pepy.tech/badge/bplusplus/week)](https://pepy.tech/project/bplusplus)
52
+
53
+ This project provides a complete, end-to-end pipeline for building a custom insect classification system. The framework is designed to be **domain-agnostic**, allowing you to train a powerful detection and classification model for **any insect species** by simply providing a list of names.
54
+
55
+ Using the `Bplusplus` library, this pipeline automates the entire machine learning workflow, from data collection to video inference.
56
+
57
+ ## Key Features
58
+
59
+ - **Automated Data Collection**: Downloads hundreds of images for any species from the GBIF database.
60
+ - **Intelligent Data Preparation**: Uses a pre-trained model to automatically find, crop, and resize insects from raw images, ensuring high-quality training data.
61
+ - **Hierarchical Classification**: Trains a model to identify insects at three taxonomic levels: **family, genus, and species**.
62
+ - **Video Inference & Tracking**: Processes video files to detect, classify, and track individual insects over time, providing aggregated predictions.
62
+
+ ## Pipeline Overview
64
+
65
+ The process is broken down into five main steps, all detailed in the `full_pipeline.ipynb` notebook:
66
+
67
+ 1. **Collect Data**: Select your target species and fetch raw insect images from the web.
68
+ 2. **Prepare Data**: Filter, clean, and prepare images for training.
69
+ 3. **Train Model**: Train the hierarchical classification model.
70
+ 4. **Validate Model**: Evaluate the performance of the trained model.
71
+ 5. **Run Inference**: Run the full pipeline on a video file for real-world application.
72
+
73
+ ## How to Use
74
+
75
+ ### Prerequisites
76
+
77
+ - Python 3.10+
78
+
79
+ ### Setup
80
+
81
+ 1. **Create and activate a virtual environment:**
82
+ ```bash
83
+ python3 -m venv venv
84
+ source venv/bin/activate
85
+ ```
86
+
87
+ 2. **Install the required packages:**
88
+ ```bash
89
+ pip install bplusplus
90
+ ```
91
+
92
+ ### Running the Pipeline
93
+
94
+ The pipeline can be run step-by-step using the functions from the `bplusplus` library. While the `full_pipeline.ipynb` notebook provides a complete, executable workflow, the core functions are described below.
95
+
96
+ #### Step 1: Collect Data
97
+ Download images for your target species from the GBIF database. You'll need to provide a list of scientific names.
98
+
99
+ ```python
100
+ import bplusplus
101
+ from pathlib import Path
102
+
103
+ # Define species and directories
104
+ names = ["Vespa crabro", "Vespula vulgaris", "Dolichovespula media"]
105
+ GBIF_DATA_DIR = Path("./GBIF_data")
106
+
107
+ # Define search parameters
108
+ search = {"scientificName": names}
109
+
110
+ # Run collection
111
+ bplusplus.collect(
112
+ group_by_key=bplusplus.Group.scientificName,
113
+ search_parameters=search,
114
+ images_per_group=200, # Recommended to download more than needed
115
+ output_directory=GBIF_DATA_DIR,
116
+ num_threads=5
117
+ )
118
+ ```
119
+
120
+ #### Step 2: Prepare Data
121
+ Process the raw images to extract, crop, and resize insects. This step uses a pre-trained model to ensure only high-quality images are used for training.
122
+
123
+ ```python
124
+ PREPARED_DATA_DIR = Path("./prepared_data")
125
+
126
+ bplusplus.prepare(
127
+ input_directory=GBIF_DATA_DIR,
128
+ output_directory=PREPARED_DATA_DIR,
129
+ img_size=640, # Target image size for training
130
+ conf=0.6, # Detection confidence threshold (0-1)
131
+ valid=0.1, # Validation split ratio (0-1), set to 0 for no validation
132
+ )
133
+ ```
134
+
135
+ #### Step 3: Train Model
136
+ Train the hierarchical classification model on your prepared data. The model learns to identify family, genus, and species.
137
+
138
+ ```python
139
+ TRAINED_MODEL_DIR = Path("./trained_model")
140
+
141
+ bplusplus.train(
142
+ batch_size=4,
143
+ epochs=30,
144
+ patience=3,
145
+ img_size=640,
146
+ data_dir=PREPARED_DATA_DIR,
147
+ output_dir=TRAINED_MODEL_DIR,
148
+ species_list=names,
149
+ backbone="resnet50", # Choose: "resnet18", "resnet50", or "resnet101"
150
+ # num_workers=0, # Optional: force single-process loading (most stable)
151
+ # train_transforms=custom_transforms, # Optional: custom torchvision transforms
152
+ )
153
+ ```
154
+
155
+ **Note:** The `num_workers` parameter controls DataLoader multiprocessing (defaults to 0 for stability). The `backbone` parameter allows you to choose between different ResNet architectures—use `resnet18` for faster training or `resnet101` for potentially better accuracy.
156
+
157
+ #### Step 4: Validate Model
158
+ Evaluate the trained model on a held-out validation set. This calculates precision, recall, and F1-score at all taxonomic levels.
159
+
160
+ ```python
161
+ HIERARCHICAL_MODEL_PATH = TRAINED_MODEL_DIR / "best_multitask.pt"
162
+
163
+ results = bplusplus.validate(
164
+ species_list=names,
165
+ validation_dir=PREPARED_DATA_DIR / "valid",
166
+ hierarchical_weights=HIERARCHICAL_MODEL_PATH,
167
+ img_size=640, # Must match training
168
+ batch_size=32,
169
+ backbone="resnet50", # Must match training
170
+ )
171
+ ```
172
+
173
+ #### Step 5: Run Inference on Video
174
+ Process a video file to detect, classify, and track insects using motion-based detection. The pipeline uses background subtraction (GMM) to detect moving insects, tracks them across frames, and classifies confirmed tracks.
175
+
176
+ **Output files generated in `output_dir`:**
177
+ - `{video}_annotated.mp4` - Video showing confirmed tracks with classifications
178
+ - `{video}_debug.mp4` - Debug video with motion mask and all detections
179
+ - `{video}_results.csv` - Aggregated results per confirmed track
180
+ - `{video}_detections.csv` - Frame-by-frame detection data
181
+
182
+ ```python
183
+ VIDEO_INPUT_PATH = Path("my_video.mp4")
184
+ OUTPUT_DIR = Path("./output")
185
+ HIERARCHICAL_MODEL_PATH = TRAINED_MODEL_DIR / "best_multitask.pt"
186
+
187
+ results = bplusplus.inference(
188
+ species_list=names,
189
+ hierarchical_model_path=HIERARCHICAL_MODEL_PATH,
190
+ video_path=VIDEO_INPUT_PATH,
191
+ output_dir=OUTPUT_DIR,
192
+ fps=None, # None = process all frames
193
+ backbone="resnet50", # Must match training
194
+ )
195
+
196
+ print(f"Detected {results['tracks']} tracks ({results['confirmed_tracks']} confirmed)")
197
+ ```
198
+
199
+ **Custom Detection Configuration:**
200
+
201
+ For advanced control over detection parameters, provide a YAML config file:
202
+
203
+ ```python
204
+ results = bplusplus.inference(
205
+ ...,
206
+ config="detection_config.yaml"
207
+ )
208
+ ```
209
+
210
+ Download a template config from the [releases page](https://github.com/Tvenver/Bplusplus/releases). Parameters control cohesiveness filtering, shape filtering, tracking behavior, and path topology analysis for confirming insect-like movement.
211
+
212
+ ### Customization
213
+
214
+ To train the model on your own set of insect species, you only need to change the `names` list in **Step 1**. The pipeline will automatically handle the rest.
215
+
216
+ ```python
217
+ # To use your own species, change the names in this list
218
+ names = [
219
+ "Vespa crabro",
220
+ "Vespula vulgaris",
221
+ "Dolichovespula media",
222
+ # Add your species here
223
+ ]
224
+ ```
225
+
226
+ #### Handling an "Unknown" Class
227
+ To train a model that can recognize an "unknown" class for insects that don't belong to your target species, add `"unknown"` to your `species_list`. You must also provide a corresponding `unknown` folder containing images of various other insects in your data directories (e.g., `prepared_data/train/unknown`).
228
+
229
+ ```python
230
+ # Example with an unknown class
231
+ names_with_unknown = [
232
+ "Vespa crabro",
233
+ "Vespula vulgaris",
234
+ "unknown"
235
+ ]
236
+ ```
237
+
238
+ ## Directory Structure
239
+
240
+ The pipeline will create the following directories to store artifacts:
241
+
242
+ - `GBIF_data/`: Stores the raw images downloaded from GBIF.
243
+ - `prepared_data/`: Contains the cleaned, cropped, and resized images ready for training (`train/` and optionally `valid/` subdirectories).
244
+ - `trained_model/`: Saves the trained model weights (`best_multitask.pt`).
245
+ - `output/`: Inference results including annotated videos and CSV files.
246
+
247
+ # Citation
248
+
249
+ All information in this GitHub repository is available under the MIT license, as long as credit is given to the authors.
250
+
251
+ **Venverloo, T., Duarte, F., B++: Towards Real-Time Monitoring of Insect Species. MIT Senseable City Laboratory, AMS Institute.**
252
+
@@ -0,0 +1,209 @@
1
+ # B++ repository
2
+
3
+ [![DOI](https://zenodo.org/badge/765250194.svg)](https://zenodo.org/badge/latestdoi/765250194)
4
+ [![PyPi version](https://img.shields.io/pypi/v/bplusplus.svg)](https://pypi.org/project/bplusplus/)
5
+ [![Python versions](https://img.shields.io/pypi/pyversions/bplusplus.svg)](https://pypi.org/project/bplusplus/)
6
+ [![License](https://img.shields.io/pypi/l/bplusplus.svg)](https://pypi.org/project/bplusplus/)
7
+ [![Downloads](https://static.pepy.tech/badge/bplusplus)](https://pepy.tech/project/bplusplus)
8
+ [![Downloads](https://static.pepy.tech/badge/bplusplus/month)](https://pepy.tech/project/bplusplus)
9
+ [![Downloads](https://static.pepy.tech/badge/bplusplus/week)](https://pepy.tech/project/bplusplus)
10
+
11
+ This project provides a complete, end-to-end pipeline for building a custom insect classification system. The framework is designed to be **domain-agnostic**, allowing you to train a powerful detection and classification model for **any insect species** by simply providing a list of names.
12
+
13
+ Using the `Bplusplus` library, this pipeline automates the entire machine learning workflow, from data collection to video inference.
14
+
15
+ ## Key Features
16
+
17
+ - **Automated Data Collection**: Downloads hundreds of images for any species from the GBIF database.
18
+ - **Intelligent Data Preparation**: Uses a pre-trained model to automatically find, crop, and resize insects from raw images, ensuring high-quality training data.
19
+ - **Hierarchical Classification**: Trains a model to identify insects at three taxonomic levels: **family, genus, and species**.
20
+ - **Video Inference & Tracking**: Processes video files to detect, classify, and track individual insects over time, providing aggregated predictions.
21
+ ## Pipeline Overview
22
+
23
+ The process is broken down into five main steps, all detailed in the `full_pipeline.ipynb` notebook:
24
+
25
+ 1. **Collect Data**: Select your target species and fetch raw insect images from the web.
26
+ 2. **Prepare Data**: Filter, clean, and prepare images for training.
27
+ 3. **Train Model**: Train the hierarchical classification model.
28
+ 4. **Validate Model**: Evaluate the performance of the trained model.
29
+ 5. **Run Inference**: Run the full pipeline on a video file for real-world application.
30
+
31
+ ## How to Use
32
+
33
+ ### Prerequisites
34
+
35
+ - Python 3.10+
36
+
37
+ ### Setup
38
+
39
+ 1. **Create and activate a virtual environment:**
40
+ ```bash
41
+ python3 -m venv venv
42
+ source venv/bin/activate
43
+ ```
44
+
45
+ 2. **Install the required packages:**
46
+ ```bash
47
+ pip install bplusplus
48
+ ```
49
+
50
+ ### Running the Pipeline
51
+
52
+ The pipeline can be run step-by-step using the functions from the `bplusplus` library. While the `full_pipeline.ipynb` notebook provides a complete, executable workflow, the core functions are described below.
53
+
54
+ #### Step 1: Collect Data
55
+ Download images for your target species from the GBIF database. You'll need to provide a list of scientific names.
56
+
57
+ ```python
58
+ import bplusplus
59
+ from pathlib import Path
60
+
61
+ # Define species and directories
62
+ names = ["Vespa crabro", "Vespula vulgaris", "Dolichovespula media"]
63
+ GBIF_DATA_DIR = Path("./GBIF_data")
64
+
65
+ # Define search parameters
66
+ search = {"scientificName": names}
67
+
68
+ # Run collection
69
+ bplusplus.collect(
70
+ group_by_key=bplusplus.Group.scientificName,
71
+ search_parameters=search,
72
+ images_per_group=200, # Recommended to download more than needed
73
+ output_directory=GBIF_DATA_DIR,
74
+ num_threads=5
75
+ )
76
+ ```
77
+
78
+ #### Step 2: Prepare Data
79
+ Process the raw images to extract, crop, and resize insects. This step uses a pre-trained model to ensure only high-quality images are used for training.
80
+
81
+ ```python
82
+ PREPARED_DATA_DIR = Path("./prepared_data")
83
+
84
+ bplusplus.prepare(
85
+ input_directory=GBIF_DATA_DIR,
86
+ output_directory=PREPARED_DATA_DIR,
87
+ img_size=640, # Target image size for training
88
+ conf=0.6, # Detection confidence threshold (0-1)
89
+ valid=0.1, # Validation split ratio (0-1), set to 0 for no validation
90
+ )
91
+ ```
92
+
93
+ #### Step 3: Train Model
94
+ Train the hierarchical classification model on your prepared data. The model learns to identify family, genus, and species.
95
+
96
+ ```python
97
+ TRAINED_MODEL_DIR = Path("./trained_model")
98
+
99
+ bplusplus.train(
100
+ batch_size=4,
101
+ epochs=30,
102
+ patience=3,
103
+ img_size=640,
104
+ data_dir=PREPARED_DATA_DIR,
105
+ output_dir=TRAINED_MODEL_DIR,
106
+ species_list=names,
107
+ backbone="resnet50", # Choose: "resnet18", "resnet50", or "resnet101"
108
+ # num_workers=0, # Optional: force single-process loading (most stable)
109
+ # train_transforms=custom_transforms, # Optional: custom torchvision transforms
110
+ )
111
+ ```
112
+
113
+ **Note:** The `num_workers` parameter controls DataLoader multiprocessing (defaults to 0 for stability). The `backbone` parameter allows you to choose between different ResNet architectures—use `resnet18` for faster training or `resnet101` for potentially better accuracy.
114
+
115
+ #### Step 4: Validate Model
116
+ Evaluate the trained model on a held-out validation set. This calculates precision, recall, and F1-score at all taxonomic levels.
117
+
118
+ ```python
119
+ HIERARCHICAL_MODEL_PATH = TRAINED_MODEL_DIR / "best_multitask.pt"
120
+
121
+ results = bplusplus.validate(
122
+ species_list=names,
123
+ validation_dir=PREPARED_DATA_DIR / "valid",
124
+ hierarchical_weights=HIERARCHICAL_MODEL_PATH,
125
+ img_size=640, # Must match training
126
+ batch_size=32,
127
+ backbone="resnet50", # Must match training
128
+ )
129
+ ```
130
+
131
+ #### Step 5: Run Inference on Video
132
+ Process a video file to detect, classify, and track insects using motion-based detection. The pipeline uses background subtraction (GMM) to detect moving insects, tracks them across frames, and classifies confirmed tracks.
133
+
134
+ **Output files generated in `output_dir`:**
135
+ - `{video}_annotated.mp4` - Video showing confirmed tracks with classifications
136
+ - `{video}_debug.mp4` - Debug video with motion mask and all detections
137
+ - `{video}_results.csv` - Aggregated results per confirmed track
138
+ - `{video}_detections.csv` - Frame-by-frame detection data
139
+
140
+ ```python
141
+ VIDEO_INPUT_PATH = Path("my_video.mp4")
142
+ OUTPUT_DIR = Path("./output")
143
+ HIERARCHICAL_MODEL_PATH = TRAINED_MODEL_DIR / "best_multitask.pt"
144
+
145
+ results = bplusplus.inference(
146
+ species_list=names,
147
+ hierarchical_model_path=HIERARCHICAL_MODEL_PATH,
148
+ video_path=VIDEO_INPUT_PATH,
149
+ output_dir=OUTPUT_DIR,
150
+ fps=None, # None = process all frames
151
+ backbone="resnet50", # Must match training
152
+ )
153
+
154
+ print(f"Detected {results['tracks']} tracks ({results['confirmed_tracks']} confirmed)")
155
+ ```
156
+
157
+ **Custom Detection Configuration:**
158
+
159
+ For advanced control over detection parameters, provide a YAML config file:
160
+
161
+ ```python
162
+ results = bplusplus.inference(
163
+ ...,
164
+ config="detection_config.yaml"
165
+ )
166
+ ```
167
+
168
+ Download a template config from the [releases page](https://github.com/Tvenver/Bplusplus/releases). Parameters control cohesiveness filtering, shape filtering, tracking behavior, and path topology analysis for confirming insect-like movement.
169
+
170
+ ### Customization
171
+
172
+ To train the model on your own set of insect species, you only need to change the `names` list in **Step 1**. The pipeline will automatically handle the rest.
173
+
174
+ ```python
175
+ # To use your own species, change the names in this list
176
+ names = [
177
+ "Vespa crabro",
178
+ "Vespula vulgaris",
179
+ "Dolichovespula media",
180
+ # Add your species here
181
+ ]
182
+ ```
183
+
184
+ #### Handling an "Unknown" Class
185
+ To train a model that can recognize an "unknown" class for insects that don't belong to your target species, add `"unknown"` to your `species_list`. You must also provide a corresponding `unknown` folder containing images of various other insects in your data directories (e.g., `prepared_data/train/unknown`).
186
+
187
+ ```python
188
+ # Example with an unknown class
189
+ names_with_unknown = [
190
+ "Vespa crabro",
191
+ "Vespula vulgaris",
192
+ "unknown"
193
+ ]
194
+ ```
195
+
196
+ ## Directory Structure
197
+
198
+ The pipeline will create the following directories to store artifacts:
199
+
200
+ - `GBIF_data/`: Stores the raw images downloaded from GBIF.
201
+ - `prepared_data/`: Contains the cleaned, cropped, and resized images ready for training (`train/` and optionally `valid/` subdirectories).
202
+ - `trained_model/`: Saves the trained model weights (`best_multitask.pt`).
203
+ - `output/`: Inference results including annotated videos and CSV files.
204
+
205
+ # Citation
206
+
207
+ All information in this GitHub is available under MIT license, as long as credit is given to the authors.
208
+
209
+ **Venverloo, T., Duarte, F., B++: Towards Real-Time Monitoring of Insect Species. MIT Senseable City Laboratory, AMS Institute.**
@@ -0,0 +1,74 @@
1
+ [tool.poetry]
2
+ name = "bplusplus"
3
+ version = "2.0.1"
4
+ description = "A simple method to create AI models for biodiversity, with collect and prepare pipeline"
5
+ authors = ["Titus Venverloo <tvenver@mit.edu>", "Deniz Aydemir <deniz@aydemir.us>", "Orlando Closs <orlandocloss@pm.me>", "Ase Hatveit <aase@mit.edu>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.10"
11
+ requests = "2.25.1"
12
+ pandas = "2.1.4"
13
+ ultralytics = "8.3.173"
14
+ pyyaml = "6.0.1"
15
+ tqdm = "4.66.4"
16
+ prettytable = "3.7.0"
17
+ # Pillow with platform-specific compatibility
18
+ pillow = [
19
+ # Windows - stable version
20
+ {version = ">=10.0.0,<12.0.0", markers = "sys_platform == 'win32'"},
21
+ # macOS - all versions support latest
22
+ {version = ">=10.0.0,<12.0.0", markers = "sys_platform == 'darwin'"},
23
+ # Linux - most flexible
24
+ {version = ">=10.0.0,<12.0.0", markers = "sys_platform == 'linux'"}
25
+ ]
26
+ # PyTorch with platform-specific compatibility to handle discontinued macOS Intel support. NOTE(review): torchvision is imported by bplusplus/__init__.py but is not declared as a dependency here — confirm.
27
+ torch = [
28
+ # Windows - stable version range
29
+ {version = ">=2.0.0,<2.8.0", markers = "sys_platform == 'win32'"},
30
+ # macOS Intel - DISCONTINUED after PyTorch 2.2.2, use last supported version
31
+ {version = ">=2.2.0,<2.3.0", markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"},
32
+ # macOS ARM64 - full support, broader range
33
+ {version = ">=2.0.0,<2.8.0", markers = "sys_platform == 'darwin' and platform_machine == 'arm64'"},
34
+ # Linux - most flexible, best wheel support
35
+ {version = ">=2.0.0,<2.8.0", markers = "sys_platform == 'linux'"}
36
+ ]
37
+ # Comprehensive environment markers for numpy compatibility across all platforms and architectures
38
+ # Note: pandas 2.1.4 requires numpy >=1.26.0, so we must respect that constraint
39
+ numpy = [
40
+ # Windows (all architectures) - use exact version that works well with ultralytics
41
+ {version = ">=1.26.0,<1.26.5", markers = "sys_platform == 'win32'"},
42
+ # macOS ARM64 (Apple Silicon) - compatible with newer numpy
43
+ {version = ">=1.26.0,<1.27.0", markers = "sys_platform == 'darwin' and platform_machine == 'arm64'"},
44
+ # macOS x86_64 (Intel) - compatible with newer numpy
45
+ {version = ">=1.26.0,<1.27.0", markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"},
46
+ # Linux ARM64 (aarch64) - common in cloud/lab environments
47
+ {version = ">=1.26.0,<1.27.0", markers = "sys_platform == 'linux' and platform_machine == 'aarch64'"},
48
+ # Linux x86_64 - most common lab/server environment
49
+ {version = ">=1.26.0,<1.27.0", markers = "sys_platform == 'linux' and platform_machine == 'x86_64'"}
50
+ ]
51
+ # Scikit-learn with platform-specific compatibility
52
+ scikit-learn = [
53
+ # Windows - more conservative versions for stability
54
+ {version = ">=1.3.0,<1.7.0", markers = "sys_platform == 'win32'"},
55
+ # macOS ARM64 - optimized builds available
56
+ {version = ">=1.4.0,<1.8.0", markers = "sys_platform == 'darwin' and platform_machine == 'arm64'"},
57
+ # macOS x86_64 - standard builds
58
+ {version = ">=1.4.0,<1.8.0", markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"},
59
+ # Linux ARM64 - may have limited pre-built wheels
60
+ {version = ">=1.3.0,<1.7.0", markers = "sys_platform == 'linux' and platform_machine == 'aarch64'"},
61
+ # Linux x86_64 - most stable, latest versions available
62
+ {version = ">=1.4.0,<1.8.0", markers = "sys_platform == 'linux' and platform_machine == 'x86_64'"}
63
+ ]
64
+ pygbif = "0.6.5"
65
+ validators = "0.33.0"
66
+ tabulate = "0.9.0"
67
+
68
+ [tool.poetry.group.dev.dependencies]
69
+ jupyter = "^1.0.0"
70
+ ipykernel = "^6.29.5"
71
+
72
+ [build-system]
73
+ requires = ["poetry-core>=1.0.0"]
74
+ build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,15 @@
1
"""Package entry point for ``bplusplus``.

Verifies that the heavyweight deep-learning dependencies (PyTorch and
Torchvision) are importable before exposing the public pipeline API.
"""

try:
    import torch  # noqa: F401 -- availability check only
    import torchvision  # noqa: F401 -- availability check only
except ImportError as err:
    # Chain the original error so the traceback shows WHICH import
    # failed (torch vs torchvision) instead of only the generic message.
    raise ImportError(
        "PyTorch and Torchvision are not installed. "
        "Please install them before using bplusplus by following the instructions "
        "on the official PyTorch website: https://pytorch.org/get-started/locally/"
    ) from err

from .collect import Group, collect
from .prepare import prepare
from .train import train
from .inference import inference
from .validation import validate

# Explicit public API of the package.
__all__ = ["Group", "collect", "prepare", "train", "inference", "validate"]