bplusplus 1.2.1.tar.gz → 1.2.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bplusplus might be problematic. Click here for more details.

@@ -0,0 +1,101 @@
1
+ Metadata-Version: 2.3
2
+ Name: bplusplus
3
+ Version: 1.2.3
4
+ Summary: A simple method to create AI models for biodiversity, with collect and prepare pipeline
5
+ License: MIT
6
+ Author: Titus Venverloo
7
+ Author-email: tvenver@mit.edu
8
+ Requires-Python: >=3.9.0,<4.0.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: numpy
17
+ Requires-Dist: pandas (==2.1.4)
18
+ Requires-Dist: pillow
19
+ Requires-Dist: prettytable (==3.7.0)
20
+ Requires-Dist: pygbif (>=0.6.4,<0.7.0)
21
+ Requires-Dist: pyyaml (==6.0.1)
22
+ Requires-Dist: requests (==2.25.1)
23
+ Requires-Dist: scikit-learn
24
+ Requires-Dist: tabulate (>=0.9.0,<0.10.0)
25
+ Requires-Dist: torch (>=2.5.0,<3.0.0)
26
+ Requires-Dist: torchvision
27
+ Requires-Dist: tqdm (==4.66.4)
28
+ Requires-Dist: ultralytics (>=8.3.0)
29
+ Requires-Dist: validators (>=0.33.0,<0.34.0)
30
+ Description-Content-Type: text/markdown
31
+
32
+ # Domain-Agnostic Insect Classification Pipeline
33
+
34
+ This project provides a complete, end-to-end pipeline for building a custom insect classification system. The framework is designed to be **domain-agnostic**, allowing you to train a powerful detection and classification model for **any insect species** by simply providing a list of names.
35
+
36
+ Using the `Bplusplus` library, this pipeline automates the entire machine learning workflow, from data collection to video inference.
37
+
38
+ ## Key Features
39
+
40
+ - **Automated Data Collection**: Downloads hundreds of images for any species from the GBIF database.
41
+ - **Intelligent Data Preparation**: Uses a pre-trained model to automatically find, crop, and resize insects from raw images, ensuring high-quality training data.
42
+ - **Hierarchical Classification**: Trains a model to identify insects at three taxonomic levels: **family, genus, and species**.
43
+ - **Video Inference & Tracking**: Processes video files to detect, classify, and track individual insects over time, providing aggregated predictions.
44
+ ## Pipeline Overview
45
+
46
+ The process is broken down into six main steps, all detailed in the `full_pipeline.ipynb` notebook:
47
+
48
+ 1. **Collect Data**: Select your target species and fetch raw insect images from the web.
49
+ 2. **Prepare Data**: Filter, clean, and prepare images for training.
50
+ 3. **Train Model**: Train the hierarchical classification model.
51
+ 4. **Download Weights**: Fetch pre-trained weights for the detection model.
52
+ 5. **Test Model**: Evaluate the performance of the trained model.
53
+ 6. **Run Inference**: Run the full pipeline on a video file for real-world application.
54
+
55
+ ## How to Use
56
+
57
+ ### Prerequisites
58
+
59
+ - Python 3.9+
60
+ - `venv` for creating a virtual environment (recommended)
61
+
62
+ ### Setup
63
+
64
+ 1. **Create and activate a virtual environment:**
65
+ ```bash
66
+ python3 -m venv venv
67
+ source venv/bin/activate
68
+ ```
69
+
70
+ 2. **Install the required packages:**
71
+ ```bash
72
+ pip install bplusplus
73
+ ```
74
+
75
+ ### Running the Pipeline
76
+
77
+ The entire workflow is contained within **`full_pipeline.ipynb`**. Open it with a Jupyter Notebook or JupyterLab environment and run the cells sequentially to execute the full pipeline.
78
+
79
+ ### Customization
80
+
81
+ To train the model on different insect species, simply modify the `names` list in **Step 1** of the notebook:
82
+
83
+ ```python
84
+ # a/full_pipeline.ipynb
85
+
86
+ # To use your own species, change the names in this list
87
+ names = [
88
+ "Vespa crabro", "Vespula vulgaris", "Dolichovespula media"
89
+ ]
90
+ ```
91
+
92
+ The pipeline will automatically handle the rest, from data collection to training, for your new set of species.
93
+
94
+ ## Directory Structure
95
+
96
+ The pipeline will create the following directories to store artifacts:
97
+
98
+ - `GBIF_data/`: Stores the raw images downloaded from GBIF.
99
+ - `prepared_data/`: Contains the cleaned, cropped, and resized images ready for training.
100
+ - `trained_model/`: Saves the trained model weights (`best_multitask.pt`) and pre-trained detection weights.
101
+
@@ -0,0 +1,69 @@
1
+ # Domain-Agnostic Insect Classification Pipeline
2
+
3
+ This project provides a complete, end-to-end pipeline for building a custom insect classification system. The framework is designed to be **domain-agnostic**, allowing you to train a powerful detection and classification model for **any insect species** by simply providing a list of names.
4
+
5
+ Using the `Bplusplus` library, this pipeline automates the entire machine learning workflow, from data collection to video inference.
6
+
7
+ ## Key Features
8
+
9
+ - **Automated Data Collection**: Downloads hundreds of images for any species from the GBIF database.
10
+ - **Intelligent Data Preparation**: Uses a pre-trained model to automatically find, crop, and resize insects from raw images, ensuring high-quality training data.
11
+ - **Hierarchical Classification**: Trains a model to identify insects at three taxonomic levels: **family, genus, and species**.
12
+ - **Video Inference & Tracking**: Processes video files to detect, classify, and track individual insects over time, providing aggregated predictions.
13
+ ## Pipeline Overview
14
+
15
+ The process is broken down into six main steps, all detailed in the `full_pipeline.ipynb` notebook:
16
+
17
+ 1. **Collect Data**: Select your target species and fetch raw insect images from the web.
18
+ 2. **Prepare Data**: Filter, clean, and prepare images for training.
19
+ 3. **Train Model**: Train the hierarchical classification model.
20
+ 4. **Download Weights**: Fetch pre-trained weights for the detection model.
21
+ 5. **Test Model**: Evaluate the performance of the trained model.
22
+ 6. **Run Inference**: Run the full pipeline on a video file for real-world application.
23
+
24
+ ## How to Use
25
+
26
+ ### Prerequisites
27
+
28
+ - Python 3.9+
29
+ - `venv` for creating a virtual environment (recommended)
30
+
31
+ ### Setup
32
+
33
+ 1. **Create and activate a virtual environment:**
34
+ ```bash
35
+ python3 -m venv venv
36
+ source venv/bin/activate
37
+ ```
38
+
39
+ 2. **Install the required packages:**
40
+ ```bash
41
+ pip install bplusplus
42
+ ```
43
+
44
+ ### Running the Pipeline
45
+
46
+ The entire workflow is contained within **`full_pipeline.ipynb`**. Open it with a Jupyter Notebook or JupyterLab environment and run the cells sequentially to execute the full pipeline.
47
+
48
+ ### Customization
49
+
50
+ To train the model on different insect species, simply modify the `names` list in **Step 1** of the notebook:
51
+
52
+ ```python
53
+ # a/full_pipeline.ipynb
54
+
55
+ # To use your own species, change the names in this list
56
+ names = [
57
+ "Vespa crabro", "Vespula vulgaris", "Dolichovespula media"
58
+ ]
59
+ ```
60
+
61
+ The pipeline will automatically handle the rest, from data collection to training, for your new set of species.
62
+
63
+ ## Directory Structure
64
+
65
+ The pipeline will create the following directories to store artifacts:
66
+
67
+ - `GBIF_data/`: Stores the raw images downloaded from GBIF.
68
+ - `prepared_data/`: Contains the cleaned, cropped, and resized images ready for training.
69
+ - `trained_model/`: Saves the trained model weights (`best_multitask.pt`) and pre-trained detection weights.
@@ -1,21 +1,27 @@
1
1
  [tool.poetry]
2
2
  name = "bplusplus"
3
- version = "1.2.1"
3
+ version = "1.2.3"
4
4
  description = "A simple method to create AI models for biodiversity, with collect and prepare pipeline"
5
- authors = ["Titus Venverloo <tvenver@mit.edu>", "Deniz Aydemir <deniz@aydemir.us>", "Orlando Closs <orlando.closs@wur.nl>", "Ase Hatveit <aase@mit.edu>"]
5
+ authors = ["Titus Venverloo <tvenver@mit.edu>", "Deniz Aydemir <deniz@aydemir.us>", "Orlando Closs <orlandocloss@pm.me>", "Ase Hatveit <aase@mit.edu>"]
6
6
  license = "MIT"
7
7
  readme = "README.md"
8
8
 
9
9
  [tool.poetry.dependencies]
10
10
  python = "^3.9.0"
11
11
  requests = "2.25.1"
12
- ultralytics = "8.0.195"
12
+ pandas = "2.1.4"
13
+ ultralytics = ">=8.3.0"
14
+ pyyaml = "6.0.1"
15
+ tqdm = "4.66.4"
16
+ prettytable = "3.7.0"
17
+ torch = "^2.5.0"
18
+ torchvision = "*"
19
+ pillow = "*"
20
+ numpy = "*"
21
+ scikit-learn = "*"
13
22
  pygbif = "^0.6.4"
14
23
  validators = "^0.33.0"
15
- prettytable = "3.7.0"
16
- scikit-learn = "^1.6.1"
17
24
  tabulate = "^0.9.0"
18
- torch = "2.5.0"
19
25
 
20
26
  [tool.poetry.group.dev.dependencies]
21
27
  jupyter = "^1.0.0"
@@ -0,0 +1,5 @@
1
+ from .collect import Group, collect
2
+ from .prepare import prepare
3
+ from .train import train
4
+ from .test import test
5
+ from .inference import inference
@@ -114,6 +114,8 @@ def __next_batch(parameters: dict[str, Any], total_limit: int, offset: int, curr
114
114
  parameters["limit"] = total_limit
115
115
  parameters["offset"] = offset
116
116
  parameters["mediaType"] = ["StillImage"]
117
+ parameters["basisOfRecord"] = ["HUMAN_OBSERVATION", "LIVING_SPECIMEN", "MACHINE_OBSERVATION", "OBSERVATION", "OCCURRENCE"]
118
+ parameters["lifeStage"] = ["Adult"]
117
119
  search = pygbif.occurrences.search(**parameters)
118
120
  occurrences = search["results"]
119
121
  if search["endOfRecords"] or len(current) >= total_limit: