bplusplus 1.2.1__py3-none-any.whl → 1.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bplusplus might be problematic. Click here for more details.

@@ -14,7 +14,7 @@ import logging
14
14
  from tqdm import tqdm
15
15
  import sys
16
16
 
17
- def train_multitask(batch_size=4, epochs=30, patience=3, img_size=640, data_dir='/mnt/nvme0n1p1/datasets/insect/bjerge-train2', output_dir='./output', species_list=None):
17
+ def train(batch_size=4, epochs=30, patience=3, img_size=640, data_dir='input', output_dir='./output', species_list=None):
18
18
  """
19
19
  Main function to run the entire training pipeline.
20
20
  Sets up datasets, model, training process and handles errors.
@@ -144,17 +144,17 @@ def train_multitask(batch_size=4, epochs=30, patience=3, img_size=640, data_dir=
144
144
  def get_taxonomy(species_list):
145
145
  """
146
146
  Retrieves taxonomic information for a list of species from GBIF API.
147
- Creates a hierarchical taxonomy dictionary with order, family, and species relationships.
147
+ Creates a hierarchical taxonomy dictionary with family, genus, and species relationships.
148
148
  """
149
149
  taxonomy = {1: [], 2: {}, 3: {}}
150
- species_to_family = {}
151
- family_to_order = {}
150
+ species_to_genus = {}
151
+ genus_to_family = {}
152
152
 
153
153
  logger.info(f"Building taxonomy from GBIF for {len(species_list)} species")
154
154
 
155
155
  print("\nTaxonomy Results:")
156
156
  print("-" * 80)
157
- print(f"{'Species':<30} {'Order':<20} {'Family':<20} {'Status'}")
157
+ print(f"{'Species':<30} {'Family':<20} {'Genus':<20} {'Status'}")
158
158
  print("-" * 80)
159
159
 
160
160
  for species_name in species_list:
@@ -165,23 +165,23 @@ def get_taxonomy(species_list):
165
165
 
166
166
  if data.get('status') == 'ACCEPTED' or data.get('status') == 'SYNONYM':
167
167
  family = data.get('family')
168
- order = data.get('order')
168
+ genus = data.get('genus')
169
169
 
170
- if family and order:
170
+ if family and genus:
171
171
  status = "OK"
172
172
 
173
- print(f"{species_name:<30} {order:<20} {family:<20} {status}")
173
+ print(f"{species_name:<30} {family:<20} {genus:<20} {status}")
174
174
 
175
- species_to_family[species_name] = family
176
- family_to_order[family] = order
175
+ species_to_genus[species_name] = genus
176
+ genus_to_family[genus] = family
177
177
 
178
- if order not in taxonomy[1]:
179
- taxonomy[1].append(order)
178
+ if family not in taxonomy[1]:
179
+ taxonomy[1].append(family)
180
180
 
181
- taxonomy[2][family] = order
182
- taxonomy[3][species_name] = family
181
+ taxonomy[2][genus] = family
182
+ taxonomy[3][species_name] = genus
183
183
  else:
184
- error_msg = f"Species '{species_name}' found in GBIF but family and order not found, could be spelling error in species, check GBIF"
184
+ error_msg = f"Species '{species_name}' found in GBIF but family and genus not found, could be spelling error in species, check GBIF"
185
185
  logger.error(error_msg)
186
186
  print(f"{species_name:<30} {'Not found':<20} {'Not found':<20} ERROR")
187
187
  print(f"Error: {error_msg}")
@@ -203,23 +203,23 @@ def get_taxonomy(species_list):
203
203
  taxonomy[1] = sorted(list(set(taxonomy[1])))
204
204
  print("-" * 80)
205
205
 
206
- num_orders = len(taxonomy[1])
207
- num_families = len(taxonomy[2])
206
+ num_families = len(taxonomy[1])
207
+ num_genera = len(taxonomy[2])
208
208
  num_species = len(taxonomy[3])
209
209
 
210
- print("\nOrder indices:")
211
- for i, order in enumerate(taxonomy[1]):
212
- print(f" {i}: {order}")
213
-
214
210
  print("\nFamily indices:")
215
- for i, family in enumerate(taxonomy[2].keys()):
211
+ for i, family in enumerate(taxonomy[1]):
216
212
  print(f" {i}: {family}")
217
213
 
214
+ print("\nGenus indices:")
215
+ for i, genus in enumerate(taxonomy[2].keys()):
216
+ print(f" {i}: {genus}")
217
+
218
218
  print("\nSpecies indices:")
219
219
  for i, species in enumerate(species_list):
220
220
  print(f" {i}: {species}")
221
221
 
222
- logger.info(f"Taxonomy built: {num_orders} orders, {num_families} families, {num_species} species")
222
+ logger.info(f"Taxonomy built: {num_families} families, {num_genera} genera, {num_species} species")
223
223
  return taxonomy
224
224
 
225
225
  def get_species_from_directory(train_dir):
@@ -276,15 +276,15 @@ class InsectDataset(Dataset):
276
276
  self.level_to_idx = level_to_idx
277
277
  self.samples = []
278
278
 
279
- species_to_family = {species: family for species, family in taxonomy[3].items()}
280
- family_to_order = {family: order for family, order in taxonomy[2].items()}
279
+ species_to_genus = {species: genus for species, genus in taxonomy[3].items()}
280
+ genus_to_family = {genus: family for genus, family in taxonomy[2].items()}
281
281
 
282
282
  for species_name in os.listdir(root_dir):
283
283
  species_path = os.path.join(root_dir, species_name)
284
284
  if os.path.isdir(species_path):
285
- if species_name in species_to_family:
286
- family_name = species_to_family[species_name]
287
- order_name = family_to_order[family_name]
285
+ if species_name in species_to_genus:
286
+ genus_name = species_to_genus[species_name]
287
+ family_name = genus_to_family[genus_name]
288
288
 
289
289
  for img_file in os.listdir(species_path):
290
290
  if img_file.endswith(('.jpg', '.png', '.jpeg')):
@@ -296,7 +296,7 @@ class InsectDataset(Dataset):
296
296
  # Only add valid images to samples
297
297
  self.samples.append({
298
298
  'image_path': img_path,
299
- 'labels': [order_name, family_name, species_name]
299
+ 'labels': [family_name, genus_name, species_name]
300
300
  })
301
301
 
302
302
  except Exception as e:
@@ -0,0 +1,101 @@
1
+ Metadata-Version: 2.3
2
+ Name: bplusplus
3
+ Version: 1.2.3
4
+ Summary: A simple method to create AI models for biodiversity, with collect and prepare pipeline
5
+ License: MIT
6
+ Author: Titus Venverloo
7
+ Author-email: tvenver@mit.edu
8
+ Requires-Python: >=3.9.0,<4.0.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: numpy
17
+ Requires-Dist: pandas (==2.1.4)
18
+ Requires-Dist: pillow
19
+ Requires-Dist: prettytable (==3.7.0)
20
+ Requires-Dist: pygbif (>=0.6.4,<0.7.0)
21
+ Requires-Dist: pyyaml (==6.0.1)
22
+ Requires-Dist: requests (==2.25.1)
23
+ Requires-Dist: scikit-learn
24
+ Requires-Dist: tabulate (>=0.9.0,<0.10.0)
25
+ Requires-Dist: torch (>=2.5.0,<3.0.0)
26
+ Requires-Dist: torchvision
27
+ Requires-Dist: tqdm (==4.66.4)
28
+ Requires-Dist: ultralytics (>=8.3.0)
29
+ Requires-Dist: validators (>=0.33.0,<0.34.0)
30
+ Description-Content-Type: text/markdown
31
+
32
+ # Domain-Agnostic Insect Classification Pipeline
33
+
34
+ This project provides a complete, end-to-end pipeline for building a custom insect classification system. The framework is designed to be **domain-agnostic**, allowing you to train a powerful detection and classification model for **any insect species** by simply providing a list of names.
35
+
36
+ Using the `Bplusplus` library, this pipeline automates the entire machine learning workflow, from data collection to video inference.
37
+
38
+ ## Key Features
39
+
40
+ - **Automated Data Collection**: Downloads hundreds of images for any species from the GBIF database.
41
+ - **Intelligent Data Preparation**: Uses a pre-trained model to automatically find, crop, and resize insects from raw images, ensuring high-quality training data.
42
+ - **Hierarchical Classification**: Trains a model to identify insects at three taxonomic levels: **family, genus, and species**.
43
+ - **Video Inference & Tracking**: Processes video files to detect, classify, and track individual insects over time, providing aggregated predictions.
44
+ ## Pipeline Overview
45
+
46
+ The process is broken down into six main steps, all detailed in the `full_pipeline.ipynb` notebook:
47
+
48
+ 1. **Collect Data**: Select your target species and fetch raw insect images from the web.
49
+ 2. **Prepare Data**: Filter, clean, and prepare images for training.
50
+ 3. **Train Model**: Train the hierarchical classification model.
51
+ 4. **Download Weights**: Fetch pre-trained weights for the detection model.
52
+ 5. **Test Model**: Evaluate the performance of the trained model.
53
+ 6. **Run Inference**: Run the full pipeline on a video file for real-world application.
54
+
55
+ ## How to Use
56
+
57
+ ### Prerequisites
58
+
59
+ - Python 3.9+ (the package metadata declares `Requires-Python: >=3.9.0`)
60
+ - `venv` for creating a virtual environment (recommended)
61
+
62
+ ### Setup
63
+
64
+ 1. **Create and activate a virtual environment:**
65
+ ```bash
66
+ python3 -m venv venv
67
+ source venv/bin/activate
68
+ ```
69
+
70
+ 2. **Install the required packages:**
71
+ ```bash
72
+ pip install bplusplus
73
+ ```
74
+
75
+ ### Running the Pipeline
76
+
77
+ The entire workflow is contained within **`full_pipeline.ipynb`**. Open it with a Jupyter Notebook or JupyterLab environment and run the cells sequentially to execute the full pipeline.
78
+
79
+ ### Customization
80
+
81
+ To train the model on different insect species, simply modify the `names` list in **Step 1** of the notebook:
82
+
83
+ ```python
84
+ # a/full_pipeline.ipynb
85
+
86
+ # To use your own species, change the names in this list
87
+ names = [
88
+ "Vespa crabro", "Vespula vulgaris", "Dolichovespula media"
89
+ ]
90
+ ```
91
+
92
+ The pipeline will automatically handle the rest, from data collection to training, for your new set of species.
93
+
94
+ ## Directory Structure
95
+
96
+ The pipeline will create the following directories to store artifacts:
97
+
98
+ - `GBIF_data/`: Stores the raw images downloaded from GBIF.
99
+ - `prepared_data/`: Contains the cleaned, cropped, and resized images ready for training.
100
+ - `trained_model/`: Saves the trained model weights (`best_multitask.pt`) and pre-trained detection weights.
101
+
@@ -0,0 +1,11 @@
1
+ bplusplus/__init__.py,sha256=GLqIx6Ln3Jt_Q95zkqXglKaSF3dbw-awax4dYry3tw0,145
2
+ bplusplus/collect.py,sha256=lEJHXPpOo4DALBw6zemdmFuqAXZ12-BKwgesvq5ACYs,7135
3
+ bplusplus/inference.py,sha256=3XmwzEfVTw5OFiMbMVgiuEa-r22HvMUWHVXESZsTIzo,37708
4
+ bplusplus/prepare.py,sha256=pdXUVAzY030tM6f0Gf_zml8I26lS38wuvH13R2F00Do,25829
5
+ bplusplus/test.py,sha256=kKjrsb3iCfljtRjot_kiVB5hopMkApoW9yvMcuI2O_U,30545
6
+ bplusplus/tracker.py,sha256=JixV1ICGywGhVMTvkq3hrk4MLUUWDh3XJW4VLm4JdO0,11250
7
+ bplusplus/train.py,sha256=wkHnKbTdZAFn2voJS7gSYXU7B9UVYVYmbTJCR0tFzs4,28058
8
+ bplusplus-1.2.3.dist-info/LICENSE,sha256=rRkeHptDnlmviR0_WWgNT9t696eys_cjfVUU8FEO4k4,1071
9
+ bplusplus-1.2.3.dist-info/METADATA,sha256=IDnokwF2CEyM_3xLmlRL30k2P9NglDjdjbxC7-UZoc4,4046
10
+ bplusplus-1.2.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
11
+ bplusplus-1.2.3.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: poetry-core 2.1.3
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any