bplusplus 1.2.1.tar.gz → 1.2.2.tar.gz

This diff shows the content changes between publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the versions as they appear in their public registry.

Potentially problematic release: this version of bplusplus might be problematic.

@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.3
  Name: bplusplus
- Version: 1.2.1
+ Version: 1.2.2
  Summary: A simple method to create AI models for biodiversity, with collect and prepare pipeline
  License: MIT
  Author: Titus Venverloo
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
  Requires-Dist: prettytable (==3.7.0)
  Requires-Dist: pygbif (>=0.6.4,<0.7.0)
  Requires-Dist: requests (==2.25.1)
@@ -111,14 +112,21 @@ This function takes three arguments:
  - **sizes: list = None** - List of sizes to filter by. If None, all sizes will be used, ["large", "medium", "small"].

  ```python
- # Prepare data
+ # Prepare data (one stage small insects)
  bplusplus.prepare(
      input_directory='/dataset/selected-species',
      output_directory='/dataset/prepared-data',
-     with_background=False,
-     one_stage=False,
+     with_background=True,
+     one_stage=True,
      size_filter=True,
-     sizes=["large"]
+     sizes=["small"]
+ )
+
+ # Prepare data (two stage)
+ bplusplus.prepare(
+     input_directory='/dataset/selected-species',
+     output_directory='/dataset/prepared-data',
+     one_stage=False
  )
  ```

@@ -87,14 +87,21 @@ This function takes three arguments:
  - **sizes: list = None** - List of sizes to filter by. If None, all sizes will be used, ["large", "medium", "small"].

  ```python
- # Prepare data
+ # Prepare data (one stage small insects)
  bplusplus.prepare(
      input_directory='/dataset/selected-species',
      output_directory='/dataset/prepared-data',
-     with_background=False,
-     one_stage=False,
+     with_background=True,
+     one_stage=True,
      size_filter=True,
-     sizes=["large"]
+     sizes=["small"]
+ )
+
+ # Prepare data (two stage)
+ bplusplus.prepare(
+     input_directory='/dataset/selected-species',
+     output_directory='/dataset/prepared-data',
+     one_stage=False
  )
  ```

@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "bplusplus"
- version = "1.2.1"
+ version = "1.2.2"
  description = "A simple method to create AI models for biodiversity, with collect and prepare pipeline"
  authors = ["Titus Venverloo <tvenver@mit.edu>", "Deniz Aydemir <deniz@aydemir.us>", "Orlando Closs <orlando.closs@wur.nl>", "Ase Hatveit <aase@mit.edu>"]
  license = "MIT"
@@ -114,6 +114,8 @@ def __next_batch(parameters: dict[str, Any], total_limit: int, offset: int, curr
      parameters["limit"] = total_limit
      parameters["offset"] = offset
      parameters["mediaType"] = ["StillImage"]
+     parameters["basisOfRecord"] = ["HUMAN_OBSERVATION", "LIVING_SPECIMEN", "MACHINE_OBSERVATION", "OBSERVATION", "OCCURRENCE"]
+     parameters["lifeStage"] = ["Adult"]
      search = pygbif.occurrences.search(**parameters)
      occurrences = search["results"]
      if search["endOfRecords"] or len(current) >= total_limit:
@@ -115,17 +115,17 @@ class HierarchicalInsectClassifier(nn.Module):
  def get_taxonomy(species_list):
      """
      Retrieves taxonomic information for a list of species from GBIF API.
-     Creates a hierarchical taxonomy dictionary with order, family, and species relationships.
+     Creates a hierarchical taxonomy dictionary with family, genus, and species relationships.
      """
      taxonomy = {1: [], 2: {}, 3: {}}
-     species_to_family = {}
-     family_to_order = {}
+     species_to_genus = {}
+     genus_to_family = {}

      logger.info(f"Building taxonomy from GBIF for {len(species_list)} species")

      print("\nTaxonomy Results:")
      print("-" * 80)
-     print(f"{'Species':<30} {'Order':<20} {'Family':<20} {'Status'}")
+     print(f"{'Species':<30} {'Family':<20} {'Genus':<20} {'Status'}")
      print("-" * 80)

      for species_name in species_list:
@@ -136,23 +136,23 @@ def get_taxonomy(species_list):
      if data.get('status') == 'ACCEPTED' or data.get('status') == 'SYNONYM':
          family = data.get('family')
-         order = data.get('order')
+         genus = data.get('genus')

-         if family and order:
+         if family and genus:
              status = "OK"

-             print(f"{species_name:<30} {order:<20} {family:<20} {status}")
+             print(f"{species_name:<30} {family:<20} {genus:<20} {status}")

-             species_to_family[species_name] = family
-             family_to_order[family] = order
+             species_to_genus[species_name] = genus
+             genus_to_family[genus] = family

-             if order not in taxonomy[1]:
-                 taxonomy[1].append(order)
+             if family not in taxonomy[1]:
+                 taxonomy[1].append(family)

-             taxonomy[2][family] = order
-             taxonomy[3][species_name] = family
+             taxonomy[2][genus] = family
+             taxonomy[3][species_name] = genus
      else:
-         error_msg = f"Species '{species_name}' found in GBIF but family and order not found, could be spelling error in species, check GBIF"
+         error_msg = f"Species '{species_name}' found in GBIF but family and genus not found, could be spelling error in species, check GBIF"
          logger.error(error_msg)
          print(f"{species_name:<30} {'Not found':<20} {'Not found':<20} ERROR")
          print(f"Error: {error_msg}")
@@ -174,24 +174,24 @@ def get_taxonomy(species_list):
      taxonomy[1] = sorted(list(set(taxonomy[1])))
      print("-" * 80)

-     num_orders = len(taxonomy[1])
-     num_families = len(taxonomy[2])
+     num_families = len(taxonomy[1])
+     num_genera = len(taxonomy[2])
      num_species = len(taxonomy[3])

-     print("\nOrder indices:")
-     for i, order in enumerate(taxonomy[1]):
-         print(f" {i}: {order}")
-
      print("\nFamily indices:")
-     for i, family in enumerate(taxonomy[2].keys()):
+     for i, family in enumerate(taxonomy[1]):
          print(f" {i}: {family}")

+     print("\nGenus indices:")
+     for i, genus in enumerate(taxonomy[2].keys()):
+         print(f" {i}: {genus}")
+
      print("\nSpecies indices:")
      for i, species in enumerate(species_list):
          print(f" {i}: {species}")

-     logger.info(f"Taxonomy built: {num_orders} orders, {num_families} families, {num_species} species")
-     return taxonomy, species_to_family, family_to_order
+     logger.info(f"Taxonomy built: {num_families} families, {num_genera} genera, {num_species} species")
+     return taxonomy, species_to_genus, genus_to_family

  def create_mappings(taxonomy):
      """Create index mappings from taxonomy"""
@@ -244,12 +244,12 @@ class TestTwoStage:
              saved_species = checkpoint["species_list"]
              print(f"Saved model was trained on: {', '.join(saved_species)}")

-             taxonomy, species_to_family, family_to_order = get_taxonomy(species_names)
+             taxonomy, species_to_genus, genus_to_family = get_taxonomy(species_names)
          else:
-             taxonomy, species_to_family, family_to_order = get_taxonomy(species_names)
+             taxonomy, species_to_genus, genus_to_family = get_taxonomy(species_names)
      else:
          state_dict = checkpoint
-         taxonomy, species_to_family, family_to_order = get_taxonomy(species_names)
+         taxonomy, species_to_genus, genus_to_family = get_taxonomy(species_names)

      level_to_idx, idx_to_level = create_mappings(taxonomy)

@@ -259,8 +259,6 @@ class TestTwoStage:
      if hasattr(taxonomy, "items"):
          num_classes_per_level = [len(classes) if isinstance(classes, list) else len(classes.keys())
                                   for level, classes in taxonomy.items()]
-     else:
-         num_classes_per_level = [4, 5, 9] # Example values, adjust as needed

      print(f"Using model with class counts: {num_classes_per_level}")

@@ -296,8 +294,8 @@ class TestTwoStage:
      print("Model successfully loaded")
      print(f"Using species: {', '.join(species_names)}")

-     self.species_to_family = species_to_family
-     self.family_to_order = family_to_order
+     self.species_to_genus = species_to_genus
+     self.genus_to_family = genus_to_family

  def get_frames(self, test_dir):
      image_dir = os.path.join(test_dir, "images")
@@ -305,10 +303,10 @@ class TestTwoStage:
      predicted_frames = []
      predicted_family_frames = []
-     predicted_order_frames = []
+     predicted_genus_frames = []
      true_species_frames = []
      true_family_frames = []
-     true_order_frames = []
+     true_genus_frames = []
      image_names = []

      start_time = time.time() # Start timing
@@ -326,7 +324,7 @@ class TestTwoStage:
      detections = results[0].boxes
      predicted_frame = []
      predicted_family_frame = []
-     predicted_order_frame = []
+     predicted_genus_frame = []

      if detections:
          for box in detections:
@@ -346,13 +344,13 @@ class TestTwoStage:
      outputs = self.classification_model(input_tensor)

      # Get all taxonomic level predictions
-     order_output = outputs[0] # First output is order (level 1)
-     family_output = outputs[1] # Second output is family (level 2)
+     family_output = outputs[0] # First output is family (level 1)
+     genus_output = outputs[1] # Second output is genus (level 2)
      species_output = outputs[2] # Third output is species (level 3)

      # Get prediction indices
-     order_idx = order_output.argmax(dim=1).item()
      family_idx = family_output.argmax(dim=1).item()
+     genus_idx = genus_output.argmax(dim=1).item()
      species_idx = species_output.argmax(dim=1).item()

      img_height, img_width, _ = frame.shape
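The indexing above assumes the classifier exposes one output head per taxonomic level, ordered family, genus, species. A minimal sketch of that layout (a hypothetical stand-in; the real `HierarchicalInsectClassifier` is not shown in this diff):

```python
import torch
import torch.nn as nn

class ThreeHeadSketch(nn.Module):
    """Hypothetical three-head classifier matching the output order above."""
    def __init__(self, feat_dim: int, n_family: int, n_genus: int, n_species: int):
        super().__init__()
        self.backbone = nn.Sequential(nn.Flatten(), nn.LazyLinear(feat_dim), nn.ReLU())
        self.family_head = nn.Linear(feat_dim, n_family)    # outputs[0]
        self.genus_head = nn.Linear(feat_dim, n_genus)      # outputs[1]
        self.species_head = nn.Linear(feat_dim, n_species)  # outputs[2]

    def forward(self, x):
        h = self.backbone(x)
        return self.family_head(h), self.genus_head(h), self.species_head(h)

model = ThreeHeadSketch(feat_dim=128, n_family=2, n_genus=3, n_species=5)
outputs = model(torch.randn(1, 3, 64, 64))
family_idx = outputs[0].argmax(dim=1).item()  # same indexing as the diff
```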
@@ -367,15 +365,15 @@ class TestTwoStage:
      # Add predictions for each taxonomic level
      predicted_frame.append([species_idx] + box_coords)
      predicted_family_frame.append([family_idx] + box_coords)
-     predicted_order_frame.append([order_idx] + box_coords)
+     predicted_genus_frame.append([genus_idx] + box_coords)

      predicted_frames.append(predicted_frame if predicted_frame else [])
      predicted_family_frames.append(predicted_family_frame if predicted_family_frame else [])
-     predicted_order_frames.append(predicted_order_frame if predicted_order_frame else [])
+     predicted_genus_frames.append(predicted_genus_frame if predicted_genus_frame else [])

      true_species_frame = []
      true_family_frame = []
-     true_order_frame = []
+     true_genus_frame = []

      if os.path.exists(label_path) and os.path.getsize(label_path) > 0:
          with open(label_path, 'r') as f:
@@ -389,22 +387,22 @@ class TestTwoStage:
      if species_idx < len(self.species_names):
          species_name = self.species_names[species_idx]

-         if species_name in self.species_to_family:
-             family_name = self.species_to_family[species_name]
-             # Get the index of the family in the level_to_idx mapping
-             if 2 in self.level_to_idx and family_name in self.level_to_idx[2]:
-                 family_idx = self.level_to_idx[2][family_name]
-                 true_family_frame.append([family_idx] + box_coords)
+         if species_name in self.species_to_genus:
+             genus_name = self.species_to_genus[species_name]
+             # Get the index of the genus in the level_to_idx mapping
+             if 2 in self.level_to_idx and genus_name in self.level_to_idx[2]:
+                 genus_idx = self.level_to_idx[2][genus_name]
+                 true_genus_frame.append([genus_idx] + box_coords)

-             if family_name in self.family_to_order:
-                 order_name = self.family_to_order[family_name]
-                 if 1 in self.level_to_idx and order_name in self.level_to_idx[1]:
-                     order_idx = self.level_to_idx[1][order_name]
-                     true_order_frame.append([order_idx] + box_coords)
+             if genus_name in self.genus_to_family:
+                 family_name = self.genus_to_family[genus_name]
+                 if 1 in self.level_to_idx and family_name in self.level_to_idx[1]:
+                     family_idx = self.level_to_idx[1][family_name]
+                     true_family_frame.append([family_idx] + box_coords)

      true_species_frames.append(true_species_frame if true_species_frame else [])
      true_family_frames.append(true_family_frame if true_family_frame else [])
-     true_order_frames.append(true_order_frame if true_order_frame else [])
+     true_genus_frames.append(true_genus_frame if true_genus_frame else [])

      end_time = time.time() # End timing

@@ -416,42 +414,42 @@ class TestTwoStage:
      writer.writerow([
          "Image Name",
          "True Species Detections",
+         "True Genus Detections",
          "True Family Detections",
-         "True Order Detections",
          "Species Detections",
-         "Family Detections",
-         "Order Detections"
+         "Genus Detections",
+         "Family Detections"
      ])

-     for image_name, true_species, true_family, true_order, species_pred, family_pred, order_pred in zip(
+     for image_name, true_species, true_genus, true_family, species_pred, genus_pred, family_pred in zip(
          image_names,
          true_species_frames,
+         true_genus_frames,
          true_family_frames,
-         true_order_frames,
          predicted_frames,
-         predicted_family_frames,
-         predicted_order_frames
+         predicted_genus_frames,
+         predicted_family_frames
      ):
          writer.writerow([
              image_name,
              true_species,
+             true_genus,
              true_family,
-             true_order,
              species_pred,
-             family_pred,
-             order_pred
+             genus_pred,
+             family_pred
          ])

      print(f"Results saved to {output_file}")
-     return predicted_frames, true_species_frames, end_time - start_time, predicted_family_frames, predicted_order_frames, true_family_frames, true_order_frames
+     return predicted_frames, true_species_frames, end_time - start_time, predicted_genus_frames, predicted_family_frames, true_genus_frames, true_family_frames

  def run(self, test_dir):
      results = self.get_frames(test_dir)
      predicted_frames, true_species_frames, total_time = results[0], results[1], results[2]
-     predicted_family_frames = results[3]
-     predicted_order_frames = results[4]
-     true_family_frames = results[5]
-     true_order_frames = results[6]
+     predicted_genus_frames = results[3]
+     predicted_family_frames = results[4]
+     true_genus_frames = results[5]
+     true_family_frames = results[6]

      num_frames = len(os.listdir(os.path.join(test_dir, 'images')))
      avg_time_per_frame = total_time / num_frames
@@ -461,29 +459,29 @@ class TestTwoStage:
      self.calculate_metrics(
          predicted_frames, true_species_frames,
-         predicted_family_frames, true_family_frames,
-         predicted_order_frames, true_order_frames
+         predicted_genus_frames, true_genus_frames,
+         predicted_family_frames, true_family_frames
      )

  def calculate_metrics(self, predicted_species_frames, true_species_frames,
-                       predicted_family_frames, true_family_frames,
-                       predicted_order_frames, true_order_frames):
+                       predicted_genus_frames, true_genus_frames,
+                       predicted_family_frames, true_family_frames):
      """Calculate metrics at all taxonomic levels"""
-     # Get list of species, families and orders
+     # Get list of species, families and genera
      species_list = self.species_names
-     family_list = sorted(list(set(self.species_to_family.values())))
-     order_list = sorted(list(set(self.family_to_order.values())))
+     genus_list = sorted(list(set(self.species_to_genus.values())))
+     family_list = sorted(list(set(self.genus_to_family.values())))

      # Print the index mappings we're using for evaluation
      print("\nUsing the following index mappings for evaluation:")
-     print("\nOrder indices:")
-     for i, order in enumerate(order_list):
-         print(f" {i}: {order}")
-
      print("\nFamily indices:")
      for i, family in enumerate(family_list):
          print(f" {i}: {family}")

+     print("\nGenus indices:")
+     for i, genus in enumerate(genus_list):
+         print(f" {i}: {genus}")
+
      print("\nSpecies indices:")
      for i, species in enumerate(species_list):
          print(f" {i}: {species}")
@@ -491,11 +489,11 @@ class TestTwoStage:
      # Dictionary to track prediction category counts for debugging
      prediction_counts = {
          "true_species_boxes": sum(len(frame) for frame in true_species_frames),
+         "true_genus_boxes": sum(len(frame) for frame in true_genus_frames),
          "true_family_boxes": sum(len(frame) for frame in true_family_frames),
-         "true_order_boxes": sum(len(frame) for frame in true_order_frames),
          "predicted_species": sum(len(frame) for frame in predicted_species_frames),
-         "predicted_family": sum(len(frame) for frame in predicted_family_frames),
-         "predicted_order": sum(len(frame) for frame in predicted_order_frames)
+         "predicted_genus": sum(len(frame) for frame in predicted_genus_frames),
+         "predicted_family": sum(len(frame) for frame in predicted_family_frames)
      }

      print(f"Prediction counts: {prediction_counts}")
@@ -504,11 +502,11 @@ class TestTwoStage:
      print("\n=== Species-level Metrics ===")
      self.get_metrics(predicted_species_frames, true_species_frames, species_list)

+     print("\n=== Genus-level Metrics ===")
+     self.get_metrics(predicted_genus_frames, true_genus_frames, genus_list)
+
      print("\n=== Family-level Metrics ===")
      self.get_metrics(predicted_family_frames, true_family_frames, family_list)
-
-     print("\n=== Order-level Metrics ===")
-     self.get_metrics(predicted_order_frames, true_order_frames, order_list)

  def get_metrics(self, predicted_frames, true_frames, labels):
      """Calculate metrics for object detection predictions"""
@@ -144,17 +144,17 @@ def train_multitask(batch_size=4, epochs=30, patience=3, img_size=640, data_dir=
  def get_taxonomy(species_list):
      """
      Retrieves taxonomic information for a list of species from GBIF API.
-     Creates a hierarchical taxonomy dictionary with order, family, and species relationships.
+     Creates a hierarchical taxonomy dictionary with family, genus, and species relationships.
      """
      taxonomy = {1: [], 2: {}, 3: {}}
-     species_to_family = {}
-     family_to_order = {}
+     species_to_genus = {}
+     genus_to_family = {}

      logger.info(f"Building taxonomy from GBIF for {len(species_list)} species")

      print("\nTaxonomy Results:")
      print("-" * 80)
-     print(f"{'Species':<30} {'Order':<20} {'Family':<20} {'Status'}")
+     print(f"{'Species':<30} {'Family':<20} {'Genus':<20} {'Status'}")
      print("-" * 80)

      for species_name in species_list:
@@ -165,23 +165,23 @@ def get_taxonomy(species_list):
      if data.get('status') == 'ACCEPTED' or data.get('status') == 'SYNONYM':
          family = data.get('family')
-         order = data.get('order')
+         genus = data.get('genus')

-         if family and order:
+         if family and genus:
              status = "OK"

-             print(f"{species_name:<30} {order:<20} {family:<20} {status}")
+             print(f"{species_name:<30} {family:<20} {genus:<20} {status}")

-             species_to_family[species_name] = family
-             family_to_order[family] = order
+             species_to_genus[species_name] = genus
+             genus_to_family[genus] = family

-             if order not in taxonomy[1]:
-                 taxonomy[1].append(order)
+             if family not in taxonomy[1]:
+                 taxonomy[1].append(family)

-             taxonomy[2][family] = order
-             taxonomy[3][species_name] = family
+             taxonomy[2][genus] = family
+             taxonomy[3][species_name] = genus
      else:
-         error_msg = f"Species '{species_name}' found in GBIF but family and order not found, could be spelling error in species, check GBIF"
+         error_msg = f"Species '{species_name}' found in GBIF but family and genus not found, could be spelling error in species, check GBIF"
          logger.error(error_msg)
          print(f"{species_name:<30} {'Not found':<20} {'Not found':<20} ERROR")
          print(f"Error: {error_msg}")
@@ -203,23 +203,23 @@ def get_taxonomy(species_list):
      taxonomy[1] = sorted(list(set(taxonomy[1])))
      print("-" * 80)

-     num_orders = len(taxonomy[1])
-     num_families = len(taxonomy[2])
+     num_families = len(taxonomy[1])
+     num_genera = len(taxonomy[2])
      num_species = len(taxonomy[3])

-     print("\nOrder indices:")
-     for i, order in enumerate(taxonomy[1]):
-         print(f" {i}: {order}")
-
      print("\nFamily indices:")
-     for i, family in enumerate(taxonomy[2].keys()):
+     for i, family in enumerate(taxonomy[1]):
          print(f" {i}: {family}")

+     print("\nGenus indices:")
+     for i, genus in enumerate(taxonomy[2].keys()):
+         print(f" {i}: {genus}")
+
      print("\nSpecies indices:")
      for i, species in enumerate(species_list):
          print(f" {i}: {species}")

-     logger.info(f"Taxonomy built: {num_orders} orders, {num_families} families, {num_species} species")
+     logger.info(f"Taxonomy built: {num_families} families, {num_genera} genera, {num_species} species")
      return taxonomy

  def get_species_from_directory(train_dir):
@@ -276,15 +276,15 @@ class InsectDataset(Dataset):
      self.level_to_idx = level_to_idx
      self.samples = []

-     species_to_family = {species: family for species, family in taxonomy[3].items()}
-     family_to_order = {family: order for family, order in taxonomy[2].items()}
+     species_to_genus = {species: genus for species, genus in taxonomy[3].items()}
+     genus_to_family = {genus: family for genus, family in taxonomy[2].items()}

      for species_name in os.listdir(root_dir):
          species_path = os.path.join(root_dir, species_name)
          if os.path.isdir(species_path):
-             if species_name in species_to_family:
-                 family_name = species_to_family[species_name]
-                 order_name = family_to_order[family_name]
+             if species_name in species_to_genus:
+                 genus_name = species_to_genus[species_name]
+                 family_name = genus_to_family[genus_name]

                  for img_file in os.listdir(species_path):
                      if img_file.endswith(('.jpg', '.png', '.jpeg')):
@@ -296,7 +296,7 @@ class InsectDataset(Dataset):
      # Only add valid images to samples
      self.samples.append({
          'image_path': img_path,
-         'labels': [order_name, family_name, species_name]
+         'labels': [family_name, genus_name, species_name]
      })

  except Exception as e:
@@ -94,19 +94,73 @@ def prepare(input_directory: str, output_directory: str, one_stage: bool = False
      DetectionModel, Sequential, Conv, Conv2d, BatchNorm2d,
      SiLU, ReLU, LeakyReLU, MaxPool2d, Linear, Dropout, Upsample,
      Module, ModuleList, ModuleDict,
-     Bottleneck, C2f, SPPF, Detect, Concat, DFL
+     Bottleneck, C2f, SPPF, Detect, Concat, DFL,
+     # Add torch internal classes
+     torch.nn.parameter.Parameter,
+     torch.Tensor,
+     torch._utils._rebuild_tensor_v2,
+     torch._utils._rebuild_parameter
  ])
-
- model = YOLO(weights_path)
- model.predict(images_path, conf=0.25, save=True, save_txt=True, project=temp_dir_path)
- labels_path = temp_dir_path / "predict" / "labels"

- if size_filter and len(sizes) <= 2:
-     filtered=filter_by_size(images_path, labels_path, sizes)
-     print(f"\nFiltered {len(list(images_path.glob('*.jpg')))} images by size out of {original_image_count} input images.\n NOTE: Some images may be filtered due to corruption or inaccurate labels.")
+ labels_path = temp_dir_path / "predict" / "labels"
+
+ try:
+     print(f"Loading YOLO model from {weights_path}")
+     model = YOLO(weights_path)
+
+     # Get list of all image files
+     image_files = list(images_path.glob('*.jpg'))
+     print(f"Found {len(image_files)} images to process")
+
+     # Ensure predict directory exists
+     predict_dir = temp_dir_path / "predict"
+     predict_dir.mkdir(exist_ok=True)
+     labels_path.mkdir(parents=True, exist_ok=True)
+
+     result_count = 0
+     error_count = 0
+
+     for img_path in image_files:
+         try:
+             results = model.predict(
+                 source=str(img_path),
+                 conf=0.5,
+                 save=True,
+                 save_txt=True,
+                 project=temp_dir_path,
+                 name="predict",
+                 exist_ok=True,
+                 verbose=True
+             )
+
+             result_count += 1
+
+         except Exception as e:
+             error_count += 1
+             print(f"Error processing {img_path.name}: {e}")
+             continue
+
+     print(f"Model prediction completed: {result_count} successful, {error_count} failed")
+     print(f"Checking for labels in {labels_path}")
+
+     # Verify labels were created
+     label_files = list(labels_path.glob("*.txt"))
+     print(f"Found {len(label_files)} label files")
+
+     if len(label_files) == 0:
+         print("WARNING: No label files were created by the model prediction!")
+
+ except Exception as e:
+     print(f"Error during model prediction setup: {e}")
+     import traceback
+     traceback.print_exc()

  if one_stage:

+     if size_filter and len(sizes) <= 2:
+         __filter_by_size(images_path, labels_path, sizes)
+         print(f"\nFiltered {len(list(images_path.glob('*.jpg')))} images by size out of {original_image_count} input images.\n NOTE: Some images may be filtered due to corruption or inaccurate labels.")
+
      __delete_orphaned_images_and_inferences(images_path, labels_path)
      __delete_invalid_txt_files(images_path, labels_path)
      class_idxs = update_labels(class_mapping, labels_path)
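The expanded allowlist at the top of this hunk is presumably feeding `torch.serialization.add_safe_globals` (the call itself sits outside the hunk). Since PyTorch 2.6, `torch.load` defaults to `weights_only=True`, so non-tensor classes pickled into a checkpoint must be allowlisted before loading. A hedged sketch of the mechanism (the checkpoint path is hypothetical):

```python
import torch

# Sketch only: allowlisting the torch internals named in the diff so that a
# weights_only load of a YOLO-style checkpoint can unpickle them.
torch.serialization.add_safe_globals([
    torch.nn.parameter.Parameter,
    torch.Tensor,
    torch._utils._rebuild_tensor_v2,
    torch._utils._rebuild_parameter,
])
checkpoint = torch.load("weights/model.pt", weights_only=True)  # hypothetical path
```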
@@ -142,15 +196,17 @@ def prepare(input_directory: str, output_directory: str, one_stage: bool = False
          __make_yaml_file(output_directory, class_idxs)
      else:
-         try:
-             sized_dir = temp_dir_path / "sized"
-             sized_dir.mkdir(parents=True, exist_ok=True)
-             __two_stage_update(class_mapping, filtered, sized_dir, images_path)
-             __classification_split(sized_dir, output_directory)
-             __count_classification_split(output_directory, class_mapping)
-         except:
-             __classification_split(images_path, output_directory)
-             __count_classification_split(output_directory, class_mapping)
+         # try:
+         #     sized_dir = temp_dir_path / "sized"
+         #     sized_dir.mkdir(parents=True, exist_ok=True)
+         #     __two_stage_update(class_mapping, filtered, sized_dir, images_path)
+         #     __classification_split(sized_dir, output_directory)
+         #     __count_classification_split(output_directory, class_mapping)
+         # except:
+         __delete_orphaned_images_and_inferences(images_path, labels_path)
+         __delete_invalid_txt_files(images_path, labels_path)
+         __classification_split(images_path, labels_path, output_directory, class_mapping)
+         __count_classification_split(output_directory, class_mapping)

  def __count_classification_split(output_directory: str, class_mapping: dict):
      """
@@ -186,17 +242,21 @@ def __count_classification_split(output_directory: str, class_mapping: dict):
              class_counts[class_name]
          ])
      print(table)
-     print(f"Saved in {output_directory}")
+     # print(f"Saved in {output_directory}")

- def __classification_split(input_directory: str, output_directory: str):
+ def __classification_split(input_directory: str, labels_directory: str, output_directory: str, class_mapping: dict):
      """
-     Splits the data into train and validation sets for classification tasks.
+     Splits the data into train and validation sets for classification tasks,
+     cropping images according to their YOLO labels but preserving original class structure.

      Args:
-         input_directory (str): Path to the input directory containing subdirectories of class names.
+         input_directory (str): Path to the input directory containing images.
+         labels_directory (str): Path to the directory containing YOLO label files.
          output_directory (str): Path to the output directory where train and valid splits will be created.
+         class_mapping (dict): Dictionary mapping class names to image file names.
      """
      input_directory = Path(input_directory)
+     labels_directory = Path(labels_directory)
      output_directory = Path(output_directory)

      # Create train and valid directories
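The rewritten `__classification_split` in the next hunk crops each image to its YOLO detection box before splitting. YOLO labels store normalized center coordinates, and a quick worked sketch of the conversion the new code performs follows (the image size and box values here are made up):

```python
# Sketch of the YOLO -> pixel-box conversion used by __classification_split.
img_width, img_height = 640, 480
x_center, y_center, width, height = 0.5, 0.5, 0.25, 0.5  # normalized label values

x_min = int((x_center - width / 2) * img_width)    # 240
y_min = int((y_center - height / 2) * img_height)  # 120
x_max = int((x_center + width / 2) * img_width)    # 400
y_max = int((y_center + height / 2) * img_height)  # 360

# Clamp to the image bounds, exactly as the diff does
x_min, y_min = max(0, x_min), max(0, y_min)
x_max, y_max = min(img_width, x_max), min(img_height, y_max)
print((x_min, y_min, x_max, y_max))  # (240, 120, 400, 360)
```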
@@ -206,45 +266,108 @@ def __classification_split(input_directory: str, output_directory: str):
      train_dir.mkdir(parents=True, exist_ok=True)
      valid_dir.mkdir(parents=True, exist_ok=True)

-     # Process each class directory
-     for class_dir in input_directory.iterdir():
-         if not class_dir.is_dir():
-             continue
-
-         class_name = class_dir.name
-         print(f"Processing class: {class_name}")
-
-         # Create corresponding class directories in train and valid
+     # Create class directories based on class_mapping
+     for class_name in class_mapping:
          (train_dir / class_name).mkdir(exist_ok=True)
          (valid_dir / class_name).mkdir(exist_ok=True)
+         print(f"Created directory for class: {class_name}")
+
+     # Process each class folder and its images
+     valid_images = []
+
+     # First, collect all valid label files
+     valid_label_stems = {label_file.stem for label_file in labels_directory.glob("*.txt")
+                          if label_file.exists() and os.path.getsize(label_file) > 0}
+
+     print(f"Found {len(valid_label_stems)} valid label files")
+
+     for class_name, image_names in class_mapping.items():
+         print(f"Processing class: {class_name} with {len(image_names)} images")

-         # Get all image files
-         image_files = list(class_dir.glob('*.jpg')) + list(class_dir.glob('*.jpeg')) + list(class_dir.glob('*.png'))
-
-         if not image_files:
-             print(f"Warning: No images found in {class_dir}")
-             continue
+         for image_name in image_names:
+             # Check if the image exists directly in the input directory
+             image_path = input_directory / image_name

-         # Shuffle the files to ensure random distribution
-         np.random.shuffle(image_files)
-
-         # Split into train (90%) and valid (10%)
-         split_idx = int(len(image_files) * 0.9)
-         train_files = image_files[:split_idx]
-         valid_files = image_files[split_idx:]
-
-         # Copy files to respective directories
-         for img_file in train_files:
-             shutil.copy(img_file, train_dir / class_name / img_file.name)
+             if not image_path.exists():
+                 continue
+
+             # Skip images that don't have a valid label
+             if image_path.stem not in valid_label_stems:
+                 continue
+
+             label_file = labels_directory / (image_path.stem + '.txt')

-         for img_file in valid_files:
-             shutil.copy(img_file, valid_dir / class_name / img_file.name)
-
-         print(f" - {len(train_files)} images in train, {len(valid_files)} images in valid")
+             try:
+                 img = Image.open(image_path)
+
+                 if label_file.exists():
+                     # If label exists, crop the image
+                     with open(label_file, 'r') as f:
+                         lines = f.readlines()
+                         if lines:
+                             parts = lines[0].strip().split()
+                             if len(parts) >= 5:
+                                 x_center, y_center, width, height = map(float, parts[1:5])
+
+                                 img_width, img_height = img.size
+                                 x_min = int((x_center - width/2) * img_width)
+                                 y_min = int((y_center - height/2) * img_height)
+                                 x_max = int((x_center + width/2) * img_width)
+                                 y_max = int((y_center + height/2) * img_height)
+
+                                 x_min = max(0, x_min)
+                                 y_min = max(0, y_min)
+                                 x_max = min(img_width, x_max)
+                                 y_max = min(img_height, y_max)
+
+                                 img = img.crop((x_min, y_min, x_max, y_max))
+
+                                 img_width, img_height = img.size
+                                 if img_width < img_height:
+                                     # Width is smaller, make it 40
+                                     new_width = 40
+                                     new_height = int((img_height / img_width) * 40)
+                                 else:
+                                     # Height is smaller, make it 40
+                                     new_height = 40
+                                     new_width = int((img_width / img_height) * 40)
+
+                                 #blur the image
+                                 img = img.resize((new_width, new_height), Image.LANCZOS)
+
+                 valid_images.append((image_path, img, class_name))
+             except Exception as e:
+                 print(f"Error processing {image_path}: {e}")
+
+     print(f"Successfully processed {len(valid_images)} valid images for classification")
+
+     # Shuffle and split images
+     random.shuffle(valid_images)
+     split_idx = int(len(valid_images) * 0.9)
+     train_images = valid_images[:split_idx]
+     valid_images = valid_images[split_idx:]
+
+     print(f"Split into {len(train_images)} training images and {len(valid_images)} validation images")
+
+     # Save images to train/valid directories
+     for image_set, dest_dir in [(train_images, train_dir), (valid_images, valid_dir)]:
+         for orig_file, img, class_name in image_set:
+             output_path = dest_dir / class_name / (orig_file.stem + '.jpg')
+
+             # Convert any non-RGB mode to RGB before saving
+             if img.mode != 'RGB':
+                 img = img.convert('RGB')
+
+             img.save(output_path, format='JPEG', quality=95)

-     print(f"\nData split complete. Train and validation sets created in {output_directory}")
+     # Print summary
+     print(f"\nData split complete. Images saved to train and validation sets in {output_directory}")
+     for class_name in class_mapping:
+         train_count = len(list((train_dir / class_name).glob('*.*')))
+         valid_count = len(list((valid_dir / class_name).glob('*.*')))
+         print(f" - {class_name}: {train_count} images in train, {valid_count} images in valid")

- def filter_by_size(images_path: Path, labels_path: Path, sizes: list):
+ def __filter_by_size(images_path: Path, labels_path: Path, sizes: list):
      """
      Filters images by size and updates labels accordingly.

@@ -285,7 +408,6 @@ def filter_by_size(images_path: Path, labels_path: Path, sizes: list):
          label_file.unlink()
      except FileNotFoundError:
          pass
-     return filtered_images

  def __two_stage_update(class_mapping: dict, filtered_images: Path, output_directory: Path, images_path: Path):
      """
@@ -367,7 +489,7 @@ def __delete_orphaned_images_and_inferences(images_path: Path, labels_path: Path
      image_file_jpeg = images_path / (txt_file.stem + ".jpeg")

      if not (image_file_jpg.exists() or image_file_jpeg.exists()):
-         print(f"Deleting {txt_file.name} - No corresponding image file")
+         # print(f"Deleting {txt_file.name} - No corresponding image file")
          txt_file.unlink()

  label_stems = {txt_file.stem for txt_file in labels_path.glob("*.txt")}
@@ -375,7 +497,7 @@ def __delete_orphaned_images_and_inferences(images_path: Path, labels_path: Path

  for image_file in image_files:
      if image_file.stem not in label_stems:
-         print(f"Deleting orphaned image: {image_file.name}")
+         # print(f"Deleting orphaned image: {image_file.name}")
          image_file.unlink()

  print("Orphaned images files without corresponding labels have been deleted.")
@@ -400,7 +522,7 @@ def __delete_invalid_txt_files(images_path: Path, labels_path: Path):
      lines = file.readlines()

      if len(lines) == 0 or len(lines) > 1:
-         print(f"Deleting {txt_file.name} - Invalid file")
+         # print(f"Deleting {txt_file.name} - Invalid file")
          txt_file.unlink()

      image_file_jpg = images_path / (txt_file.stem + ".jpg")
@@ -408,10 +530,10 @@ def __delete_invalid_txt_files(images_path: Path, labels_path: Path):

      if image_file_jpg.exists():
          image_file_jpg.unlink()
-         print(f"Deleted corresponding image file: {image_file_jpg.name}")
+         # print(f"Deleted corresponding image file: {image_file_jpg.name}")
      elif image_file_jpeg.exists():
          image_file_jpeg.unlink()
-         print(f"Deleted corresponding image file: {image_file_jpeg.name}")
+         # print(f"Deleted corresponding image file: {image_file_jpeg.name}")

  print("Invalid text files and their corresponding images files have been deleted.")
