geoai-py 0.15.0__py2.py3-none-any.whl → 0.18.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geoai/__init__.py +16 -1
- geoai/agents/__init__.py +4 -0
- geoai/agents/catalog_models.py +51 -0
- geoai/agents/catalog_tools.py +907 -0
- geoai/agents/geo_agents.py +934 -42
- geoai/agents/stac_models.py +67 -0
- geoai/agents/stac_tools.py +435 -0
- geoai/change_detection.py +32 -7
- geoai/download.py +5 -1
- geoai/geoai.py +3 -0
- geoai/timm_segment.py +4 -1
- geoai/tools/__init__.py +65 -0
- geoai/tools/cloudmask.py +431 -0
- geoai/tools/multiclean.py +357 -0
- geoai/train.py +694 -35
- geoai/utils.py +752 -208
- {geoai_py-0.15.0.dist-info → geoai_py-0.18.0.dist-info}/METADATA +6 -2
- geoai_py-0.18.0.dist-info/RECORD +33 -0
- geoai_py-0.15.0.dist-info/RECORD +0 -26
- {geoai_py-0.15.0.dist-info → geoai_py-0.18.0.dist-info}/WHEEL +0 -0
- {geoai_py-0.15.0.dist-info → geoai_py-0.18.0.dist-info}/entry_points.txt +0 -0
- {geoai_py-0.15.0.dist-info → geoai_py-0.18.0.dist-info}/licenses/LICENSE +0 -0
- {geoai_py-0.15.0.dist-info → geoai_py-0.18.0.dist-info}/top_level.txt +0 -0
geoai/train.py
CHANGED
@@ -34,6 +34,14 @@ try:
 except ImportError:
     SMP_AVAILABLE = False
 
+# Additional imports for Lightly Train
+try:
+    import lightly_train
+
+    LIGHTLY_TRAIN_AVAILABLE = True
+except ImportError:
+    LIGHTLY_TRAIN_AVAILABLE = False
+
 
 def parse_coco_annotations(
     coco_json_path: str, images_dir: str, labels_dir: str
@@ -1428,8 +1436,12 @@ def instance_segmentation_inference_on_geotiff(
     # Apply Non-Maximum Suppression to handle overlapping detections
     if len(all_detections) > 0:
         # Convert to tensors for NMS
-        boxes = torch.tensor(
-
+        boxes = torch.tensor(
+            [det["box"] for det in all_detections], dtype=torch.float32
+        )
+        scores = torch.tensor(
+            [det["score"] for det in all_detections], dtype=torch.float32
+        )
 
         # Apply NMS with IoU threshold
         nms_threshold = 0.3  # IoU threshold for NMS
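The boxes and scores tensors assembled above are the standard inputs to torchvision's NMS, applied with the 0.3 IoU threshold set just below. A minimal, self-contained sketch of that filtering step (the detection values are made up for illustration):

import torch
from torchvision.ops import nms

# Hypothetical detections accumulated across tiles; "box" is (x1, y1, x2, y2).
all_detections = [
    {"box": [10.0, 10.0, 50.0, 50.0], "score": 0.9},
    {"box": [12.0, 11.0, 52.0, 49.0], "score": 0.7},  # heavily overlaps the first box
]
boxes = torch.tensor([det["box"] for det in all_detections], dtype=torch.float32)
scores = torch.tensor([det["score"] for det in all_detections], dtype=torch.float32)

keep = nms(boxes, scores, iou_threshold=0.3)  # indices of detections to keep
kept_detections = [all_detections[i] for i in keep.tolist()]
print(len(kept_detections))  # 1 -- the lower-scoring overlapping box is suppressed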
@@ -1909,6 +1921,96 @@ class SemanticRandomHorizontalFlip:
         return image, mask
 
 
+class SemanticRandomVerticalFlip:
+    """Random vertical flip transform for semantic segmentation."""
+
+    def __init__(self, prob: float = 0.5) -> None:
+        self.prob = prob
+
+    def __call__(
+        self, image: torch.Tensor, mask: torch.Tensor
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        if random.random() < self.prob:
+            # Flip image and mask along height dimension
+            image = torch.flip(image, dims=[1])
+            mask = torch.flip(mask, dims=[0])
+        return image, mask
+
+
+class SemanticRandomRotation90:
+    """Random 90-degree rotation transform for semantic segmentation."""
+
+    def __init__(self, prob: float = 0.5) -> None:
+        self.prob = prob
+
+    def __call__(
+        self, image: torch.Tensor, mask: torch.Tensor
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        if random.random() < self.prob:
+            # Randomly rotate by 90, 180, or 270 degrees
+            k = random.randint(1, 3)
+            image = torch.rot90(image, k, dims=[1, 2])
+            mask = torch.rot90(mask, k, dims=[0, 1])
+        return image, mask
+
+
+class SemanticBrightnessAdjustment:
+    """Random brightness adjustment transform for semantic segmentation."""
+
+    def __init__(
+        self, brightness_range: Tuple[float, float] = (0.8, 1.2), prob: float = 0.5
+    ) -> None:
+        """
+        Initialize brightness adjustment transform.
+
+        Args:
+            brightness_range: Tuple of (min, max) brightness factors.
+            prob: Probability of applying the transform.
+        """
+        self.brightness_range = brightness_range
+        self.prob = prob
+
+    def __call__(
+        self, image: torch.Tensor, mask: torch.Tensor
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        if random.random() < self.prob:
+            # Apply random brightness adjustment
+            factor = self.brightness_range[0] + random.random() * (
+                self.brightness_range[1] - self.brightness_range[0]
+            )
+            image = torch.clamp(image * factor, 0, 1)
+        return image, mask
+
+
+class SemanticContrastAdjustment:
+    """Random contrast adjustment transform for semantic segmentation."""
+
+    def __init__(
+        self, contrast_range: Tuple[float, float] = (0.8, 1.2), prob: float = 0.5
+    ) -> None:
+        """
+        Initialize contrast adjustment transform.
+
+        Args:
+            contrast_range: Tuple of (min, max) contrast factors.
+            prob: Probability of applying the transform.
+        """
+        self.contrast_range = contrast_range
+        self.prob = prob
+
+    def __call__(
+        self, image: torch.Tensor, mask: torch.Tensor
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        if random.random() < self.prob:
+            # Apply random contrast adjustment
+            factor = self.contrast_range[0] + random.random() * (
+                self.contrast_range[1] - self.contrast_range[0]
+            )
+            mean = image.mean(dim=(1, 2), keepdim=True)
+            image = torch.clamp((image - mean) * factor + mean, 0, 1)
+        return image, mask
+
+
 def get_semantic_transform(train: bool) -> Any:
     """
     Get transforms for semantic segmentation data augmentation.
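The four augmentations added above are plain (image, mask) callables, so they can be chained into a single callable and passed as the train_transforms argument introduced later in this diff. A minimal sketch; the Compose-style helper is illustrative and not part of geoai:

import torch
from geoai.train import (
    SemanticBrightnessAdjustment,
    SemanticRandomRotation90,
    SemanticRandomVerticalFlip,
)

class ComposeSemanticTransforms:
    """Apply a list of (image, mask) transforms in order (illustrative helper)."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, mask):
        for t in self.transforms:
            image, mask = t(image, mask)
        return image, mask

augment = ComposeSemanticTransforms(
    [
        SemanticRandomVerticalFlip(prob=0.5),
        SemanticRandomRotation90(prob=0.5),
        SemanticBrightnessAdjustment(brightness_range=(0.8, 1.2), prob=0.5),
    ]
)

image = torch.rand(3, 256, 256)         # CHW image scaled to [0, 1]
mask = torch.randint(0, 2, (256, 256))  # HW class mask
image, mask = augment(image, mask)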
@@ -2025,14 +2127,14 @@ def get_smp_model(
     )
 
 
-def dice_coefficient(
+def f1_score(
     pred: torch.Tensor,
     target: torch.Tensor,
     smooth: float = 1e-6,
     num_classes: Optional[int] = None,
 ) -> float:
     """
-    Calculate Dice coefficient for segmentation (binary or multi-class).
+    Calculate F1 score (also known as Dice coefficient) for segmentation (binary or multi-class).
 
     Args:
         pred (torch.Tensor): Predicted mask (probabilities or logits) with shape [C, H, W] or [H, W].
@@ -2041,7 +2143,7 @@ def dice_coefficient(
         num_classes (int, optional): Number of classes. If None, auto-detected.
 
     Returns:
-        float: Mean
+        float: Mean F1 score across all classes.
     """
     # Convert predictions to class predictions
     if pred.dim() == 3:  # [C, H, W] format
@@ -2056,8 +2158,8 @@ def dice_coefficient(
     if num_classes is None:
         num_classes = max(pred_classes.max().item(), target.max().item()) + 1
 
-    # Calculate
-
+    # Calculate F1 score for each class and average
+    f1_scores = []
     for class_id in range(num_classes):
         pred_class = (pred_classes == class_id).float()
         target_class = (target == class_id).float()
@@ -2066,10 +2168,10 @@ def dice_coefficient(
         union = pred_class.sum() + target_class.sum()
 
         if union > 0:
-
-
+            f1 = (2.0 * intersection + smooth) / (union + smooth)
+            f1_scores.append(f1.item())
 
-    return sum(
+    return sum(f1_scores) / len(f1_scores) if f1_scores else 0.0
 
 
 def iou_coefficient(
@@ -2119,6 +2221,108 @@ def iou_coefficient(
     return sum(iou_scores) / len(iou_scores) if iou_scores else 0.0
 
 
+def precision_score(
+    pred: torch.Tensor,
+    target: torch.Tensor,
+    smooth: float = 1e-6,
+    num_classes: Optional[int] = None,
+) -> float:
+    """
+    Calculate precision score for segmentation (binary or multi-class).
+
+    Precision = TP / (TP + FP), where:
+    - TP (True Positives): Correctly predicted positive pixels
+    - FP (False Positives): Incorrectly predicted positive pixels
+
+    Args:
+        pred (torch.Tensor): Predicted mask (probabilities or logits) with shape [C, H, W] or [H, W].
+        target (torch.Tensor): Ground truth mask with shape [H, W].
+        smooth (float): Smoothing factor to avoid division by zero.
+        num_classes (int, optional): Number of classes. If None, auto-detected.
+
+    Returns:
+        float: Mean precision score across all classes.
+    """
+    # Convert predictions to class predictions
+    if pred.dim() == 3:  # [C, H, W] format
+        pred = torch.softmax(pred, dim=0)
+        pred_classes = torch.argmax(pred, dim=0)
+    elif pred.dim() == 2:  # [H, W] format
+        pred_classes = pred
+    else:
+        raise ValueError(f"Unexpected prediction dimensions: {pred.shape}")
+
+    # Auto-detect number of classes if not provided
+    if num_classes is None:
+        num_classes = max(pred_classes.max().item(), target.max().item()) + 1
+
+    # Calculate precision for each class and average
+    precision_scores = []
+    for class_id in range(num_classes):
+        pred_class = (pred_classes == class_id).float()
+        target_class = (target == class_id).float()
+
+        true_positives = (pred_class * target_class).sum()
+        predicted_positives = pred_class.sum()
+
+        if predicted_positives > 0:
+            precision = (true_positives + smooth) / (predicted_positives + smooth)
+            precision_scores.append(precision.item())
+
+    return sum(precision_scores) / len(precision_scores) if precision_scores else 0.0
+
+
+def recall_score(
+    pred: torch.Tensor,
+    target: torch.Tensor,
+    smooth: float = 1e-6,
+    num_classes: Optional[int] = None,
+) -> float:
+    """
+    Calculate recall score (also known as sensitivity) for segmentation (binary or multi-class).
+
+    Recall = TP / (TP + FN), where:
+    - TP (True Positives): Correctly predicted positive pixels
+    - FN (False Negatives): Incorrectly predicted negative pixels
+
+    Args:
+        pred (torch.Tensor): Predicted mask (probabilities or logits) with shape [C, H, W] or [H, W].
+        target (torch.Tensor): Ground truth mask with shape [H, W].
+        smooth (float): Smoothing factor to avoid division by zero.
+        num_classes (int, optional): Number of classes. If None, auto-detected.
+
+    Returns:
+        float: Mean recall score across all classes.
+    """
+    # Convert predictions to class predictions
+    if pred.dim() == 3:  # [C, H, W] format
+        pred = torch.softmax(pred, dim=0)
+        pred_classes = torch.argmax(pred, dim=0)
+    elif pred.dim() == 2:  # [H, W] format
+        pred_classes = pred
+    else:
+        raise ValueError(f"Unexpected prediction dimensions: {pred.shape}")
+
+    # Auto-detect number of classes if not provided
+    if num_classes is None:
+        num_classes = max(pred_classes.max().item(), target.max().item()) + 1
+
+    # Calculate recall for each class and average
+    recall_scores = []
+    for class_id in range(num_classes):
+        pred_class = (pred_classes == class_id).float()
+        target_class = (target == class_id).float()
+
+        true_positives = (pred_class * target_class).sum()
+        actual_positives = target_class.sum()
+
+        if actual_positives > 0:
+            recall = (true_positives + smooth) / (actual_positives + smooth)
+            recall_scores.append(recall.item())
+
+    return sum(recall_scores) / len(recall_scores) if recall_scores else 0.0
+
+
 def train_semantic_one_epoch(
     model: torch.nn.Module,
     optimizer: torch.optim.Optimizer,
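A rough usage sketch for the new metric helpers, assuming they are imported from geoai.train as defined in this diff (toy tensors, not real predictions):

import torch
from geoai.train import f1_score, iou_coefficient, precision_score, recall_score

target = torch.tensor([[0, 0], [1, 1]])  # ground-truth mask, shape [H, W]
logits = torch.zeros(2, 2, 2)            # predictions, shape [C, H, W]
logits[1, 1, :] = 5.0                    # strongly predict class 1 on the bottom row

# The argmax of the logits matches the target exactly, so every metric is ~1.0.
print(f1_score(logits, target))
print(iou_coefficient(logits, target))
print(precision_score(logits, target))
print(recall_score(logits, target))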
@@ -2200,13 +2404,15 @@ def evaluate_semantic(
         num_classes (int): Number of classes for evaluation metrics.
 
     Returns:
-        dict: Evaluation metrics including loss, IoU, and
+        dict: Evaluation metrics including loss, IoU, F1, precision, and recall.
     """
     model.eval()
 
     total_loss = 0
-
+    f1_scores = []
     iou_scores = []
+    precision_scores = []
+    recall_scores = []
     num_batches = len(data_loader)
 
     with torch.no_grad():
@@ -2222,17 +2428,31 @@ def evaluate_semantic(
 
             # Calculate metrics for each sample in the batch
             for pred, target in zip(outputs, targets):
-
+                f1 = f1_score(pred, target, num_classes=num_classes)
                 iou = iou_coefficient(pred, target, num_classes=num_classes)
-
+                precision = precision_score(pred, target, num_classes=num_classes)
+                recall = recall_score(pred, target, num_classes=num_classes)
+                f1_scores.append(f1)
                 iou_scores.append(iou)
+                precision_scores.append(precision)
+                recall_scores.append(recall)
 
     # Calculate metrics
     avg_loss = total_loss / num_batches
-
+    avg_f1 = sum(f1_scores) / len(f1_scores) if f1_scores else 0
    avg_iou = sum(iou_scores) / len(iou_scores) if iou_scores else 0
-
-
+    avg_precision = (
+        sum(precision_scores) / len(precision_scores) if precision_scores else 0
+    )
+    avg_recall = sum(recall_scores) / len(recall_scores) if recall_scores else 0
+
+    return {
+        "loss": avg_loss,
+        "F1": avg_f1,
+        "IoU": avg_iou,
+        "Precision": avg_precision,
+        "Recall": avg_recall,
+    }
 
 
 def train_segmentation_model(
@@ -2261,6 +2481,9 @@ def train_segmentation_model(
     target_size: Optional[Tuple[int, int]] = None,
     resize_mode: str = "resize",
     num_workers: Optional[int] = None,
+    early_stopping_patience: Optional[int] = None,
+    train_transforms: Optional[Callable] = None,
+    val_transforms: Optional[Callable] = None,
     **kwargs: Any,
 ) -> torch.nn.Module:
     """
@@ -2313,6 +2536,17 @@ def train_segmentation_model(
             'resize' - Resize images to target_size (may change aspect ratio)
             'pad' - Pad images to target_size (preserves aspect ratio). Defaults to 'resize'.
         num_workers (int): Number of workers for data loading. If None, uses 0 on macOS and Windows, 8 otherwise.
+            Both image and mask should be torch.Tensor objects. The image tensor is expected to be in
+            CHW format (channels, height, width), and the mask tensor in HW format (height, width).
+            If None, uses default transforms (horizontal flip with 0.5 probability). Defaults to None.
+        val_transforms (callable, optional): Custom transforms for validation data.
+            Should be a callable that accepts (image, mask) tensors and returns transformed (image, mask).
+            The image tensor is expected to be in CHW format (channels, height, width), and the mask tensor in HW format (height, width).
+            Both image and mask should be torch.Tensor objects. If None, uses default transforms
+            (horizontal flip with 0.5 probability). Defaults to None.
+        val_transforms (callable, optional): Custom transforms for validation data.
+            Should be a callable that accepts (image, mask) tensors and returns transformed (image, mask).
+            If None, uses default transforms (no augmentation). Defaults to None.
         **kwargs: Additional arguments passed to smp.create_model().
     Returns:
         None: Model weights are saved to output_dir.
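A sketch of how the new training options might be passed. Only parameters introduced or referenced in this hunk are shown; the data and model arguments that train_segmentation_model also requires are not part of this hunk, so the actual call is left as a comment:

from geoai.train import train_segmentation_model

training_kwargs = dict(
    output_dir="output/building_seg",  # placeholder path; model weights are saved here
    early_stopping_patience=10,        # new: stop after 10 epochs without IoU improvement
    train_transforms=None,             # new: or a custom (image, mask) callable such as the Compose sketch above
    val_transforms=None,               # new: None keeps the default validation transform (no augmentation)
)
# train_segmentation_model(..., **training_kwargs)  # remaining data/model arguments omitted here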
@@ -2455,10 +2689,22 @@ def train_segmentation_model(
         print("No resizing needed.")
 
     # Create datasets
+    # Use custom transforms if provided, otherwise use default transforms
+    train_transform = (
+        train_transforms
+        if train_transforms is not None
+        else get_semantic_transform(train=True)
+    )
+    val_transform = (
+        val_transforms
+        if val_transforms is not None
+        else get_semantic_transform(train=False)
+    )
+
     train_dataset = SemanticSegmentationDataset(
         train_imgs,
         train_labels,
-        transforms=
+        transforms=train_transform,
         num_channels=num_channels,
         target_size=target_size,
         resize_mode=resize_mode,
@@ -2467,7 +2713,7 @@ def train_segmentation_model(
     val_dataset = SemanticSegmentationDataset(
         val_imgs,
         val_labels,
-        transforms=
+        transforms=val_transform,
         num_channels=num_channels,
         target_size=target_size,
         resize_mode=resize_mode,
@@ -2542,7 +2788,7 @@ def train_segmentation_model(
         print(f"Using {torch.cuda.device_count()} GPUs for training")
         model = torch.nn.DataParallel(model)
 
-    # Set up loss function (CrossEntropyLoss for multi-class, can also use
+    # Set up loss function (CrossEntropyLoss for multi-class, can also use F1Loss)
     criterion = torch.nn.CrossEntropyLoss()
 
     # Set up optimizer
@@ -2560,8 +2806,11 @@ def train_segmentation_model(
     train_losses = []
     val_losses = []
     val_ious = []
-
+    val_f1s = []
+    val_precisions = []
+    val_recalls = []
     start_epoch = 0
+    epochs_without_improvement = 0
 
     # Load checkpoint if provided
     if checkpoint_path is not None:
@@ -2596,8 +2845,15 @@ def train_segmentation_model(
             val_losses = checkpoint["val_losses"]
         if "val_ious" in checkpoint:
             val_ious = checkpoint["val_ious"]
-        if "
-
+        if "val_f1s" in checkpoint:
+            val_f1s = checkpoint["val_f1s"]
+        # Also check for old val_dices format for backward compatibility
+        elif "val_dices" in checkpoint:
+            val_f1s = checkpoint["val_dices"]
+        if "val_precisions" in checkpoint:
+            val_precisions = checkpoint["val_precisions"]
+        if "val_recalls" in checkpoint:
+            val_recalls = checkpoint["val_recalls"]
 
         print(f"Resuming training from epoch {start_epoch}")
         print(f"Previous best IoU: {best_iou:.4f}")
@@ -2637,7 +2893,9 @@ def train_segmentation_model(
         )
         val_losses.append(eval_metrics["loss"])
         val_ious.append(eval_metrics["IoU"])
-
+        val_f1s.append(eval_metrics["F1"])
+        val_precisions.append(eval_metrics["Precision"])
+        val_recalls.append(eval_metrics["Recall"])
 
         # Update learning rate
         lr_scheduler.step(eval_metrics["loss"])
@@ -2648,14 +2906,28 @@ def train_segmentation_model(
             f"Train Loss: {train_loss:.4f}, "
             f"Val Loss: {eval_metrics['loss']:.4f}, "
             f"Val IoU: {eval_metrics['IoU']:.4f}, "
-            f"Val
+            f"Val F1: {eval_metrics['F1']:.4f}, "
+            f"Val Precision: {eval_metrics['Precision']:.4f}, "
+            f"Val Recall: {eval_metrics['Recall']:.4f}"
         )
 
-        # Save best model
+        # Save best model and check for early stopping
         if eval_metrics["IoU"] > best_iou:
             best_iou = eval_metrics["IoU"]
+            epochs_without_improvement = 0
             print(f"Saving best model with IoU: {best_iou:.4f}")
             torch.save(model.state_dict(), os.path.join(output_dir, "best_model.pth"))
+        else:
+            epochs_without_improvement += 1
+            if (
+                early_stopping_patience is not None
+                and epochs_without_improvement >= early_stopping_patience
+            ):
+                print(
+                    f"\nEarly stopping triggered after {epochs_without_improvement} epochs without improvement"
+                )
+                print(f"Best validation IoU: {best_iou:.4f}")
+                break
 
         # Save checkpoint every 10 epochs (if not save_best_only)
         if not save_best_only and ((epoch + 1) % 10 == 0 or epoch == num_epochs - 1):
@@ -2673,7 +2945,9 @@ def train_segmentation_model(
                     "train_losses": train_losses,
                     "val_losses": val_losses,
                     "val_ious": val_ious,
-                    "
+                    "val_f1s": val_f1s,
+                    "val_precisions": val_precisions,
+                    "val_recalls": val_recalls,
                 },
                 os.path.join(output_dir, f"checkpoint_epoch_{epoch+1}.pth"),
             )
@@ -2686,7 +2960,9 @@ def train_segmentation_model(
         "train_losses": train_losses,
         "val_losses": val_losses,
         "val_ious": val_ious,
-        "
+        "val_f1s": val_f1s,
+        "val_precisions": val_precisions,
+        "val_recalls": val_recalls,
     }
     torch.save(history, os.path.join(output_dir, "training_history.pth"))
 
@@ -2702,7 +2978,9 @@ def train_segmentation_model(
         f.write(f"Total epochs: {num_epochs}\n")
         f.write(f"Best validation IoU: {best_iou:.4f}\n")
         f.write(f"Final validation IoU: {val_ious[-1]:.4f}\n")
-        f.write(f"Final validation
+        f.write(f"Final validation F1: {val_f1s[-1]:.4f}\n")
+        f.write(f"Final validation Precision: {val_precisions[-1]:.4f}\n")
+        f.write(f"Final validation Recall: {val_recalls[-1]:.4f}\n")
         f.write(f"Final validation loss: {val_losses[-1]:.4f}\n")
 
     print(f"Training complete! Best IoU: {best_iou:.4f}")
@@ -2731,10 +3009,10 @@ def train_segmentation_model(
     plt.grid(True)
 
     plt.subplot(1, 3, 3)
-    plt.plot(
-    plt.title("
+    plt.plot(val_f1s, label="Val F1")
+    plt.title("F1 Score")
     plt.xlabel("Epoch")
-    plt.ylabel("
+    plt.ylabel("F1")
     plt.legend()
     plt.grid(True)
 
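The history dict saved to training_history.pth above now carries the F1, precision, and recall series alongside loss and IoU. A small sketch of inspecting it after training (the output directory name is a placeholder):

import os
import torch

history = torch.load(os.path.join("output/building_seg", "training_history.pth"))
print(sorted(history))  # ['train_losses', 'val_f1s', 'val_ious', 'val_losses', 'val_precisions', 'val_recalls']
best_epoch = max(range(len(history["val_f1s"])), key=lambda i: history["val_f1s"][i])
print(f"Best val F1 {history['val_f1s'][best_epoch]:.4f} at epoch {best_epoch + 1}")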
@@ -2764,6 +3042,7 @@ def semantic_inference_on_geotiff(
     device: Optional[torch.device] = None,
     probability_path: Optional[str] = None,
     probability_threshold: Optional[float] = None,
+    save_class_probabilities: bool = False,
     quiet: bool = False,
     **kwargs: Any,
 ) -> Tuple[str, float]:
@@ -2785,6 +3064,8 @@ def semantic_inference_on_geotiff(
         probability_threshold (float, optional): Probability threshold for binary classification.
             Only used when num_classes=2. If provided, pixels with class 1 probability >= threshold
             are classified as class 1, otherwise class 0. If None (default), uses argmax.
+        save_class_probabilities (bool): If True and probability_path is provided, saves each
+            class probability as a separate single-band file. Defaults to False.
         quiet (bool): If True, suppress progress bar. Defaults to False.
         **kwargs: Additional arguments.
 
@@ -3001,7 +3282,7 @@ def semantic_inference_on_geotiff(
         prob_meta = meta.copy()
         prob_meta.update({"count": num_classes, "dtype": "float32"})
 
-        # Save normalized probabilities
+        # Save normalized probabilities as multi-band raster
         with rasterio.open(probability_path, "w", **prob_meta) as dst:
             for class_idx in range(num_classes):
                 # Normalize probabilities
@@ -3015,6 +3296,36 @@ def semantic_inference_on_geotiff(
         if not quiet:
             print(f"Saved probability map to {probability_path}")
 
+        # Save individual class probabilities if requested
+        if save_class_probabilities:
+            # Prepare single-band metadata
+            single_band_meta = meta.copy()
+            single_band_meta.update({"count": 1, "dtype": "float32"})
+
+            # Get base filename and extension
+            prob_base = os.path.splitext(probability_path)[0]
+            prob_ext = os.path.splitext(probability_path)[1]
+
+            for class_idx in range(num_classes):
+                # Create filename for this class
+                class_prob_path = f"{prob_base}_class_{class_idx}{prob_ext}"
+
+                # Normalize probabilities
+                prob_band = np.zeros((height, width), dtype=np.float32)
+                prob_band[valid_pixels] = (
+                    prob_accumulator[class_idx, valid_pixels]
+                    / count_accumulator[valid_pixels]
+                )
+
+                # Save single-band file
+                with rasterio.open(class_prob_path, "w", **single_band_meta) as dst:
+                    dst.write(prob_band, 1)
+
+                if not quiet:
+                    print(
+                        f"Saved class {class_idx} probability to {class_prob_path}"
+                    )
+
     return output_path, inference_time
 
 
@@ -3031,6 +3342,7 @@ def semantic_inference_on_image(
     binary_output: bool = True,
     probability_path: Optional[str] = None,
     probability_threshold: Optional[float] = None,
+    save_class_probabilities: bool = False,
     quiet: bool = False,
     **kwargs: Any,
 ) -> Tuple[str, float]:
@@ -3053,6 +3365,8 @@ def semantic_inference_on_image(
         probability_threshold (float, optional): Probability threshold for binary classification.
             Only used when num_classes=2. If provided, pixels with class 1 probability >= threshold
             are classified as class 1, otherwise class 0. If None (default), uses argmax.
+        save_class_probabilities (bool): If True and probability_path is provided, saves each
+            class probability as a separate single-band file. Defaults to False.
         quiet (bool): If True, suppress progress bar. Defaults to False.
         **kwargs: Additional arguments.
 
@@ -3331,7 +3645,7 @@ def semantic_inference_on_image(
             "transform": transform,
         }
 
-        # Save normalized probabilities
+        # Save normalized probabilities as multi-band raster
        with rasterio.open(probability_path, "w", **prob_meta) as dst:
             for class_idx in range(num_classes):
                 # Normalize probabilities
@@ -3342,6 +3656,39 @@ def semantic_inference_on_image(
         if not quiet:
             print(f"Saved probability map to {probability_path}")
 
+        # Save individual class probabilities if requested
+        if save_class_probabilities:
+            # Prepare single-band metadata
+            single_band_meta = {
+                "driver": "GTiff",
+                "height": height,
+                "width": width,
+                "count": 1,
+                "dtype": "float32",
+                "transform": transform,
+            }
+
+            # Get base filename and extension
+            prob_base = os.path.splitext(probability_path)[0]
+            prob_ext = os.path.splitext(probability_path)[1]
+
+            for class_idx in range(num_classes):
+                # Create filename for this class
+                class_prob_path = f"{prob_base}_class_{class_idx}{prob_ext}"
+
+                # Normalize probabilities
+                prob_band = np.zeros((height, width), dtype=np.float32)
+                prob_band[valid_pixels] = normalized_probs[class_idx, valid_pixels]
+
+                # Save single-band file
+                with rasterio.open(class_prob_path, "w", **single_band_meta) as dst:
+                    dst.write(prob_band, 1)
+
+                if not quiet:
+                    print(
+                        f"Saved class {class_idx} probability to {class_prob_path}"
+                    )
+
     return output_path, inference_time
 
 
@@ -3359,6 +3706,7 @@ def semantic_segmentation(
     device: Optional[torch.device] = None,
     probability_path: Optional[str] = None,
     probability_threshold: Optional[float] = None,
+    save_class_probabilities: bool = False,
     quiet: bool = False,
     **kwargs: Any,
 ) -> None:
@@ -3381,11 +3729,16 @@ def semantic_segmentation(
         batch_size (int): Batch size for inference.
         device (torch.device, optional): Device to run inference on.
         probability_path (str, optional): Path to save probability map. If provided,
-            the normalized class probabilities will be saved as a multi-band raster
+            the normalized class probabilities will be saved as a multi-band raster
+            where each band contains probabilities for each class.
         probability_threshold (float, optional): Probability threshold for binary classification.
             Only used when num_classes=2. If provided, pixels with class 1 probability >= threshold
             are classified as class 1, otherwise class 0. If None (default), uses argmax.
             Must be between 0 and 1.
+        save_class_probabilities (bool): If True and probability_path is provided, saves each
+            class probability as a separate single-band file. Files will be named like
+            "probability_class_0.tif", "probability_class_1.tif", etc. in the same directory
+            as probability_path. Defaults to False.
         quiet (bool): If True, suppress progress bar. Defaults to False.
         **kwargs: Additional arguments.
 
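A sketch of the new probability outputs. Only the inference parameters documented in this hunk are shown; the input, model, and output arguments that semantic_segmentation also takes are not part of this hunk, so the call is left as a comment:

inference_kwargs = dict(
    probability_path="output/probability.tif",  # multi-band map, one band per class
    probability_threshold=0.5,                  # binary (num_classes=2) case only; otherwise argmax
    save_class_probabilities=True,              # also write one single-band file per class
    quiet=False,
)
# semantic_segmentation(..., **inference_kwargs)  # remaining arguments omitted here
# With two classes this writes output/probability.tif plus
# output/probability_class_0.tif and output/probability_class_1.tif.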
@@ -3462,6 +3815,7 @@ def semantic_segmentation(
             device=device,
             probability_path=probability_path,
             probability_threshold=probability_threshold,
+            save_class_probabilities=save_class_probabilities,
             quiet=quiet,
             **kwargs,
         )
@@ -3482,6 +3836,7 @@ def semantic_segmentation(
             binary_output=True,  # Convert to binary output for better visualization
             probability_path=probability_path,
             probability_threshold=probability_threshold,
+            save_class_probabilities=save_class_probabilities,
             quiet=quiet,
             **kwargs,
         )
@@ -3903,3 +4258,307 @@ def instance_segmentation_batch(
             continue
 
     print(f"Batch processing completed. Results saved to {output_dir}")
+
+
+def lightly_train_model(
+    data_dir: str,
+    output_dir: str,
+    model: str = "torchvision/resnet50",
+    method: str = "dinov2_distillation",
+    epochs: int = 100,
+    batch_size: int = 64,
+    learning_rate: float = 1e-4,
+    **kwargs: Any,
+) -> str:
+    """
+    Train a model using Lightly Train for self-supervised pretraining.
+
+    Args:
+        data_dir (str): Directory containing unlabeled images for training.
+        output_dir (str): Directory to save training outputs and model checkpoints.
+        model (str): Model architecture to train. Supports models from torchvision,
+            timm, ultralytics, etc. Default is "torchvision/resnet50".
+        method (str): Self-supervised learning method. Options include:
+            - "simclr": Works with CNN models (ResNet, EfficientNet, etc.)
+            - "dino": Works with both CNNs and ViTs
+            - "dinov2": Requires ViT models only
+            - "dinov2_distillation": Requires ViT models only (recommended for ViTs)
+            Default is "dinov2_distillation".
+        epochs (int): Number of training epochs. Default is 100.
+        batch_size (int): Batch size for training. Default is 64.
+        learning_rate (float): Learning rate for training. Default is 1e-4.
+        **kwargs: Additional arguments passed to lightly_train.train().
+
+    Returns:
+        str: Path to the exported model file.
+
+    Raises:
+        ImportError: If lightly-train is not installed.
+        ValueError: If data_dir does not exist, is empty, or incompatible model/method.
+
+    Note:
+        Model/Method compatibility:
+        - CNN models (ResNet, EfficientNet): Use "simclr" or "dino"
+        - ViT models: Use "dinov2", "dinov2_distillation", or "dino"
+
+    Example:
+        >>> # For CNN models (ResNet, EfficientNet)
+        >>> model_path = lightly_train_model(
+        ...     data_dir="path/to/unlabeled/images",
+        ...     output_dir="path/to/output",
+        ...     model="torchvision/resnet50",
+        ...     method="simclr",  # Use simclr for CNNs
+        ...     epochs=50
+        ... )
+        >>> # For ViT models
+        >>> model_path = lightly_train_model(
+        ...     data_dir="path/to/unlabeled/images",
+        ...     output_dir="path/to/output",
+        ...     model="timm/vit_base_patch16_224",
+        ...     method="dinov2",  # dinov2 requires ViT
+        ...     epochs=50
+        ... )
+    """
+    if not LIGHTLY_TRAIN_AVAILABLE:
+        raise ImportError(
+            "lightly-train is not installed. Please install it with: "
+            "pip install lightly-train"
+        )
+
+    if not os.path.exists(data_dir):
+        raise ValueError(f"Data directory does not exist: {data_dir}")
+
+    # Check if data directory contains images
+    image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.tif", "*.tiff", "*.bmp"]
+    image_files = []
+    for ext in image_extensions:
+        image_files.extend(glob.glob(os.path.join(data_dir, "**", ext), recursive=True))
+
+    if not image_files:
+        raise ValueError(f"No image files found in {data_dir}")
+
+    # Validate model/method compatibility
+    is_vit_model = "vit" in model.lower() or "vision_transformer" in model.lower()
+
+    if method in ["dinov2", "dinov2_distillation"] and not is_vit_model:
+        raise ValueError(
+            f"Method '{method}' requires a Vision Transformer (ViT) model, but got '{model}'.\n"
+            f"Solutions:\n"
+            f" 1. Use a ViT model: model='timm/vit_base_patch16_224'\n"
+            f" 2. Use a CNN-compatible method: method='simclr' or method='dino'\n"
+            f"\nFor CNN models (ResNet, EfficientNet), use 'simclr' or 'dino'.\n"
+            f"For ViT models, use 'dinov2', 'dinov2_distillation', or 'dino'."
+        )
+
+    print(f"Found {len(image_files)} images in {data_dir}")
+    print(f"Starting self-supervised pretraining with {method} method...")
+    print(f"Model: {model}")
+
+    # Create output directory
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Detect if running in notebook environment and set appropriate configuration
+    def is_notebook():
+        try:
+            from IPython import get_ipython
+
+            if get_ipython() is not None:
+                return True
+        except (ImportError, NameError):
+            pass
+        return False
+
+    # Force single-device training in notebooks to avoid DDP strategy issues
+    if is_notebook():
+        # Only override if not explicitly set by user
+        if "accelerator" not in kwargs:
+            # Use CPU in notebooks to avoid DDP incompatibility
+            # Users can still override by passing accelerator='gpu'
+            kwargs["accelerator"] = "cpu"
+        if "devices" not in kwargs:
+            kwargs["devices"] = 1  # Force single device
+
+    # Train the model using Lightly Train
+    lightly_train.train(
+        out=output_dir,
+        data=data_dir,
+        model=model,
+        method=method,
+        epochs=epochs,
+        batch_size=batch_size,
+        **kwargs,
+    )
+
+    # Return path to the exported model
+    exported_model_path = os.path.join(
+        output_dir, "exported_models", "exported_last.pt"
+    )
+
+    if os.path.exists(exported_model_path):
+        print(
+            f"Model training completed. Exported model saved to: {exported_model_path}"
+        )
+        return exported_model_path
+    else:
+        # Check for alternative export paths
+        possible_paths = [
+            os.path.join(output_dir, "exported_models", "exported_best.pt"),
+            os.path.join(output_dir, "checkpoints", "last.ckpt"),
+        ]
+
+        for path in possible_paths:
+            if os.path.exists(path):
+                print(f"Model training completed. Exported model saved to: {path}")
+                return path
+
+        print(f"Model training completed. Output saved to: {output_dir}")
+        return output_dir
+
+
+def load_lightly_pretrained_model(
+    model_path: str,
+    model_architecture: str = "torchvision/resnet50",
+    device: str = None,
+) -> torch.nn.Module:
+    """
+    Load a pretrained model from Lightly Train.
+
+    Args:
+        model_path (str): Path to the pretrained model file (.pt format).
+        model_architecture (str): Architecture of the model to load.
+            Default is "torchvision/resnet50".
+        device (str): Device to load the model on. If None, uses CPU.
+
+    Returns:
+        torch.nn.Module: Loaded pretrained model ready for fine-tuning.
+
+    Raises:
+        FileNotFoundError: If model_path does not exist.
+        ImportError: If required libraries are not available.
+
+    Example:
+        >>> model = load_lightly_pretrained_model(
+        ...     model_path="path/to/pretrained_model.pt",
+        ...     model_architecture="torchvision/resnet50",
+        ...     device="cuda"
+        ... )
+        >>> # Fine-tune the model with your existing training pipeline
+    """
+    if not os.path.exists(model_path):
+        raise FileNotFoundError(f"Model file not found: {model_path}")
+
+    print(f"Loading pretrained model from: {model_path}")
+
+    # Load the model based on architecture
+    if model_architecture.startswith("torchvision/"):
+        model_name = model_architecture.replace("torchvision/", "")
+
+        # Import the model from torchvision
+        if hasattr(torchvision.models, model_name):
+            model = getattr(torchvision.models, model_name)()
+        else:
+            raise ValueError(f"Unknown torchvision model: {model_name}")
+
+    elif model_architecture.startswith("timm/"):
+        try:
+            import timm
+
+            model_name = model_architecture.replace("timm/", "")
+            model = timm.create_model(model_name)
+        except ImportError:
+            raise ImportError(
+                "timm is required for TIMM models. Install with: pip install timm"
+            )
+
+    else:
+        # For other architectures, try to import from torchvision as default
+        try:
+            model = getattr(torchvision.models, model_architecture)()
+        except AttributeError:
+            raise ValueError(f"Unsupported model architecture: {model_architecture}")
+
+    # Load the pretrained weights
+    try:
+        state_dict = torch.load(model_path, map_location=device, weights_only=True)
+    except TypeError:
+        # For backward compatibility with older PyTorch versions
+        state_dict = torch.load(model_path, map_location=device)
+    model.load_state_dict(state_dict)
+
+    print(f"Successfully loaded pretrained model: {model_architecture}")
+    return model
+
+
+def lightly_embed_images(
+    data_dir: str,
+    model_path: str,
+    output_path: str,
+    model_architecture: str = None,  # Deprecated, kept for backwards compatibility
+    batch_size: int = 64,
+    **kwargs: Any,
+) -> str:
+    """
+    Generate embeddings for images using a Lightly Train pretrained model.
+
+    Args:
+        data_dir (str): Directory containing images to embed.
+        model_path (str): Path to the pretrained model checkpoint file (.ckpt).
+        output_path (str): Path to save the embeddings (as .pt file).
+        model_architecture (str): Architecture of the pretrained model (deprecated,
+            kept for backwards compatibility but not used). The model architecture
+            is automatically loaded from the checkpoint.
+        batch_size (int): Batch size for embedding generation. Default is 64.
+        **kwargs: Additional arguments passed to lightly_train.embed().
+            Supported kwargs include: image_size, num_workers, accelerator, etc.
+
+    Returns:
+        str: Path to the saved embeddings file.
+
+    Raises:
+        ImportError: If lightly-train is not installed.
+        FileNotFoundError: If data_dir or model_path does not exist.
+
+    Note:
+        The model_path should point to a .ckpt file from the training output,
+        typically located at: output_dir/checkpoints/last.ckpt
+
+    Example:
+        >>> embeddings_path = lightly_embed_images(
+        ...     data_dir="path/to/images",
+        ...     model_path="output_dir/checkpoints/last.ckpt",
+        ...     output_path="embeddings.pt",
+        ...     batch_size=32
+        ... )
+        >>> print(f"Embeddings saved to: {embeddings_path}")
+    """
+    if not LIGHTLY_TRAIN_AVAILABLE:
+        raise ImportError(
+            "lightly-train is not installed. Please install it with: "
+            "pip install lightly-train"
+        )
+
+    if not os.path.exists(data_dir):
+        raise FileNotFoundError(f"Data directory does not exist: {data_dir}")
+
+    if not os.path.exists(model_path):
+        raise FileNotFoundError(f"Model file does not exist: {model_path}")
+
+    print(f"Generating embeddings for images in: {data_dir}")
+    print(f"Using pretrained model: {model_path}")
+
+    output_dir = os.path.dirname(output_path)
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+
+    # Generate embeddings using Lightly Train
+    # Note: model_architecture is not used - it's inferred from the checkpoint
+    lightly_train.embed(
+        out=output_path,
+        data=data_dir,
+        checkpoint=model_path,
+        batch_size=batch_size,
+        **kwargs,
+    )
+
+    print(f"Embeddings saved to: {output_path}")
+    return output_path