geoai-py 0.14.0__py2.py3-none-any.whl → 0.16.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geoai/train.py CHANGED
@@ -34,6 +34,97 @@ try:
 except ImportError:
     SMP_AVAILABLE = False
 
+# Additional imports for Lightly Train
+try:
+    import lightly_train
+
+    LIGHTLY_TRAIN_AVAILABLE = True
+except ImportError:
+    LIGHTLY_TRAIN_AVAILABLE = False
+
+
+def parse_coco_annotations(
+    coco_json_path: str, images_dir: str, labels_dir: str
+) -> Tuple[List[str], List[str]]:
+    """
+    Parse COCO format annotations and return lists of image and label paths.
+
+    Args:
+        coco_json_path (str): Path to COCO annotations JSON file (instances.json).
+        images_dir (str): Directory containing image files.
+        labels_dir (str): Directory containing label mask files.
+
+    Returns:
+        Tuple[List[str], List[str]]: Lists of image paths and corresponding label paths.
+    """
+    import json
+
+    with open(coco_json_path, "r") as f:
+        coco_data = json.load(f)
+
+    # Create mapping from image_id to filename
+    image_files = []
+    label_files = []
+
+    for img_info in coco_data["images"]:
+        img_filename = img_info["file_name"]
+        img_path = os.path.join(images_dir, img_filename)
+
+        # Derive label filename (same as image filename)
+        label_path = os.path.join(labels_dir, img_filename)
+
+        if os.path.exists(img_path) and os.path.exists(label_path):
+            image_files.append(img_path)
+            label_files.append(label_path)
+
+    return image_files, label_files
+
+
+def parse_yolo_annotations(
+    data_dir: str, images_subdir: str = "images", labels_subdir: str = "labels"
+) -> Tuple[List[str], List[str]]:
+    """
+    Parse YOLO format annotations and return lists of image and label paths.
+
+    YOLO format structure:
+    - data_dir/images/: Contains image files (.tif, .png, .jpg)
+    - data_dir/labels/: Contains label masks (.tif, .png) and YOLO .txt files
+    - data_dir/classes.txt: Class names (one per line)
+
+    Args:
+        data_dir (str): Root directory containing YOLO-format data.
+        images_subdir (str): Subdirectory name for images. Defaults to 'images'.
+        labels_subdir (str): Subdirectory name for labels. Defaults to 'labels'.
+
+    Returns:
+        Tuple[List[str], List[str]]: Lists of image paths and corresponding label paths.
+    """
+    images_dir = os.path.join(data_dir, images_subdir)
+    labels_dir = os.path.join(data_dir, labels_subdir)
+
+    if not os.path.exists(images_dir):
+        raise FileNotFoundError(f"Images directory not found: {images_dir}")
+    if not os.path.exists(labels_dir):
+        raise FileNotFoundError(f"Labels directory not found: {labels_dir}")
+
+    # Get all image files
+    image_extensions = (".tif", ".tiff", ".png", ".jpg", ".jpeg")
+    image_files = []
+    label_files = []
+
+    for img_file in os.listdir(images_dir):
+        if img_file.lower().endswith(image_extensions):
+            img_path = os.path.join(images_dir, img_file)
+
+            # Find corresponding label mask (same filename)
+            label_path = os.path.join(labels_dir, img_file)
+
+            if os.path.exists(label_path):
+                image_files.append(img_path)
+                label_files.append(label_path)
+
+    return sorted(image_files), sorted(label_files)
+
 
 def get_instance_segmentation_model(
     num_classes: int = 2, num_channels: int = 3, pretrained: bool = True
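Note: a minimal sketch of how the two new parsers are meant to be called. The directory layout below is illustrative, not something shipped with the package:

    # Hypothetical YOLO layout: data/yolo_site/images/*.tif and data/yolo_site/labels/*.tif
    images, labels = parse_yolo_annotations("data/yolo_site")
    print(f"{len(images)} image/label pairs")

    # Hypothetical COCO layout: annotations JSON plus parallel images/ and labels/ dirs
    images, labels = parse_coco_annotations(
        "data/coco_site/annotations/instances.json",
        "data/coco_site/images",
        "data/coco_site/labels",
    )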
@@ -617,6 +708,7 @@ def train_MaskRCNN_model(
     images_dir: str,
     labels_dir: str,
     output_dir: str,
+    input_format: str = "directory",
     num_channels: int = 3,
     model: Optional[torch.nn.Module] = None,
     pretrained: bool = True,
@@ -640,9 +732,17 @@ def train_MaskRCNN_model(
     the backbone or to continue training from a specific checkpoint.
 
     Args:
-        images_dir (str): Directory containing image GeoTIFF files.
-        labels_dir (str): Directory containing label GeoTIFF files.
+        images_dir (str): Directory containing image GeoTIFF files (for 'directory' format),
+            or root directory containing images/ subdirectory (for 'yolo' format),
+            or directory containing images (for 'coco' format).
+        labels_dir (str): Directory containing label GeoTIFF files (for 'directory' format),
+            or path to COCO annotations JSON file (for 'coco' format),
+            or not used (for 'yolo' format - labels are in images_dir/labels/).
         output_dir (str): Directory to save model checkpoints and results.
+        input_format (str): Input data format - 'directory' (default), 'coco', or 'yolo'.
+            - 'directory': Standard directory structure with separate images_dir and labels_dir
+            - 'coco': COCO JSON format (labels_dir should be path to instances.json)
+            - 'yolo': YOLO format (images_dir is root with images/ and labels/ subdirectories)
         num_channels (int, optional): Number of input channels. If None, auto-detected.
             Defaults to 3.
         model (torch.nn.Module, optional): Predefined model. If None, a new model is created.
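Note: a hedged sketch of the three input_format modes (paths illustrative; remaining arguments left at their defaults):

    # 'directory' (default): separate image and label folders
    train_MaskRCNN_model("tiles/images", "tiles/labels", "runs/maskrcnn")

    # 'coco': labels_dir is the path to instances.json; per the implementation below,
    # the mask directory is derived two levels up from the JSON, in <root>/labels
    train_MaskRCNN_model(
        "coco_site/images",
        "coco_site/annotations/instances.json",
        "runs/maskrcnn",
        input_format="coco",
    )

    # 'yolo': images_dir is the dataset root; labels_dir is ignored
    train_MaskRCNN_model("yolo_site", "", "runs/maskrcnn", input_format="yolo")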
@@ -688,45 +788,63 @@ def train_MaskRCNN_model(
     device = get_device()
     print(f"Using device: {device}")
 
-    # Get all image and label files
-    # Support multiple image formats: GeoTIFF, PNG, JPG, JPEG, TIF, TIFF
-    image_extensions = (".tif", ".tiff", ".png", ".jpg", ".jpeg")
-    label_extensions = (".tif", ".tiff", ".png", ".jpg", ".jpeg")
+    # Get all image and label files based on input format
+    if input_format.lower() == "coco":
+        # Parse COCO format annotations
+        if verbose:
+            print(f"Loading COCO format annotations from {labels_dir}")
+        # For COCO format, labels_dir is path to instances.json
+        # Labels are typically in a "labels" directory parallel to "annotations"
+        coco_root = os.path.dirname(os.path.dirname(labels_dir))  # Go up two levels
+        labels_directory = os.path.join(coco_root, "labels")
+        image_files, label_files = parse_coco_annotations(
+            labels_dir, images_dir, labels_directory
+        )
+    elif input_format.lower() == "yolo":
+        # Parse YOLO format annotations
+        if verbose:
+            print(f"Loading YOLO format data from {images_dir}")
+        image_files, label_files = parse_yolo_annotations(images_dir)
+    else:
+        # Default: directory format
+        # Support multiple image formats: GeoTIFF, PNG, JPG, JPEG, TIF, TIFF
+        image_extensions = (".tif", ".tiff", ".png", ".jpg", ".jpeg")
+        label_extensions = (".tif", ".tiff", ".png", ".jpg", ".jpeg")
+
+        image_files = sorted(
+            [
+                os.path.join(images_dir, f)
+                for f in os.listdir(images_dir)
+                if f.lower().endswith(image_extensions)
+            ]
+        )
+        label_files = sorted(
+            [
+                os.path.join(labels_dir, f)
+                for f in os.listdir(labels_dir)
+                if f.lower().endswith(label_extensions)
+            ]
+        )
 
-    image_files = sorted(
-        [
-            os.path.join(images_dir, f)
-            for f in os.listdir(images_dir)
-            if f.lower().endswith(image_extensions)
-        ]
-    )
-    label_files = sorted(
-        [
-            os.path.join(labels_dir, f)
-            for f in os.listdir(labels_dir)
-            if f.lower().endswith(label_extensions)
-        ]
-    )
+        # Ensure matching files
+        if len(image_files) != len(label_files):
+            print("Warning: Number of image files and label files don't match!")
+            # Find matching files by basename
+            basenames = [os.path.basename(f) for f in image_files]
+            label_files = [
+                os.path.join(labels_dir, os.path.basename(f))
+                for f in image_files
+                if os.path.exists(os.path.join(labels_dir, os.path.basename(f)))
+            ]
+            image_files = [
+                f
+                for f, b in zip(image_files, basenames)
+                if os.path.exists(os.path.join(labels_dir, b))
+            ]
+            print(f"Using {len(image_files)} matching files")
 
     print(f"Found {len(image_files)} image files and {len(label_files)} label files")
 
-    # Ensure matching files
-    if len(image_files) != len(label_files):
-        print("Warning: Number of image files and label files don't match!")
-        # Find matching files by basename
-        basenames = [os.path.basename(f) for f in image_files]
-        label_files = [
-            os.path.join(labels_dir, os.path.basename(f))
-            for f in image_files
-            if os.path.exists(os.path.join(labels_dir, os.path.basename(f)))
-        ]
-        image_files = [
-            f
-            for f, b in zip(image_files, basenames)
-            if os.path.exists(os.path.join(labels_dir, b))
-        ]
-        print(f"Using {len(image_files)} matching files")
-
     # Split data into train and validation sets
     train_imgs, val_imgs, train_labels, val_labels = train_test_split(
         image_files, label_files, test_size=val_split, random_state=seed
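Note: the basename-matching fallback above keeps only images whose filename also exists under labels_dir. A standalone illustration of that filter (a set stands in for the file system):

    import os

    image_files = ["img/a.tif", "img/b.tif", "img/c.tif"]
    labels_dir = "lbl"
    on_disk = {os.path.join(labels_dir, n) for n in ("a.tif", "c.tif")}  # pretend only these exist

    matched = [
        f for f in image_files
        if os.path.join(labels_dir, os.path.basename(f)) in on_disk
    ]
    print(matched)  # ['img/a.tif', 'img/c.tif']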
@@ -1915,14 +2033,14 @@ def get_smp_model(
     )
 
 
-def dice_coefficient(
+def f1_score(
     pred: torch.Tensor,
     target: torch.Tensor,
     smooth: float = 1e-6,
     num_classes: Optional[int] = None,
 ) -> float:
     """
-    Calculate Dice coefficient for segmentation (binary or multi-class).
+    Calculate F1 score (also known as Dice coefficient) for segmentation (binary or multi-class).
 
     Args:
         pred (torch.Tensor): Predicted mask (probabilities or logits) with shape [C, H, W] or [H, W].
@@ -1931,7 +2049,7 @@ def dice_coefficient(
         num_classes (int, optional): Number of classes. If None, auto-detected.
 
     Returns:
-        float: Mean Dice coefficient across all classes.
+        float: Mean F1 score across all classes.
     """
     # Convert predictions to class predictions
     if pred.dim() == 3:  # [C, H, W] format
@@ -1946,8 +2064,8 @@ def dice_coefficient(
     if num_classes is None:
         num_classes = max(pred_classes.max().item(), target.max().item()) + 1
 
-    # Calculate Dice for each class and average
-    dice_scores = []
+    # Calculate F1 score for each class and average
+    f1_scores = []
     for class_id in range(num_classes):
         pred_class = (pred_classes == class_id).float()
         target_class = (target == class_id).float()
@@ -1956,10 +2074,10 @@ def dice_coefficient(
         union = pred_class.sum() + target_class.sum()
 
         if union > 0:
-            dice = (2.0 * intersection + smooth) / (union + smooth)
-            dice_scores.append(dice.item())
+            f1 = (2.0 * intersection + smooth) / (union + smooth)
+            f1_scores.append(f1.item())
 
-    return sum(dice_scores) / len(dice_scores) if dice_scores else 0.0
+    return sum(f1_scores) / len(f1_scores) if f1_scores else 0.0
 
 
 def iou_coefficient(
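Note: the rename is cosmetic; the computed quantity is still 2·|A∩B| / (|A|+|B|), i.e. the Dice coefficient, which for per-class binary masks equals the F1 score. A toy check (values chosen here, not from the package's tests):

    import torch

    target = torch.tensor([[0, 1], [1, 1]])
    pred = target.clone()          # perfect prediction in [H, W] format
    print(f1_score(pred, target))  # ~1.0 for both classes, so the mean is ~1.0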
@@ -2009,6 +2127,108 @@ def iou_coefficient(
     return sum(iou_scores) / len(iou_scores) if iou_scores else 0.0
 
 
+def precision_score(
+    pred: torch.Tensor,
+    target: torch.Tensor,
+    smooth: float = 1e-6,
+    num_classes: Optional[int] = None,
+) -> float:
+    """
+    Calculate precision score for segmentation (binary or multi-class).
+
+    Precision = TP / (TP + FP), where:
+    - TP (True Positives): Correctly predicted positive pixels
+    - FP (False Positives): Incorrectly predicted positive pixels
+
+    Args:
+        pred (torch.Tensor): Predicted mask (probabilities or logits) with shape [C, H, W] or [H, W].
+        target (torch.Tensor): Ground truth mask with shape [H, W].
+        smooth (float): Smoothing factor to avoid division by zero.
+        num_classes (int, optional): Number of classes. If None, auto-detected.
+
+    Returns:
+        float: Mean precision score across all classes.
+    """
+    # Convert predictions to class predictions
+    if pred.dim() == 3:  # [C, H, W] format
+        pred = torch.softmax(pred, dim=0)
+        pred_classes = torch.argmax(pred, dim=0)
+    elif pred.dim() == 2:  # [H, W] format
+        pred_classes = pred
+    else:
+        raise ValueError(f"Unexpected prediction dimensions: {pred.shape}")
+
+    # Auto-detect number of classes if not provided
+    if num_classes is None:
+        num_classes = max(pred_classes.max().item(), target.max().item()) + 1
+
+    # Calculate precision for each class and average
+    precision_scores = []
+    for class_id in range(num_classes):
+        pred_class = (pred_classes == class_id).float()
+        target_class = (target == class_id).float()
+
+        true_positives = (pred_class * target_class).sum()
+        predicted_positives = pred_class.sum()
+
+        if predicted_positives > 0:
+            precision = (true_positives + smooth) / (predicted_positives + smooth)
+            precision_scores.append(precision.item())
+
+    return sum(precision_scores) / len(precision_scores) if precision_scores else 0.0
+
+
+def recall_score(
+    pred: torch.Tensor,
+    target: torch.Tensor,
+    smooth: float = 1e-6,
+    num_classes: Optional[int] = None,
+) -> float:
+    """
+    Calculate recall score (also known as sensitivity) for segmentation (binary or multi-class).
+
+    Recall = TP / (TP + FN), where:
+    - TP (True Positives): Correctly predicted positive pixels
+    - FN (False Negatives): Incorrectly predicted negative pixels
+
+    Args:
+        pred (torch.Tensor): Predicted mask (probabilities or logits) with shape [C, H, W] or [H, W].
+        target (torch.Tensor): Ground truth mask with shape [H, W].
+        smooth (float): Smoothing factor to avoid division by zero.
+        num_classes (int, optional): Number of classes. If None, auto-detected.
+
+    Returns:
+        float: Mean recall score across all classes.
+    """
+    # Convert predictions to class predictions
+    if pred.dim() == 3:  # [C, H, W] format
+        pred = torch.softmax(pred, dim=0)
+        pred_classes = torch.argmax(pred, dim=0)
+    elif pred.dim() == 2:  # [H, W] format
+        pred_classes = pred
+    else:
+        raise ValueError(f"Unexpected prediction dimensions: {pred.shape}")
+
+    # Auto-detect number of classes if not provided
+    if num_classes is None:
+        num_classes = max(pred_classes.max().item(), target.max().item()) + 1
+
+    # Calculate recall for each class and average
+    recall_scores = []
+    for class_id in range(num_classes):
+        pred_class = (pred_classes == class_id).float()
+        target_class = (target == class_id).float()
+
+        true_positives = (pred_class * target_class).sum()
+        actual_positives = target_class.sum()
+
+        if actual_positives > 0:
+            recall = (true_positives + smooth) / (actual_positives + smooth)
+            recall_scores.append(recall.item())
+
+    return sum(recall_scores) / len(recall_scores) if recall_scores else 0.0
+
+
 def train_semantic_one_epoch(
     model: torch.nn.Module,
     optimizer: torch.optim.Optimizer,
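Note: a quick sanity sketch for the two new metrics (toy tensors, illustrative only). The prediction adds one false positive for class 1, so precision drops while class-1 recall stays perfect:

    import torch

    target = torch.tensor([[0, 0], [0, 1]])
    pred = torch.tensor([[0, 1], [0, 1]])  # one extra class-1 pixel

    # per-class precision: class 0 -> 1.0, class 1 -> 0.5; mean = 0.75
    print(precision_score(pred, target, num_classes=2))
    # per-class recall: class 0 -> 2/3, class 1 -> 1.0; mean ~ 0.83
    print(recall_score(pred, target, num_classes=2))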
@@ -2090,13 +2310,15 @@ def evaluate_semantic(
         num_classes (int): Number of classes for evaluation metrics.
 
     Returns:
-        dict: Evaluation metrics including loss, IoU, and Dice.
+        dict: Evaluation metrics including loss, IoU, F1, precision, and recall.
     """
     model.eval()
 
     total_loss = 0
-    dice_scores = []
+    f1_scores = []
     iou_scores = []
+    precision_scores = []
+    recall_scores = []
     num_batches = len(data_loader)
 
     with torch.no_grad():
@@ -2112,23 +2334,38 @@ def evaluate_semantic(
 
             # Calculate metrics for each sample in the batch
             for pred, target in zip(outputs, targets):
-                dice = dice_coefficient(pred, target, num_classes=num_classes)
+                f1 = f1_score(pred, target, num_classes=num_classes)
                 iou = iou_coefficient(pred, target, num_classes=num_classes)
-                dice_scores.append(dice)
+                precision = precision_score(pred, target, num_classes=num_classes)
+                recall = recall_score(pred, target, num_classes=num_classes)
+                f1_scores.append(f1)
                 iou_scores.append(iou)
+                precision_scores.append(precision)
+                recall_scores.append(recall)
 
     # Calculate metrics
     avg_loss = total_loss / num_batches
-    avg_dice = sum(dice_scores) / len(dice_scores) if dice_scores else 0
+    avg_f1 = sum(f1_scores) / len(f1_scores) if f1_scores else 0
     avg_iou = sum(iou_scores) / len(iou_scores) if iou_scores else 0
-
-    return {"loss": avg_loss, "Dice": avg_dice, "IoU": avg_iou}
+    avg_precision = (
+        sum(precision_scores) / len(precision_scores) if precision_scores else 0
+    )
+    avg_recall = sum(recall_scores) / len(recall_scores) if recall_scores else 0
+
+    return {
+        "loss": avg_loss,
+        "F1": avg_f1,
+        "IoU": avg_iou,
+        "Precision": avg_precision,
+        "Recall": avg_recall,
+    }
 
 
 def train_segmentation_model(
     images_dir: str,
     labels_dir: str,
     output_dir: str,
+    input_format: str = "directory",
     architecture: str = "unet",
     encoder_name: str = "resnet34",
     encoder_weights: Optional[str] = "imagenet",
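Note: a consumer of the extended metrics dict might read it like this. The model, loader, criterion, and device placeholders are assumptions, and the argument order is inferred from the docstring rather than the full signature:

    metrics = evaluate_semantic(model, val_loader, criterion, device, num_classes=2)
    print(
        f"IoU={metrics['IoU']:.3f} F1={metrics['F1']:.3f} "
        f"P={metrics['Precision']:.3f} R={metrics['Recall']:.3f}"
    )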
@@ -2150,6 +2387,7 @@ def train_segmentation_model(
     target_size: Optional[Tuple[int, int]] = None,
     resize_mode: str = "resize",
     num_workers: Optional[int] = None,
+    early_stopping_patience: Optional[int] = None,
     **kwargs: Any,
 ) -> torch.nn.Module:
     """
@@ -2160,9 +2398,17 @@ def train_segmentation_model(
     this approach treats the task as pixel-level binary classification.
 
     Args:
-        images_dir (str): Directory containing image GeoTIFF files.
-        labels_dir (str): Directory containing label GeoTIFF files.
+        images_dir (str): Directory containing image GeoTIFF files (for 'directory' format),
+            or root directory containing images/ subdirectory (for 'yolo' format),
+            or directory containing images (for 'coco' format).
+        labels_dir (str): Directory containing label GeoTIFF files (for 'directory' format),
+            or path to COCO annotations JSON file (for 'coco' format),
+            or not used (for 'yolo' format - labels are in images_dir/labels/).
         output_dir (str): Directory to save model checkpoints and results.
+        input_format (str): Input data format - 'directory' (default), 'coco', or 'yolo'.
+            - 'directory': Standard directory structure with separate images_dir and labels_dir
+            - 'coco': COCO JSON format (labels_dir should be path to instances.json)
+            - 'yolo': YOLO format (images_dir is root with images/ and labels/ subdirectories)
         architecture (str): Model architecture ('unet', 'deeplabv3', 'deeplabv3plus', 'fpn',
             'pspnet', 'linknet', 'manet'). Defaults to 'unet'.
         encoder_name (str): Encoder backbone name (e.g., 'resnet34', 'resnet50', 'efficientnet-b0').
@@ -2194,6 +2440,8 @@ def train_segmentation_model(
             'resize' - Resize images to target_size (may change aspect ratio)
             'pad' - Pad images to target_size (preserves aspect ratio). Defaults to 'resize'.
         num_workers (int): Number of workers for data loading. If None, uses 0 on macOS and Windows, 8 otherwise.
+        early_stopping_patience (int, optional): Number of epochs with no improvement after which
+            training will be stopped. If None, early stopping is disabled. Defaults to None.
         **kwargs: Additional arguments passed to smp.create_model().
 
     Returns:
         None: Model weights are saved to output_dir.
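Note: a hedged sketch of enabling the new early stopping (paths illustrative; everything else left at its defaults). Improvement is measured on validation IoU, per the implementation further down in this diff:

    train_segmentation_model(
        images_dir="tiles/images",
        labels_dir="tiles/labels",
        output_dir="runs/unet",
        early_stopping_patience=10,  # stop after 10 epochs without a new best IoU
    )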
@@ -2225,45 +2473,63 @@ def train_segmentation_model(
     device = get_device()
     print(f"Using device: {device}")
 
-    # Get all image and label files
-    # Support multiple image formats: GeoTIFF, PNG, JPG, JPEG, TIF, TIFF
-    image_extensions = (".tif", ".tiff", ".png", ".jpg", ".jpeg")
-    label_extensions = (".tif", ".tiff", ".png", ".jpg", ".jpeg")
+    # Get all image and label files based on input format
+    if input_format.lower() == "coco":
+        # Parse COCO format annotations
+        if verbose:
+            print(f"Loading COCO format annotations from {labels_dir}")
+        # For COCO format, labels_dir is path to instances.json
+        # Labels are typically in a "labels" directory parallel to "annotations"
+        coco_root = os.path.dirname(os.path.dirname(labels_dir))  # Go up two levels
+        labels_directory = os.path.join(coco_root, "labels")
+        image_files, label_files = parse_coco_annotations(
+            labels_dir, images_dir, labels_directory
+        )
+    elif input_format.lower() == "yolo":
+        # Parse YOLO format annotations
+        if verbose:
+            print(f"Loading YOLO format data from {images_dir}")
+        image_files, label_files = parse_yolo_annotations(images_dir)
+    else:
+        # Default: directory format
+        # Support multiple image formats: GeoTIFF, PNG, JPG, JPEG, TIF, TIFF
+        image_extensions = (".tif", ".tiff", ".png", ".jpg", ".jpeg")
+        label_extensions = (".tif", ".tiff", ".png", ".jpg", ".jpeg")
+
+        image_files = sorted(
+            [
+                os.path.join(images_dir, f)
+                for f in os.listdir(images_dir)
+                if f.lower().endswith(image_extensions)
+            ]
+        )
+        label_files = sorted(
+            [
+                os.path.join(labels_dir, f)
+                for f in os.listdir(labels_dir)
+                if f.lower().endswith(label_extensions)
+            ]
+        )
 
-    image_files = sorted(
-        [
-            os.path.join(images_dir, f)
-            for f in os.listdir(images_dir)
-            if f.lower().endswith(image_extensions)
-        ]
-    )
-    label_files = sorted(
-        [
-            os.path.join(labels_dir, f)
-            for f in os.listdir(labels_dir)
-            if f.lower().endswith(label_extensions)
-        ]
-    )
+        # Ensure matching files
+        if len(image_files) != len(label_files):
+            print("Warning: Number of image files and label files don't match!")
+            # Find matching files by basename
+            basenames = [os.path.basename(f) for f in image_files]
+            label_files = [
+                os.path.join(labels_dir, os.path.basename(f))
+                for f in image_files
+                if os.path.exists(os.path.join(labels_dir, os.path.basename(f)))
+            ]
+            image_files = [
+                f
+                for f, b in zip(image_files, basenames)
+                if os.path.exists(os.path.join(labels_dir, b))
+            ]
+            print(f"Using {len(image_files)} matching files")
 
     print(f"Found {len(image_files)} image files and {len(label_files)} label files")
 
-    # Ensure matching files
-    if len(image_files) != len(label_files):
-        print("Warning: Number of image files and label files don't match!")
-        # Find matching files by basename
-        basenames = [os.path.basename(f) for f in image_files]
-        label_files = [
-            os.path.join(labels_dir, os.path.basename(f))
-            for f in image_files
-            if os.path.exists(os.path.join(labels_dir, os.path.basename(f)))
-        ]
-        image_files = [
-            f
-            for f, b in zip(image_files, basenames)
-            if os.path.exists(os.path.join(labels_dir, b))
-        ]
-        print(f"Using {len(image_files)} matching files")
-
     if len(image_files) == 0:
         raise FileNotFoundError("No matching image and label files found")
 
@@ -2405,7 +2671,7 @@ def train_segmentation_model(
         print(f"Using {torch.cuda.device_count()} GPUs for training")
         model = torch.nn.DataParallel(model)
 
-    # Set up loss function (CrossEntropyLoss for multi-class, can also use DiceLoss)
+    # Set up loss function (CrossEntropyLoss for multi-class, can also use F1Loss)
    criterion = torch.nn.CrossEntropyLoss()
 
     # Set up optimizer
@@ -2423,8 +2689,11 @@ def train_segmentation_model(
     train_losses = []
     val_losses = []
     val_ious = []
-    val_dices = []
+    val_f1s = []
+    val_precisions = []
+    val_recalls = []
     start_epoch = 0
+    epochs_without_improvement = 0
 
     # Load checkpoint if provided
     if checkpoint_path is not None:
@@ -2459,8 +2728,15 @@ def train_segmentation_model(
         val_losses = checkpoint["val_losses"]
         if "val_ious" in checkpoint:
             val_ious = checkpoint["val_ious"]
-        if "val_dices" in checkpoint:
-            val_dices = checkpoint["val_dices"]
+        if "val_f1s" in checkpoint:
+            val_f1s = checkpoint["val_f1s"]
+        # Also check for old val_dices format for backward compatibility
+        elif "val_dices" in checkpoint:
+            val_f1s = checkpoint["val_dices"]
+        if "val_precisions" in checkpoint:
+            val_precisions = checkpoint["val_precisions"]
+        if "val_recalls" in checkpoint:
+            val_recalls = checkpoint["val_recalls"]
 
         print(f"Resuming training from epoch {start_epoch}")
         print(f"Previous best IoU: {best_iou:.4f}")
@@ -2500,7 +2776,9 @@ def train_segmentation_model(
         )
         val_losses.append(eval_metrics["loss"])
         val_ious.append(eval_metrics["IoU"])
-        val_dices.append(eval_metrics["Dice"])
+        val_f1s.append(eval_metrics["F1"])
+        val_precisions.append(eval_metrics["Precision"])
+        val_recalls.append(eval_metrics["Recall"])
 
         # Update learning rate
         lr_scheduler.step(eval_metrics["loss"])
@@ -2511,14 +2789,28 @@ def train_segmentation_model(
             f"Train Loss: {train_loss:.4f}, "
             f"Val Loss: {eval_metrics['loss']:.4f}, "
             f"Val IoU: {eval_metrics['IoU']:.4f}, "
-            f"Val Dice: {eval_metrics['Dice']:.4f}"
+            f"Val F1: {eval_metrics['F1']:.4f}, "
+            f"Val Precision: {eval_metrics['Precision']:.4f}, "
+            f"Val Recall: {eval_metrics['Recall']:.4f}"
         )
 
-        # Save best model
+        # Save best model and check for early stopping
         if eval_metrics["IoU"] > best_iou:
             best_iou = eval_metrics["IoU"]
+            epochs_without_improvement = 0
             print(f"Saving best model with IoU: {best_iou:.4f}")
             torch.save(model.state_dict(), os.path.join(output_dir, "best_model.pth"))
+        else:
+            epochs_without_improvement += 1
+            if (
+                early_stopping_patience is not None
+                and epochs_without_improvement >= early_stopping_patience
+            ):
+                print(
+                    f"\nEarly stopping triggered after {epochs_without_improvement} epochs without improvement"
+                )
+                print(f"Best validation IoU: {best_iou:.4f}")
+                break
 
         # Save checkpoint every 10 epochs (if not save_best_only)
         if not save_best_only and ((epoch + 1) % 10 == 0 or epoch == num_epochs - 1):
@@ -2536,7 +2828,9 @@ def train_segmentation_model(
                     "train_losses": train_losses,
                     "val_losses": val_losses,
                     "val_ious": val_ious,
-                    "val_dices": val_dices,
+                    "val_f1s": val_f1s,
+                    "val_precisions": val_precisions,
+                    "val_recalls": val_recalls,
                 },
                 os.path.join(output_dir, f"checkpoint_epoch_{epoch+1}.pth"),
             )
@@ -2549,7 +2843,9 @@ def train_segmentation_model(
         "train_losses": train_losses,
         "val_losses": val_losses,
         "val_ious": val_ious,
-        "val_dices": val_dices,
+        "val_f1s": val_f1s,
+        "val_precisions": val_precisions,
+        "val_recalls": val_recalls,
     }
     torch.save(history, os.path.join(output_dir, "training_history.pth"))
 
@@ -2565,7 +2861,9 @@ def train_segmentation_model(
         f.write(f"Total epochs: {num_epochs}\n")
         f.write(f"Best validation IoU: {best_iou:.4f}\n")
         f.write(f"Final validation IoU: {val_ious[-1]:.4f}\n")
-        f.write(f"Final validation Dice: {val_dices[-1]:.4f}\n")
+        f.write(f"Final validation F1: {val_f1s[-1]:.4f}\n")
+        f.write(f"Final validation Precision: {val_precisions[-1]:.4f}\n")
+        f.write(f"Final validation Recall: {val_recalls[-1]:.4f}\n")
         f.write(f"Final validation loss: {val_losses[-1]:.4f}\n")
 
     print(f"Training complete! Best IoU: {best_iou:.4f}")
@@ -2594,10 +2892,10 @@ def train_segmentation_model(
     plt.grid(True)
 
     plt.subplot(1, 3, 3)
-    plt.plot(val_dices, label="Val Dice")
-    plt.title("Dice Score")
+    plt.plot(val_f1s, label="Val F1")
+    plt.title("F1 Score")
     plt.xlabel("Epoch")
-    plt.ylabel("Dice")
+    plt.ylabel("F1")
     plt.legend()
     plt.grid(True)
 
@@ -2627,6 +2925,7 @@ def semantic_inference_on_geotiff(
     device: Optional[torch.device] = None,
     probability_path: Optional[str] = None,
     probability_threshold: Optional[float] = None,
+    save_class_probabilities: bool = False,
     quiet: bool = False,
     **kwargs: Any,
 ) -> Tuple[str, float]:
@@ -2648,6 +2947,8 @@ def semantic_inference_on_geotiff(
         probability_threshold (float, optional): Probability threshold for binary classification.
             Only used when num_classes=2. If provided, pixels with class 1 probability >= threshold
             are classified as class 1, otherwise class 0. If None (default), uses argmax.
+        save_class_probabilities (bool): If True and probability_path is provided, saves each
+            class probability as a separate single-band file. Defaults to False.
         quiet (bool): If True, suppress progress bar. Defaults to False.
         **kwargs: Additional arguments.
 
@@ -2864,7 +3165,7 @@ def semantic_inference_on_geotiff(
         prob_meta = meta.copy()
         prob_meta.update({"count": num_classes, "dtype": "float32"})
 
-        # Save normalized probabilities
+        # Save normalized probabilities as multi-band raster
         with rasterio.open(probability_path, "w", **prob_meta) as dst:
             for class_idx in range(num_classes):
                 # Normalize probabilities
@@ -2878,6 +3179,36 @@ def semantic_inference_on_geotiff(
         if not quiet:
             print(f"Saved probability map to {probability_path}")
 
+        # Save individual class probabilities if requested
+        if save_class_probabilities:
+            # Prepare single-band metadata
+            single_band_meta = meta.copy()
+            single_band_meta.update({"count": 1, "dtype": "float32"})
+
+            # Get base filename and extension
+            prob_base = os.path.splitext(probability_path)[0]
+            prob_ext = os.path.splitext(probability_path)[1]
+
+            for class_idx in range(num_classes):
+                # Create filename for this class
+                class_prob_path = f"{prob_base}_class_{class_idx}{prob_ext}"
+
+                # Normalize probabilities
+                prob_band = np.zeros((height, width), dtype=np.float32)
+                prob_band[valid_pixels] = (
+                    prob_accumulator[class_idx, valid_pixels]
+                    / count_accumulator[valid_pixels]
+                )
+
+                # Save single-band file
+                with rasterio.open(class_prob_path, "w", **single_band_meta) as dst:
+                    dst.write(prob_band, 1)
+
+                if not quiet:
+                    print(
+                        f"Saved class {class_idx} probability to {class_prob_path}"
+                    )
+
     return output_path, inference_time
 
 
@@ -2894,6 +3225,7 @@ def semantic_inference_on_image(
     binary_output: bool = True,
     probability_path: Optional[str] = None,
     probability_threshold: Optional[float] = None,
+    save_class_probabilities: bool = False,
     quiet: bool = False,
     **kwargs: Any,
 ) -> Tuple[str, float]:
@@ -2916,6 +3248,8 @@ def semantic_inference_on_image(
         probability_threshold (float, optional): Probability threshold for binary classification.
             Only used when num_classes=2. If provided, pixels with class 1 probability >= threshold
             are classified as class 1, otherwise class 0. If None (default), uses argmax.
+        save_class_probabilities (bool): If True and probability_path is provided, saves each
+            class probability as a separate single-band file. Defaults to False.
         quiet (bool): If True, suppress progress bar. Defaults to False.
         **kwargs: Additional arguments.
 
@@ -3194,7 +3528,7 @@ def semantic_inference_on_image(
             "transform": transform,
         }
 
-        # Save normalized probabilities
+        # Save normalized probabilities as multi-band raster
         with rasterio.open(probability_path, "w", **prob_meta) as dst:
             for class_idx in range(num_classes):
                 # Normalize probabilities
@@ -3205,6 +3539,39 @@ def semantic_inference_on_image(
         if not quiet:
             print(f"Saved probability map to {probability_path}")
 
+        # Save individual class probabilities if requested
+        if save_class_probabilities:
+            # Prepare single-band metadata
+            single_band_meta = {
+                "driver": "GTiff",
+                "height": height,
+                "width": width,
+                "count": 1,
+                "dtype": "float32",
+                "transform": transform,
+            }
+
+            # Get base filename and extension
+            prob_base = os.path.splitext(probability_path)[0]
+            prob_ext = os.path.splitext(probability_path)[1]
+
+            for class_idx in range(num_classes):
+                # Create filename for this class
+                class_prob_path = f"{prob_base}_class_{class_idx}{prob_ext}"
+
+                # Normalize probabilities
+                prob_band = np.zeros((height, width), dtype=np.float32)
+                prob_band[valid_pixels] = normalized_probs[class_idx, valid_pixels]
+
+                # Save single-band file
+                with rasterio.open(class_prob_path, "w", **single_band_meta) as dst:
+                    dst.write(prob_band, 1)
+
+                if not quiet:
+                    print(
+                        f"Saved class {class_idx} probability to {class_prob_path}"
+                    )
+
     return output_path, inference_time
 
 
@@ -3222,6 +3589,7 @@ def semantic_segmentation(
     device: Optional[torch.device] = None,
     probability_path: Optional[str] = None,
     probability_threshold: Optional[float] = None,
+    save_class_probabilities: bool = False,
     quiet: bool = False,
     **kwargs: Any,
 ) -> None:
@@ -3244,11 +3612,16 @@ def semantic_segmentation(
         batch_size (int): Batch size for inference.
         device (torch.device, optional): Device to run inference on.
         probability_path (str, optional): Path to save probability map. If provided,
-            the normalized class probabilities will be saved as a multi-band raster.
+            the normalized class probabilities will be saved as a multi-band raster
+            where each band contains probabilities for each class.
         probability_threshold (float, optional): Probability threshold for binary classification.
             Only used when num_classes=2. If provided, pixels with class 1 probability >= threshold
             are classified as class 1, otherwise class 0. If None (default), uses argmax.
             Must be between 0 and 1.
+        save_class_probabilities (bool): If True and probability_path is provided, saves each
+            class probability as a separate single-band file. Files will be named like
+            "probability_class_0.tif", "probability_class_1.tif", etc. in the same directory
+            as probability_path. Defaults to False.
         quiet (bool): If True, suppress progress bar. Defaults to False.
         **kwargs: Additional arguments.
 
@@ -3325,6 +3698,7 @@ def semantic_segmentation(
             device=device,
             probability_path=probability_path,
             probability_threshold=probability_threshold,
+            save_class_probabilities=save_class_probabilities,
             quiet=quiet,
             **kwargs,
         )
@@ -3345,6 +3719,7 @@ def semantic_segmentation(
             binary_output=True,  # Convert to binary output for better visualization
             probability_path=probability_path,
             probability_threshold=probability_threshold,
+            save_class_probabilities=save_class_probabilities,
             quiet=quiet,
             **kwargs,
         )
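Note: end-user view of the new flag (a sketch; the keyword names for the first three arguments are assumed from context, not copied from the full signature). With probability_path="probs.tif" and two classes, the per-class files come out as probs_class_0.tif and probs_class_1.tif, following the os.path.splitext naming in the code above:

    semantic_segmentation(
        input_path="scene.tif",                  # assumed parameter name
        output_path="mask.tif",                  # assumed parameter name
        model_path="runs/unet/best_model.pth",   # assumed parameter name
        probability_path="probs.tif",
        save_class_probabilities=True,           # also writes probs_class_0.tif, probs_class_1.tif, ...
    )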
@@ -3527,6 +3902,7 @@ def train_instance_segmentation_model(
     images_dir: str,
     labels_dir: str,
     output_dir: str,
+    input_format: str = "directory",
     num_classes: int = 2,
     num_channels: int = 3,
     batch_size: int = 4,
@@ -3545,9 +3921,17 @@ def train_instance_segmentation_model(
     This is a wrapper function for train_MaskRCNN_model with clearer naming.
 
     Args:
-        images_dir (str): Directory containing image GeoTIFF files.
-        labels_dir (str): Directory containing label GeoTIFF files.
+        images_dir (str): Directory containing image GeoTIFF files (for 'directory' format),
+            or root directory containing images/ subdirectory (for 'yolo' format),
+            or directory containing images (for 'coco' format).
+        labels_dir (str): Directory containing label GeoTIFF files (for 'directory' format),
+            or path to COCO annotations JSON file (for 'coco' format),
+            or not used (for 'yolo' format - labels are in images_dir/labels/).
         output_dir (str): Directory to save model checkpoints and results.
+        input_format (str): Input data format - 'directory' (default), 'coco', or 'yolo'.
+            - 'directory': Standard directory structure with separate images_dir and labels_dir
+            - 'coco': COCO JSON format (labels_dir should be path to instances.json)
+            - 'yolo': YOLO format (images_dir is root with images/ and labels/ subdirectories)
         num_classes (int): Number of classes (including background). Defaults to 2.
         num_channels (int): Number of input channels. Defaults to 3.
         batch_size (int): Batch size for training. Defaults to 4.
@@ -3572,6 +3956,7 @@ def train_instance_segmentation_model(
         images_dir=images_dir,
         labels_dir=labels_dir,
         output_dir=output_dir,
+        input_format=input_format,
        num_channels=num_channels,
         model=model,
         batch_size=batch_size,
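Note: the wrapper simply forwards input_format to train_MaskRCNN_model, so the same three modes apply; e.g. a hedged YOLO-layout call (paths illustrative):

    train_instance_segmentation_model(
        images_dir="yolo_site",    # root containing images/ and labels/
        labels_dir="",             # unused for 'yolo'
        output_dir="runs/maskrcnn",
        input_format="yolo",
    )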
@@ -3756,3 +4141,307 @@ def instance_segmentation_batch(
             continue
 
     print(f"Batch processing completed. Results saved to {output_dir}")
+
+
+def lightly_train_model(
+    data_dir: str,
+    output_dir: str,
+    model: str = "torchvision/resnet50",
+    method: str = "dinov2_distillation",
+    epochs: int = 100,
+    batch_size: int = 64,
+    learning_rate: float = 1e-4,
+    **kwargs: Any,
+) -> str:
+    """
+    Train a model using Lightly Train for self-supervised pretraining.
+
+    Args:
+        data_dir (str): Directory containing unlabeled images for training.
+        output_dir (str): Directory to save training outputs and model checkpoints.
+        model (str): Model architecture to train. Supports models from torchvision,
+            timm, ultralytics, etc. Default is "torchvision/resnet50".
+        method (str): Self-supervised learning method. Options include:
+            - "simclr": Works with CNN models (ResNet, EfficientNet, etc.)
+            - "dino": Works with both CNNs and ViTs
+            - "dinov2": Requires ViT models only
+            - "dinov2_distillation": Requires ViT models only (recommended for ViTs)
+            Default is "dinov2_distillation".
+        epochs (int): Number of training epochs. Default is 100.
+        batch_size (int): Batch size for training. Default is 64.
+        learning_rate (float): Learning rate for training. Default is 1e-4.
+        **kwargs: Additional arguments passed to lightly_train.train().
+
+    Returns:
+        str: Path to the exported model file.
+
+    Raises:
+        ImportError: If lightly-train is not installed.
+        ValueError: If data_dir does not exist, is empty, or incompatible model/method.
+
+    Note:
+        Model/Method compatibility:
+        - CNN models (ResNet, EfficientNet): Use "simclr" or "dino"
+        - ViT models: Use "dinov2", "dinov2_distillation", or "dino"
+
+    Example:
+        >>> # For CNN models (ResNet, EfficientNet)
+        >>> model_path = lightly_train_model(
+        ...     data_dir="path/to/unlabeled/images",
+        ...     output_dir="path/to/output",
+        ...     model="torchvision/resnet50",
+        ...     method="simclr",  # Use simclr for CNNs
+        ...     epochs=50
+        ... )
+        >>> # For ViT models
+        >>> model_path = lightly_train_model(
+        ...     data_dir="path/to/unlabeled/images",
+        ...     output_dir="path/to/output",
+        ...     model="timm/vit_base_patch16_224",
+        ...     method="dinov2",  # dinov2 requires ViT
+        ...     epochs=50
+        ... )
+    """
+    if not LIGHTLY_TRAIN_AVAILABLE:
+        raise ImportError(
+            "lightly-train is not installed. Please install it with: "
+            "pip install lightly-train"
+        )
+
+    if not os.path.exists(data_dir):
+        raise ValueError(f"Data directory does not exist: {data_dir}")
+
+    # Check if data directory contains images
+    image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.tif", "*.tiff", "*.bmp"]
+    image_files = []
+    for ext in image_extensions:
+        image_files.extend(glob.glob(os.path.join(data_dir, "**", ext), recursive=True))
+
+    if not image_files:
+        raise ValueError(f"No image files found in {data_dir}")
+
+    # Validate model/method compatibility
+    is_vit_model = "vit" in model.lower() or "vision_transformer" in model.lower()
+
+    if method in ["dinov2", "dinov2_distillation"] and not is_vit_model:
+        raise ValueError(
+            f"Method '{method}' requires a Vision Transformer (ViT) model, but got '{model}'.\n"
+            f"Solutions:\n"
+            f"  1. Use a ViT model: model='timm/vit_base_patch16_224'\n"
+            f"  2. Use a CNN-compatible method: method='simclr' or method='dino'\n"
+            f"\nFor CNN models (ResNet, EfficientNet), use 'simclr' or 'dino'.\n"
+            f"For ViT models, use 'dinov2', 'dinov2_distillation', or 'dino'."
+        )
+
+    print(f"Found {len(image_files)} images in {data_dir}")
+    print(f"Starting self-supervised pretraining with {method} method...")
+    print(f"Model: {model}")
+
+    # Create output directory
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Detect if running in notebook environment and set appropriate configuration
+    def is_notebook():
+        try:
+            from IPython import get_ipython
+
+            if get_ipython() is not None:
+                return True
+        except (ImportError, NameError):
+            pass
+        return False
+
+    # Force single-device training in notebooks to avoid DDP strategy issues
+    if is_notebook():
+        # Only override if not explicitly set by user
+        if "accelerator" not in kwargs:
+            # Use CPU in notebooks to avoid DDP incompatibility
+            # Users can still override by passing accelerator='gpu'
+            kwargs["accelerator"] = "cpu"
+        if "devices" not in kwargs:
+            kwargs["devices"] = 1  # Force single device
+
+    # Train the model using Lightly Train
+    lightly_train.train(
+        out=output_dir,
+        data=data_dir,
+        model=model,
+        method=method,
+        epochs=epochs,
+        batch_size=batch_size,
+        **kwargs,
+    )
+
+    # Return path to the exported model
+    exported_model_path = os.path.join(
+        output_dir, "exported_models", "exported_last.pt"
+    )
+
+    if os.path.exists(exported_model_path):
+        print(
+            f"Model training completed. Exported model saved to: {exported_model_path}"
+        )
+        return exported_model_path
+    else:
+        # Check for alternative export paths
+        possible_paths = [
+            os.path.join(output_dir, "exported_models", "exported_best.pt"),
+            os.path.join(output_dir, "checkpoints", "last.ckpt"),
+        ]
+
+        for path in possible_paths:
+            if os.path.exists(path):
+                print(f"Model training completed. Exported model saved to: {path}")
+                return path
+
+        print(f"Model training completed. Output saved to: {output_dir}")
+        return output_dir
+
+
+def load_lightly_pretrained_model(
+    model_path: str,
+    model_architecture: str = "torchvision/resnet50",
+    device: str = None,
+) -> torch.nn.Module:
+    """
+    Load a pretrained model from Lightly Train.
+
+    Args:
+        model_path (str): Path to the pretrained model file (.pt format).
+        model_architecture (str): Architecture of the model to load.
+            Default is "torchvision/resnet50".
+        device (str): Device to load the model on. If None, uses CPU.
+
+    Returns:
+        torch.nn.Module: Loaded pretrained model ready for fine-tuning.
+
+    Raises:
+        FileNotFoundError: If model_path does not exist.
+        ImportError: If required libraries are not available.
+
+    Example:
+        >>> model = load_lightly_pretrained_model(
+        ...     model_path="path/to/pretrained_model.pt",
+        ...     model_architecture="torchvision/resnet50",
+        ...     device="cuda"
+        ... )
+        >>> # Fine-tune the model with your existing training pipeline
+    """
+    if not os.path.exists(model_path):
+        raise FileNotFoundError(f"Model file not found: {model_path}")
+
+    print(f"Loading pretrained model from: {model_path}")
+
+    # Load the model based on architecture
+    if model_architecture.startswith("torchvision/"):
+        model_name = model_architecture.replace("torchvision/", "")
+
+        # Import the model from torchvision
+        if hasattr(torchvision.models, model_name):
+            model = getattr(torchvision.models, model_name)()
+        else:
+            raise ValueError(f"Unknown torchvision model: {model_name}")
+
+    elif model_architecture.startswith("timm/"):
+        try:
+            import timm
+
+            model_name = model_architecture.replace("timm/", "")
+            model = timm.create_model(model_name)
+        except ImportError:
+            raise ImportError(
+                "timm is required for TIMM models. Install with: pip install timm"
+            )
+
+    else:
+        # For other architectures, try to import from torchvision as default
+        try:
+            model = getattr(torchvision.models, model_architecture)()
+        except AttributeError:
+            raise ValueError(f"Unsupported model architecture: {model_architecture}")
+
+    # Load the pretrained weights
+    try:
+        state_dict = torch.load(model_path, map_location=device, weights_only=True)
+    except TypeError:
+        # For backward compatibility with older PyTorch versions
+        state_dict = torch.load(model_path, map_location=device)
+    model.load_state_dict(state_dict)
+
+    print(f"Successfully loaded pretrained model: {model_architecture}")
+    return model
+
+
+def lightly_embed_images(
+    data_dir: str,
+    model_path: str,
+    output_path: str,
+    model_architecture: str = None,  # Deprecated, kept for backwards compatibility
+    batch_size: int = 64,
+    **kwargs: Any,
+) -> str:
+    """
+    Generate embeddings for images using a Lightly Train pretrained model.
+
+    Args:
+        data_dir (str): Directory containing images to embed.
+        model_path (str): Path to the pretrained model checkpoint file (.ckpt).
+        output_path (str): Path to save the embeddings (as .pt file).
+        model_architecture (str): Architecture of the pretrained model (deprecated,
+            kept for backwards compatibility but not used). The model architecture
+            is automatically loaded from the checkpoint.
+        batch_size (int): Batch size for embedding generation. Default is 64.
+        **kwargs: Additional arguments passed to lightly_train.embed().
+            Supported kwargs include: image_size, num_workers, accelerator, etc.
+
+    Returns:
+        str: Path to the saved embeddings file.
+
+    Raises:
+        ImportError: If lightly-train is not installed.
+        FileNotFoundError: If data_dir or model_path does not exist.
+
+    Note:
+        The model_path should point to a .ckpt file from the training output,
+        typically located at: output_dir/checkpoints/last.ckpt
+
+    Example:
+        >>> embeddings_path = lightly_embed_images(
+        ...     data_dir="path/to/images",
+        ...     model_path="output_dir/checkpoints/last.ckpt",
+        ...     output_path="embeddings.pt",
+        ...     batch_size=32
+        ... )
+        >>> print(f"Embeddings saved to: {embeddings_path}")
+    """
+    if not LIGHTLY_TRAIN_AVAILABLE:
+        raise ImportError(
+            "lightly-train is not installed. Please install it with: "
+            "pip install lightly-train"
+        )
+
+    if not os.path.exists(data_dir):
+        raise FileNotFoundError(f"Data directory does not exist: {data_dir}")
+
+    if not os.path.exists(model_path):
+        raise FileNotFoundError(f"Model file does not exist: {model_path}")
+
+    print(f"Generating embeddings for images in: {data_dir}")
+    print(f"Using pretrained model: {model_path}")
+
+    output_dir = os.path.dirname(output_path)
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+
+    # Generate embeddings using Lightly Train
+    # Note: model_architecture is not used - it's inferred from the checkpoint
+    lightly_train.embed(
+        out=output_path,
+        data=data_dir,
+        checkpoint=model_path,
+        batch_size=batch_size,
+        **kwargs,
+    )
+
+    print(f"Embeddings saved to: {output_path}")
+    return output_path
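Note: the three new helpers chain into a pretrain → embed → fine-tune workflow. A possible sequence (paths are illustrative, and the method choice follows the compatibility note above; how you consume the embeddings file afterwards is an assumption, not documented here):

    ckpt_path = lightly_train_model(
        data_dir="unlabeled_tiles",
        output_dir="ssl_out",
        model="torchvision/resnet50",
        method="simclr",  # CNN-compatible method
        epochs=50,
    )
    emb_path = lightly_embed_images(
        data_dir="unlabeled_tiles",
        model_path="ssl_out/checkpoints/last.ckpt",  # .ckpt, per the docstring note
        output_path="ssl_out/embeddings.pt",
    )
    backbone = load_lightly_pretrained_model(ckpt_path, "torchvision/resnet50")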