maite-datasets 0.0.5-py3-none-any.whl → 0.0.7-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (35)
  1. maite_datasets/__init__.py +2 -6
  2. maite_datasets/_base.py +169 -51
  3. maite_datasets/_builder.py +46 -55
  4. maite_datasets/_collate.py +2 -3
  5. maite_datasets/{_reader/_base.py → _reader.py} +62 -36
  6. maite_datasets/_validate.py +4 -2
  7. maite_datasets/adapters/__init__.py +3 -0
  8. maite_datasets/adapters/_huggingface.py +391 -0
  9. maite_datasets/image_classification/_cifar10.py +12 -7
  10. maite_datasets/image_classification/_mnist.py +15 -10
  11. maite_datasets/image_classification/_ships.py +12 -8
  12. maite_datasets/object_detection/__init__.py +4 -7
  13. maite_datasets/object_detection/_antiuav.py +11 -8
  14. maite_datasets/{_reader → object_detection}/_coco.py +29 -27
  15. maite_datasets/object_detection/_milco.py +11 -9
  16. maite_datasets/object_detection/_seadrone.py +11 -9
  17. maite_datasets/object_detection/_voc.py +11 -13
  18. maite_datasets/{_reader → object_detection}/_yolo.py +26 -21
  19. maite_datasets/protocols.py +94 -0
  20. maite_datasets/wrappers/__init__.py +8 -0
  21. maite_datasets/wrappers/_torch.py +109 -0
  22. maite_datasets-0.0.7.dist-info/METADATA +181 -0
  23. maite_datasets-0.0.7.dist-info/RECORD +28 -0
  24. maite_datasets/_mixin/__init__.py +0 -0
  25. maite_datasets/_mixin/_numpy.py +0 -28
  26. maite_datasets/_mixin/_torch.py +0 -28
  27. maite_datasets/_protocols.py +0 -217
  28. maite_datasets/_reader/__init__.py +0 -6
  29. maite_datasets/_reader/_factory.py +0 -64
  30. maite_datasets/_types.py +0 -50
  31. maite_datasets/object_detection/_voc_torch.py +0 -65
  32. maite_datasets-0.0.5.dist-info/METADATA +0 -91
  33. maite_datasets-0.0.5.dist-info/RECORD +0 -31
  34. {maite_datasets-0.0.5.dist-info → maite_datasets-0.0.7.dist-info}/WHEEL +0 -0
  35. {maite_datasets-0.0.5.dist-info → maite_datasets-0.0.7.dist-info}/licenses/LICENSE +0 -0
maite_datasets/adapters/__init__.py
@@ -0,0 +1,3 @@
+ from ._huggingface import HFImageClassificationDataset, HFObjectDetectionDataset, from_huggingface
+
+ __all__ = ["HFImageClassificationDataset", "HFObjectDetectionDataset", "from_huggingface"]
maite_datasets/adapters/_huggingface.py
@@ -0,0 +1,391 @@
+ from __future__ import annotations
+
+ from collections.abc import Mapping
+ from dataclasses import dataclass
+ from functools import lru_cache
+ from typing import Any, Literal, TypeAlias, overload
+
+ import maite.protocols.image_classification as ic
+ import maite.protocols.object_detection as od
+ import numpy as np
+ from maite.protocols import DatasetMetadata, DatumMetadata
+
+ from maite_datasets._base import BaseDataset, NumpyArray, ObjectDetectionTarget
+ from maite_datasets.protocols import HFArray, HFClassLabel, HFDataset, HFImage, HFList, HFValue
+ from maite_datasets.wrappers._torch import TTarget
+
+ # Constants for image processing
+ MAX_VALID_CHANNELS = 10
+
+ FeatureDict: TypeAlias = Mapping[str, Any]
+
+
+ @dataclass
+ class HFDatasetInfo:
+     image_key: str
+
+
+ @dataclass
+ class HFImageClassificationDatasetInfo(HFDatasetInfo):
+     label_key: str
+
+
+ @dataclass
+ class HFObjectDetectionDatasetInfo(HFDatasetInfo):
+     objects_key: str
+     bbox_key: str
+     label_key: str
+
+
+ class HFBaseDataset(BaseDataset[NumpyArray, TTarget]):
+     """Base wrapper for Hugging Face datasets, handling common logic."""
+
+     def __init__(self, hf_dataset: HFDataset, image_key: str, known_keys: set[str]) -> None:
+         self.source = hf_dataset
+         self._image_key = image_key
+
+         # Add dataset metadata
+         dataset_info_dict = hf_dataset.info.__dict__
+         if "id" in dataset_info_dict:
+             dataset_info_dict["datasetinfo_id"] = dataset_info_dict.pop("id")
+         self._metadata_id = dataset_info_dict["dataset_name"]
+         self._metadata_dict = dataset_info_dict
+
+         # Pre-validate features and cache metadata keys
+         self._validate_features(hf_dataset.features)
+         self._scalar_meta_keys = self._extract_scalar_meta_keys(hf_dataset.features, known_keys)
+
+         # Cache for image conversions
+         self._image_cache: dict[int, np.ndarray] = {}
+
+     def _validate_features(self, features: FeatureDict) -> None:
+         """Pre-validate all features during initialization."""
+         if self._image_key not in features:
+             raise ValueError(f"Image key '{self._image_key}' not found in dataset features.")
+
+         if not isinstance(features[self._image_key], (HFImage, HFArray)):
+             raise TypeError(f"Image feature '{self._image_key}' must be HFImage or HFArray.")
+
+     def _extract_scalar_meta_keys(self, features: FeatureDict, known_keys: set[str]) -> list[str]:
+         """Extract scalar metadata keys during initialization."""
+         return [key for key, feature in features.items() if key not in known_keys and isinstance(feature, HFValue)]
+
+     def __len__(self) -> int:
+         return len(self.source)
+
+     def _get_base_metadata(self, index: int) -> DatumMetadata:
+         """Extract base metadata for a datum."""
+         item = self.source[index]
+         datum_metadata: DatumMetadata = {"id": index}
+         for key in self._scalar_meta_keys:
+             datum_metadata[key] = item[key]
+         return datum_metadata
+
+     @lru_cache(maxsize=64)  # Cache image conversions
+     def _get_image(self, index: int) -> np.ndarray:
+         """Get and process image with caching and optimized conversions."""
+         # Convert to numpy array only once
+         raw_image = self.source[index][self._image_key]
+         image = np.asarray(raw_image)
+
+         # Handle different image formats efficiently
+         if image.ndim == 2:
+             # Grayscale: HW -> CHW
+             image = image[np.newaxis, ...]  # More efficient than expand_dims
+         elif image.ndim == 3:
+             # Check if we need to transpose from HWC to CHW
+             if image.shape[-1] < image.shape[-3] and image.shape[-1] <= MAX_VALID_CHANNELS:
+                 # HWC -> CHW using optimized transpose
+                 image = np.transpose(image, (2, 0, 1))
+             elif image.shape[0] > MAX_VALID_CHANNELS:
+                 raise ValueError(
+                     f"Image at index {index} has invalid channel configuration. "
+                     f"Expected channels to be less than {MAX_VALID_CHANNELS}, got shape {image.shape}"
+                 )
+         else:
+             raise ValueError(
+                 f"Image at index {index} has unsupported dimensionality. "
+                 f"Expected 2D or 3D, got {image.ndim}D with shape {image.shape}"
+             )
+
+         if image.ndim != 3:
+             raise ValueError(f"Image processing failed for index {index}. Final shape: {image.shape}")
+
+         return image
+
+
+ class HFImageClassificationDataset(HFBaseDataset[NumpyArray], ic.Dataset):
+     """Wraps a Hugging Face dataset to comply with the ImageClassificationDataset protocol."""
+
+     def __init__(self, hf_dataset: HFDataset, image_key: str, label_key: str) -> None:
+         super().__init__(hf_dataset, image_key, known_keys={image_key, label_key})
+         self._label_key = label_key
+
+         # Pre-validate label feature
+         label_feature = hf_dataset.features[self._label_key]
+         if not isinstance(label_feature, HFClassLabel):
+             raise TypeError(
+                 f"Label feature '{self._label_key}' must be a datasets.ClassLabel, got {type(label_feature).__name__}."
+             )
+
+         self._num_classes: int = label_feature.num_classes
+
+         # Pre-compute one-hot identity matrix for efficient encoding
+         self._one_hot_matrix = np.eye(self._num_classes, dtype=np.float32)
+
+         # Enhanced metadata with validation
+         self.metadata: DatasetMetadata = DatasetMetadata(
+             id=self._metadata_id, index2label=dict(enumerate(label_feature.names), **self._metadata_dict)
+         )
+
+     def __getitem__(self, index: int) -> tuple[NumpyArray, NumpyArray, DatumMetadata]:
+         if not 0 <= index < len(self.source):
+             raise IndexError(f"Index {index} out of range for dataset of size {len(self.source)}")
+
+         # Process image
+         image = self._get_image(index)
+         label_int = self.source[index][self._label_key]
+
+         # Process target
+         if not 0 <= label_int < self._num_classes:
+             raise ValueError(f"Label {label_int} at index {index} is out of range [0, {self._num_classes})")
+         one_hot_label = self._one_hot_matrix[label_int]
+
+         # Process metadata
+         datum_metadata = self._get_base_metadata(index)
+
+         return image, one_hot_label, datum_metadata
+
+
+ class HFObjectDetectionDataset(HFBaseDataset[ObjectDetectionTarget], od.Dataset):
+     """Wraps a Hugging Face dataset to comply with the ObjectDetectionDataset protocol."""
+
+     def __init__(self, hf_dataset: HFDataset, image_key: str, objects_key: str, bbox_key: str, label_key: str) -> None:
+         super().__init__(hf_dataset, image_key, known_keys={image_key, objects_key})
+         self._objects_key = objects_key
+         self._bbox_key = bbox_key
+         self._label_key = label_key
+
+         # Pre-validate and extract object features
+         self._object_meta_keys = self._validate_and_extract_object_features(hf_dataset.features)
+
+         # Validate and extract label information
+         label_feature = self._extract_label_feature(hf_dataset.features)
+         self.metadata: DatasetMetadata = DatasetMetadata(
+             id=self._metadata_id, index2label=dict(enumerate(label_feature.names)), **self._metadata_dict
+         )
+
+     def _validate_and_extract_object_features(self, features: FeatureDict) -> list[str]:
+         """Validate objects feature and extract metadata keys."""
+         objects_feature = features[self._objects_key]
+
+         # Determine the structure and get inner features
+         if isinstance(objects_feature, HFList):  # list(dict) case
+             if not isinstance(objects_feature.feature, dict):
+                 raise TypeError(f"Objects feature '{self._objects_key}' with list type must contain dict features.")
+             inner_feature_dict = objects_feature.feature
+         elif isinstance(objects_feature, dict):  # dict(list) case
+             inner_feature_dict = objects_feature
+         else:
+             raise TypeError(
+                 f"Objects feature '{self._objects_key}' must be a list or dict, got {type(objects_feature).__name__}."
+             )
+
+         # Validate required keys exist
+         required_keys = {self._bbox_key, self._label_key}
+         missing_keys = required_keys - set(inner_feature_dict.keys())
+         if missing_keys:
+             raise ValueError(f"Objects feature '{self._objects_key}' missing required keys: {missing_keys}")
+
+         # Extract object metadata keys
+         known_inner_keys = {self._bbox_key, self._label_key}
+         return [
+             key
+             for key, feature in inner_feature_dict.items()
+             if key not in known_inner_keys and isinstance(feature, (HFValue, HFList))
+         ]
+
+     def _extract_label_feature(self, features: FeatureDict) -> HFClassLabel:
+         """Extract and validate the label feature."""
+         objects_feature = features[self._objects_key]
+
+         inner_features = objects_feature.feature if isinstance(objects_feature, HFList) else objects_feature
+         label_feature_container = inner_features[self._label_key]
+         label_feature = (
+             label_feature_container.feature
+             if isinstance(label_feature_container.feature, HFClassLabel)
+             else label_feature_container
+         )
+
+         if not isinstance(label_feature, HFClassLabel):
+             raise TypeError(
+                 f"Label '{self._label_key}' in '{self._objects_key}' must be a ClassLabel, "
+                 f"got {type(label_feature).__name__}."
+             )
+
+         return label_feature
+
+     def __getitem__(self, index: int) -> tuple[NumpyArray, ObjectDetectionTarget, DatumMetadata]:
+         if not 0 <= index < len(self.source):
+             raise IndexError(f"Index {index} out of range for dataset of size {len(self.source)}")
+
+         # Process image
+         image = self._get_image(index)
+         objects = self.source[index][self._objects_key]
+
+         # Process target
+         boxes = objects[self._bbox_key]
+         labels = objects[self._label_key]
+         scores = np.zeros_like(labels, dtype=np.float32)
+         target = ObjectDetectionTarget(boxes, labels, scores)
+
+         # Process metadata
+         datum_metadata = self._get_base_metadata(index)
+         self._add_object_metadata(objects, datum_metadata)
+
+         return image, target, datum_metadata
+
+     def _add_object_metadata(self, objects: dict[str, Any], datum_metadata: DatumMetadata) -> None:
+         """Efficiently add object metadata to datum metadata."""
+         if not objects[self._bbox_key]:  # No objects
+             return
+
+         num_objects = len(objects[self._bbox_key])
+
+         for key in self._object_meta_keys:
+             value = objects[key]
+             if isinstance(value, list):
+                 if len(value) == num_objects:
+                     datum_metadata[key] = value
+                 else:
+                     raise ValueError(
+                         f"Object metadata '{key}' length {len(value)} doesn't match number of objects {num_objects}"
+                     )
+             else:
+                 datum_metadata[key] = [value] * num_objects
+
+
+ def is_bbox(feature: Any) -> bool:
+     """Check if feature represents bounding box data with proper type validation."""
+     if not isinstance(feature, HFList):
+         return False
+
+     # Handle nested list structure
+     bbox_candidate = feature.feature if isinstance(feature.feature, HFList) else feature
+
+     return (
+         isinstance(bbox_candidate, HFList)
+         and bbox_candidate.length == 4
+         and isinstance(bbox_candidate.feature, HFValue)
+         and any(dtype in bbox_candidate.feature.dtype for dtype in ["float", "int"])
+     )
+
+
+ def is_label(feature: Any) -> bool:
+     """Check if feature represents label data with proper type validation."""
+     target_feature = feature.feature if isinstance(feature, HFList) else feature
+     return isinstance(target_feature, HFClassLabel)
+
+
+ def find_od_keys(feature: Any) -> tuple[str | None, str | None]:
+     """Helper to find bbox and label keys for object detection with improved logic."""
+     if not ((isinstance(feature, HFList) and isinstance(feature.feature, dict)) or isinstance(feature, dict)):
+         return None, None
+
+     inner_features: FeatureDict = feature.feature if isinstance(feature, HFList) else feature
+
+     bbox_key = label_key = None
+
+     for inner_name, inner_feature in inner_features.items():
+         if bbox_key is None and is_bbox(inner_feature):
+             bbox_key = inner_name
+         if label_key is None and is_label(inner_feature):
+             label_key = inner_name
+
+         # Early exit if both found
+         if bbox_key and label_key:
+             break
+
+     return bbox_key, label_key
+
+
+ def get_dataset_info(dataset: HFDataset) -> HFDatasetInfo:
+     """Extract dataset information with improved validation and error messages."""
+     features = dataset.features
+     image_key = label_key = objects_key = bbox_key = None
+
+     # More efficient feature detection
+     for name, feature in features.items():
+         if image_key is None and isinstance(feature, (HFImage, HFArray)):
+             image_key = name
+         elif label_key is None and isinstance(feature, HFClassLabel):
+             label_key = name
+         elif objects_key is None:
+             temp_bbox, temp_label = find_od_keys(feature)
+             if temp_bbox and temp_label:
+                 objects_key, bbox_key, label_key = name, temp_bbox, temp_label
+
+     if not image_key:
+         available_features = list(features.keys())
+         raise ValueError(
+             f"No image key found in dataset. Available features: {available_features}. "
+             f"Expected HFImage or HFArray type."
+         )
+
+     # Return appropriate dataset info based on detected features
+     if objects_key and bbox_key and label_key:
+         return HFObjectDetectionDatasetInfo(image_key, objects_key, bbox_key, label_key)
+     if label_key:
+         return HFImageClassificationDatasetInfo(image_key, label_key)
+     return HFDatasetInfo(image_key)
+
+
+ @overload
+ def from_huggingface(dataset: HFDataset, task: Literal["image_classification"]) -> HFImageClassificationDataset: ...
+
+
+ @overload
+ def from_huggingface(dataset: HFDataset, task: Literal["object_detection"]) -> HFObjectDetectionDataset: ...
+
+
+ @overload
+ def from_huggingface(
+     dataset: HFDataset, task: Literal["auto"] = "auto"
+ ) -> HFObjectDetectionDataset | HFImageClassificationDataset: ...
+
+
+ def from_huggingface(
+     dataset: HFDataset, task: Literal["image_classification", "object_detection", "auto"] = "auto"
+ ) -> HFObjectDetectionDataset | HFImageClassificationDataset:
+     """Create appropriate dataset wrapper with enhanced error handling."""
+     info = get_dataset_info(dataset)
+
+     if isinstance(info, HFImageClassificationDatasetInfo):
+         if task in ("image_classification", "auto"):
+             return HFImageClassificationDataset(dataset, info.image_key, info.label_key)
+         if task == "object_detection":
+             raise ValueError(
+                 f"Task mismatch: requested 'object_detection' but dataset appears to be "
+                 f"image classification. Detected features: image='{info.image_key}', "
+                 f"label='{info.label_key}'"
+             )
+
+     elif isinstance(info, HFObjectDetectionDatasetInfo):
+         if task in ("object_detection", "auto"):
+             return HFObjectDetectionDataset(dataset, info.image_key, info.objects_key, info.bbox_key, info.label_key)
+         if task == "image_classification":
+             raise ValueError(
+                 f"Task mismatch: requested 'image_classification' but dataset appears to be "
+                 f"object detection. Detected features: image='{info.image_key}', "
+                 f"objects='{info.objects_key}'"
+             )
+
+     # Enhanced error message for auto-detection failure
+     available_features = list(dataset.features.keys())
+     feature_types = {k: type(v).__name__ for k, v in dataset.features.items()}
+
+     raise ValueError(
+         f"Could not automatically determine task for requested type '{task}'. "
+         f"Detected info: {info}. Available features: {available_features}. "
+         f"Feature types: {feature_types}. Ensure dataset has proper image and label/objects features."
+     )
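
The new adapters module exposes from_huggingface, which inspects a Hugging Face dataset's features and returns either an HFImageClassificationDataset or an HFObjectDetectionDataset. The sketch below is illustrative only and not part of the diff; it assumes the `datasets` package is installed and uses "cifar10" purely as an example name for an image-classification dataset with a ClassLabel feature.

# Illustrative sketch: wrap a Hugging Face dataset with the 0.0.7 adapter
# and read back one MAITE-style datum.
from datasets import load_dataset  # assumes the `datasets` package is available

from maite_datasets.adapters import from_huggingface

hf_train = load_dataset("cifar10", split="train")  # example dataset; any IC dataset works
dataset = from_huggingface(hf_train, task="auto")  # task auto-detected from the features

image, target, metadata = dataset[0]
print(image.shape)   # CHW NumPy array
print(target.shape)  # one-hot label vector, length = number of classes
print(metadata)      # DatumMetadata with "id" plus scalar per-datum fields
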
maite_datasets/image_classification/_cifar10.py
@@ -2,15 +2,20 @@ from __future__ import annotations

  __all__ = []

+ from collections.abc import Sequence
  from pathlib import Path
- from typing import Any, Literal, Sequence, TypeVar
+ from typing import Any, Literal, TypeVar

  import numpy as np
  from numpy.typing import NDArray

- from maite_datasets._base import BaseICDataset, DataLocation
- from maite_datasets._mixin._numpy import BaseDatasetNumpyMixin
- from maite_datasets._protocols import Transform
+ from maite_datasets._base import (
+     BaseDatasetNumpyMixin,
+     BaseICDataset,
+     DataLocation,
+     NumpyArray,
+     NumpyImageClassificationTransform,
+ )

  CIFARClassStringMap = Literal[
      "airplane",
@@ -27,7 +32,7 @@ CIFARClassStringMap = Literal[
  TCIFARClassMap = TypeVar("TCIFARClassMap", CIFARClassStringMap, int, list[CIFARClassStringMap], list[int])


- class CIFAR10(BaseICDataset[NDArray[np.number[Any]]], BaseDatasetNumpyMixin):
+ class CIFAR10(BaseICDataset[NumpyArray], BaseDatasetNumpyMixin):
      """
      `CIFAR10 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset as NumPy arrays.

@@ -89,7 +94,7 @@ class CIFAR10(BaseICDataset[NDArray[np.number[Any]]], BaseDatasetNumpyMixin):
          self,
          root: str | Path,
          image_set: Literal["train", "test", "base"] = "train",
-         transforms: Transform[NDArray[np.number[Any]]] | Sequence[Transform[NDArray[np.number[Any]]]] | None = None,
+         transforms: NumpyImageClassificationTransform | Sequence[NumpyImageClassificationTransform] | None = None,
          download: bool = False,
          verbose: bool = False,
      ) -> None:
@@ -214,7 +219,7 @@ class CIFAR10(BaseICDataset[NDArray[np.number[Any]]], BaseDatasetNumpyMixin):
              images[i, 2] = blue_channel  # Blue channel
          return images, labels

-     def _read_file(self, path: str) -> NDArray[np.number[Any]]:
+     def _read_file(self, path: str) -> NumpyArray:
          """
          Function to grab the correct image from the loaded data.
          Overwrite of the base `_read_file` because data is an all or nothing load.
maite_datasets/image_classification/_mnist.py
@@ -2,15 +2,20 @@ from __future__ import annotations

  __all__ = []

+ from collections.abc import Sequence
  from pathlib import Path
- from typing import Any, Literal, Sequence, TypeVar
+ from typing import Any, Literal, TypeVar

  import numpy as np
  from numpy.typing import NDArray

- from maite_datasets._base import BaseICDataset, DataLocation
- from maite_datasets._mixin._numpy import BaseDatasetNumpyMixin
- from maite_datasets._protocols import Transform
+ from maite_datasets._base import (
+     BaseDatasetNumpyMixin,
+     BaseICDataset,
+     DataLocation,
+     NumpyArray,
+     NumpyImageClassificationTransform,
+ )

  MNISTClassStringMap = Literal["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]
  TMNISTClassMap = TypeVar("TMNISTClassMap", MNISTClassStringMap, int, list[MNISTClassStringMap], list[int])
@@ -34,7 +39,7 @@ CorruptionStringMap = Literal[
  ]


- class MNIST(BaseICDataset[NDArray[np.number[Any]]], BaseDatasetNumpyMixin):
+ class MNIST(BaseICDataset[NumpyArray], BaseDatasetNumpyMixin):
      """`MNIST <https://en.wikipedia.org/wiki/MNIST_database>`_ Dataset and `Corruptions <https://arxiv.org/abs/1906.02337>`_.

      There are 15 different styles of corruptions. This class downloads differently depending on if you
@@ -118,7 +123,7 @@ class MNIST(BaseICDataset[NDArray[np.number[Any]]], BaseDatasetNumpyMixin):
          root: str | Path,
          image_set: Literal["train", "test", "base"] = "train",
          corruption: CorruptionStringMap | None = None,
-         transforms: Transform[NDArray[np.number[Any]]] | Sequence[Transform[NDArray[np.number[Any]]]] | None = None,
+         transforms: NumpyImageClassificationTransform | Sequence[NumpyImageClassificationTransform] | None = None,
          download: bool = False,
          verbose: bool = False,
      ) -> None:
@@ -149,7 +154,7 @@ class MNIST(BaseICDataset[NDArray[np.number[Any]]], BaseDatasetNumpyMixin):
          index_strings = np.arange(self._loaded_data.shape[0]).astype(str).tolist()
          return index_strings, labels.tolist(), {}

-     def _load_corruption(self) -> tuple[NDArray[np.number[Any]], NDArray[np.uintp]]:
+     def _load_corruption(self) -> tuple[NumpyArray, NDArray[np.uintp]]:
          """Function to load in the file paths for the data and labels for the different corrupt data formats"""
          corruption = self.corruption if self.corruption is not None else "identity"
          base_path = self.path / "mnist_c" / corruption
@@ -176,7 +181,7 @@ class MNIST(BaseICDataset[NDArray[np.number[Any]]], BaseDatasetNumpyMixin):

          return data, labels

-     def _grab_data(self, path: Path) -> tuple[NDArray[np.number[Any]], NDArray[np.uintp]]:
+     def _grab_data(self, path: Path) -> tuple[NumpyArray, NDArray[np.uintp]]:
          """Function to load in the data numpy array"""
          with np.load(path, allow_pickle=True) as data_array:
              if self.image_set == "base":
@@ -190,11 +195,11 @@ class MNIST(BaseICDataset[NDArray[np.number[Any]]], BaseDatasetNumpyMixin):
              data = np.expand_dims(data, axis=1)
          return data, labels

-     def _grab_corruption_data(self, path: Path) -> NDArray[np.number[Any]]:
+     def _grab_corruption_data(self, path: Path) -> NumpyArray:
          """Function to load in the data numpy array for the previously chosen corrupt format"""
          return np.load(path, allow_pickle=False)

-     def _read_file(self, path: str) -> NDArray[np.number[Any]]:
+     def _read_file(self, path: str) -> NumpyArray:
          """
          Function to grab the correct image from the loaded data.
          Overwrite of the base `_read_file` because data is an all or nothing load.
maite_datasets/image_classification/_ships.py
@@ -2,18 +2,22 @@ from __future__ import annotations

  __all__ = []

+ from collections.abc import Sequence
  from pathlib import Path
- from typing import Any, Sequence
+ from typing import Any

  import numpy as np
- from numpy.typing import NDArray

- from maite_datasets._base import BaseICDataset, DataLocation
- from maite_datasets._mixin._numpy import BaseDatasetNumpyMixin
- from maite_datasets._protocols import Transform
+ from maite_datasets._base import (
+     BaseDatasetNumpyMixin,
+     BaseICDataset,
+     DataLocation,
+     NumpyArray,
+     NumpyImageClassificationTransform,
+ )


- class Ships(BaseICDataset[NDArray[np.number[Any]]], BaseDatasetNumpyMixin):
+ class Ships(BaseICDataset[NumpyArray], BaseDatasetNumpyMixin):
      """
      A dataset that focuses on identifying ships from satellite images.

@@ -76,7 +80,7 @@ class Ships(BaseICDataset[NDArray[np.number[Any]]], BaseDatasetNumpyMixin):
      def __init__(
          self,
          root: str | Path,
-         transforms: Transform[NDArray[np.number[Any]]] | Sequence[Transform[NDArray[np.number[Any]]]] | None = None,
+         transforms: NumpyImageClassificationTransform | Sequence[NumpyImageClassificationTransform] | None = None,
          download: bool = False,
          verbose: bool = False,
      ) -> None:
@@ -125,7 +129,7 @@ class Ships(BaseICDataset[NDArray[np.number[Any]]], BaseDatasetNumpyMixin):
          """Function to load in the file paths for the scene images"""
          return sorted(str(entry) for entry in (self.path / "scenes").glob("*.png"))

-     def get_scene(self, index: int) -> NDArray[np.number[Any]]:
+     def get_scene(self, index: int) -> NumpyArray:
          """
          Get the desired satellite image (scene) by passing in the index of the desired file.

maite_datasets/object_detection/__init__.py
@@ -1,20 +1,17 @@
  """Module for MAITE compliant Object Detection datasets."""

  from maite_datasets.object_detection._antiuav import AntiUAVDetection
+ from maite_datasets.object_detection._coco import COCODatasetReader
  from maite_datasets.object_detection._milco import MILCO
  from maite_datasets.object_detection._seadrone import SeaDrone
  from maite_datasets.object_detection._voc import VOCDetection
+ from maite_datasets.object_detection._yolo import YOLODatasetReader

  __all__ = [
      "AntiUAVDetection",
      "MILCO",
      "SeaDrone",
      "VOCDetection",
+     "COCODatasetReader",
+     "YOLODatasetReader",
  ]
-
- import importlib.util
-
- if importlib.util.find_spec("torch") is not None:
-     from maite_datasets.object_detection._voc_torch import VOCDetectionTorch
-
-     __all__ += ["VOCDetectionTorch"]
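
With the private _reader package removed, the COCO and YOLO readers are now re-exported from the public object_detection package, and the torch-gated VOCDetectionTorch export is dropped (the file list above shows torch support moving to maite_datasets/wrappers/_torch.py). Import-location sketch only; the reader constructors are defined in _coco.py and _yolo.py, which are not shown in this excerpt.

# 0.0.7 import locations for the relocated dataset readers.
from maite_datasets.object_detection import COCODatasetReader, YOLODatasetReader

# In 0.0.5 the equivalent readers lived under the private maite_datasets._reader package.
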
maite_datasets/object_detection/_antiuav.py
@@ -2,19 +2,22 @@ from __future__ import annotations

  __all__ = []

+ from collections.abc import Sequence
  from pathlib import Path
- from typing import Any, Literal, Sequence
+ from typing import Any, Literal

- import numpy as np
  from defusedxml.ElementTree import parse
- from numpy.typing import NDArray

- from maite_datasets._base import BaseODDataset, DataLocation
- from maite_datasets._mixin._numpy import BaseDatasetNumpyMixin
- from maite_datasets._protocols import Transform
+ from maite_datasets._base import (
+     BaseDatasetNumpyMixin,
+     BaseODDataset,
+     DataLocation,
+     NumpyArray,
+     NumpyObjectDetectionTransform,
+ )


- class AntiUAVDetection(BaseODDataset[NDArray[np.number[Any]], list[str], str], BaseDatasetNumpyMixin):
+ class AntiUAVDetection(BaseODDataset[NumpyArray, list[str], str], BaseDatasetNumpyMixin):
      """
      A UAV detection dataset focused on detecting UAVs in natural images against large variation in backgrounds.

@@ -101,7 +104,7 @@ class AntiUAVDetection(BaseODDataset[NDArray[np.number[Any]], list[str], str], B
          self,
          root: str | Path,
          image_set: Literal["train", "val", "test", "base"] = "train",
-         transforms: Transform[NDArray[np.number[Any]]] | Sequence[Transform[NDArray[np.number[Any]]]] | None = None,
+         transforms: NumpyObjectDetectionTransform | Sequence[NumpyObjectDetectionTransform] | None = None,
          download: bool = False,
          verbose: bool = False,
      ) -> None:
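
The same pattern repeats across CIFAR10, MNIST, Ships, and the object detection datasets: Sequence now comes from collections.abc, the _mixin and _protocols modules are folded into _base, and transform parameters are typed with the NumpyImageClassificationTransform / NumpyObjectDetectionTransform aliases instead of Transform[NDArray[np.number[Any]]]. The sketch below is illustrative only; it assumes the new aliases accept any callable that takes and returns a NumPy image (their definitions live in maite_datasets._base, which is not part of this excerpt), and the root/download values are placeholders.

# Sketch: passing a plain function as a transform under the 0.0.7 type aliases.
from collections.abc import Sequence

import numpy as np

from maite_datasets._base import NumpyArray, NumpyImageClassificationTransform
from maite_datasets.image_classification._cifar10 import CIFAR10


def scale_to_unit(image: NumpyArray) -> NumpyArray:
    """Example transform: rescale pixel values from [0, 255] to [0.0, 1.0]."""
    return np.asarray(image, dtype=np.float32) / 255.0


transforms: Sequence[NumpyImageClassificationTransform] = [scale_to_unit]
cifar = CIFAR10(root="./data", image_set="train", transforms=transforms, download=True)
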