PyPI - dataeval - Versions diffs - 0.86.0__py3-none-any.whl → 0.86.1__py3-none-any.whl - Mend

dataeval 0.86.0__py3-none-any.whl → 0.86.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

dataeval/__init__.py +1 -1
dataeval/_log.py +1 -1
dataeval/config.py +21 -4
dataeval/data/_embeddings.py +2 -2
dataeval/data/_images.py +2 -3
dataeval/data/_metadata.py +48 -37
dataeval/data/_selection.py +1 -2
dataeval/data/_split.py +2 -3
dataeval/data/_targets.py +17 -13
dataeval/data/selections/_classfilter.py +2 -5
dataeval/data/selections/_prioritize.py +6 -9
dataeval/data/selections/_shuffle.py +3 -1
dataeval/detectors/drift/_base.py +4 -5
dataeval/detectors/drift/_mmd.py +3 -6
dataeval/detectors/drift/_nml/_base.py +4 -2
dataeval/detectors/drift/_nml/_chunk.py +11 -19
dataeval/detectors/drift/_nml/_domainclassifier.py +8 -19
dataeval/detectors/drift/_nml/_result.py +8 -9
dataeval/detectors/drift/_nml/_thresholds.py +66 -77
dataeval/detectors/linters/outliers.py +7 -7
dataeval/metrics/bias/_parity.py +10 -13
dataeval/metrics/estimators/_divergence.py +2 -4
dataeval/metrics/stats/_base.py +103 -42
dataeval/metrics/stats/_boxratiostats.py +21 -19
dataeval/metrics/stats/_dimensionstats.py +14 -10
dataeval/metrics/stats/_hashstats.py +1 -1
dataeval/metrics/stats/_pixelstats.py +6 -6
dataeval/metrics/stats/_visualstats.py +3 -3
dataeval/outputs/_base.py +22 -7
dataeval/outputs/_bias.py +26 -28
dataeval/outputs/_drift.py +1 -9
dataeval/outputs/_linters.py +11 -11
dataeval/outputs/_stats.py +82 -23
dataeval/outputs/_workflows.py +2 -2
dataeval/utils/_array.py +6 -9
dataeval/utils/_bin.py +1 -2
dataeval/utils/_clusterer.py +7 -4
dataeval/utils/_fast_mst.py +27 -13
dataeval/utils/_image.py +65 -11
dataeval/utils/_mst.py +1 -3
dataeval/utils/_plot.py +15 -10
dataeval/utils/data/_dataset.py +32 -20
dataeval/utils/data/metadata.py +104 -82
dataeval/utils/datasets/__init__.py +2 -0
dataeval/utils/datasets/_antiuav.py +189 -0
dataeval/utils/datasets/_base.py +11 -8
dataeval/utils/datasets/_cifar10.py +104 -45
dataeval/utils/datasets/_fileio.py +21 -47
dataeval/utils/datasets/_milco.py +19 -11
dataeval/utils/datasets/_mixin.py +2 -4
dataeval/utils/datasets/_mnist.py +3 -4
dataeval/utils/datasets/_ships.py +14 -7
dataeval/utils/datasets/_voc.py +229 -42
dataeval/utils/torch/models.py +5 -10
dataeval/utils/torch/trainer.py +3 -3
dataeval/workflows/sufficiency.py +2 -2
{dataeval-0.86.0.dist-info → dataeval-0.86.1.dist-info}/METADATA +1 -1
dataeval-0.86.1.dist-info/RECORD +114 -0
dataeval/detectors/ood/vae.py +0 -74
dataeval-0.86.0.dist-info/RECORD +0 -114
{dataeval-0.86.0.dist-info → dataeval-0.86.1.dist-info}/LICENSE.txt +0 -0
{dataeval-0.86.0.dist-info → dataeval-0.86.1.dist-info}/WHEEL +0 -0

dataeval/utils/datasets/_cifar10.py CHANGED Viewed

@@ -7,7 +7,6 @@ from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar
 import numpy as np
 from numpy.typing import NDArray
-from PIL import Image
 from dataeval.utils.datasets._base import BaseICDataset, DataLocation
 from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
@@ -26,7 +25,7 @@ class CIFAR10(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
     Parameters
     ----------
     root : str or pathlib.Path
-        Root directory of dataset where the ``mnist`` folder exists.
+        Root directory where the data should be downloaded to or the ``cifar10`` folder of the already downloaded data.
     image_set : "train", "test" or "base", default "train"
         If "base", returns all of the data to allow the user to create their own splits.
     transforms : Transform, Sequence[Transform] or None, default None
@@ -93,50 +92,110 @@ class CIFAR10(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
             verbose,
         )
+    def _load_bin_data(self, data_folder: list[Path]) -> tuple[list[str], list[int], dict[str, Any]]:
+        batch_nums = np.zeros(60000, dtype=np.uint8)
+        all_labels = np.zeros(60000, dtype=np.uint8)
+        all_images = np.zeros((60000, 3, 32, 32), dtype=np.uint8)
+        # Process each batch file (the caller passes in only the .bin batch files)
+        for batch_file in data_folder:
+            # Get batch parameters
+            batch_type = "test" if "test" in batch_file.stem else "train"
+            batch_num = 5 if batch_type == "test" else int(batch_file.stem.split("_")[-1]) - 1
+            # Load data
+            batch_images, batch_labels = self._unpack_batch_files(batch_file)
+            # Stack data
+            num_images = batch_images.shape[0]
+            batch_start = batch_num * num_images
+            all_images[batch_start : batch_start + num_images] = batch_images
+            all_labels[batch_start : batch_start + num_images] = batch_labels
+            batch_nums[batch_start : batch_start + num_images] = batch_num
+        # Save data
+        self._loaded_data = all_images
+        np.savez(self.path / "cifar10", images=self._loaded_data, labels=all_labels, batches=batch_nums)
+        # Select data
+        image_list = np.arange(all_labels.shape[0]).astype(str)
+        if self.image_set == "train":
+            return (
+                image_list[np.nonzero(batch_nums != 5)[0]].tolist(),
+                all_labels[batch_nums != 5].tolist(),
+                {"batch_num": batch_nums[batch_nums != 5].tolist()},
+            )
+        if self.image_set == "test":
+            return (
+                image_list[np.nonzero(batch_nums == 5)[0]].tolist(),
+                all_labels[batch_nums == 5].tolist(),
+                {"batch_num": batch_nums[batch_nums == 5].tolist()},
+            )
+        return image_list.tolist(), all_labels.tolist(), {"batch_num": batch_nums.tolist()}
     def _load_data_inner(self) -> tuple[list[str], list[int], dict[str, Any]]:
         """Function to load in the file paths for the data and labels and retrieve metadata"""
-        file_meta = {"batch_num": []}
-        raw_data = []
-        labels = []
-        data_folder = self.path / "cifar-10-batches-bin"
-        save_folder = self.path / "images"
-        image_sets: dict[str, list[str]] = {"base": [], "train": [], "test": []}
-        # Process each batch file, skipping .meta and .html files
-        for entry in data_folder.iterdir():
-            if entry.suffix == ".bin":
-                batch_data, batch_labels = self._unpack_batch_files(entry)
-                raw_data.append(batch_data)
-                group = "train" if "test" not in entry.stem else "test"
-                name_split = entry.stem.split("_")
-                batch_num = int(name_split[-1]) - 1 if group == "train" else 5
-                file_names = [
-                    str(save_folder / f"{i + 10000 * batch_num:05d}_{self.index2label[label]}.png")
-                    for i, label in enumerate(batch_labels)
-                ]
-                image_sets["base"].extend(file_names)
-                image_sets[group].extend(file_names)
-                if self.image_set in (group, "base"):
-                    labels.extend(batch_labels)
-                    file_meta["batch_num"].extend([batch_num] * len(labels))
-        # Stack and reshape images
-        images = np.vstack(raw_data).reshape(-1, 3, 32, 32)
-        # Save the raw data into images if not already there
-        if not save_folder.exists():
-            save_folder.mkdir(exist_ok=True)
-            for i, file in enumerate(image_sets["base"]):
-                Image.fromarray(images[i].transpose(1, 2, 0).astype(np.uint8)).save(file)
-        return image_sets[self.image_set], labels, file_meta
-    def _unpack_batch_files(self, file_path: Path) -> tuple[NDArray[Any], list[int]]:
+        data_file = self.path / "cifar10.npz"
+        if not data_file.exists():
+            data_folder = sorted((self.path / "cifar-10-batches-bin").glob("*.bin"))
+            if not data_folder:
+                raise FileNotFoundError
+            return self._load_bin_data(data_folder)
+        # Load data
+        data = np.load(data_file)
+        self._loaded_data = data["images"]
+        all_labels = data["labels"]
+        batch_nums = data["batches"]
+        # Select data
+        image_list = np.arange(all_labels.shape[0]).astype(str)
+        if self.image_set == "train":
+            return (
+                image_list[np.nonzero(batch_nums != 5)[0]].tolist(),
+                all_labels[batch_nums != 5].tolist(),
+                {"batch_num": batch_nums[batch_nums != 5].tolist()},
+            )
+        if self.image_set == "test":
+            return (
+                image_list[np.nonzero(batch_nums == 5)[0]].tolist(),
+                all_labels[batch_nums == 5].tolist(),
+                {"batch_num": batch_nums[batch_nums == 5].tolist()},
+            )
+        return image_list.tolist(), all_labels.tolist(), {"batch_num": batch_nums.tolist()}
+    def _unpack_batch_files(self, file_path: Path) -> tuple[NDArray[np.uint8], NDArray[np.uint8]]:
         # Read the raw bytes of the binary batch file
         with file_path.open("rb") as f:
-            buffer = np.frombuffer(f.read(), "B")
-            labels = buffer[::3073]
-            pixels = np.delete(buffer, np.arange(0, buffer.size, 3073))
-            images = pixels.reshape(-1, 3072)
-        return images, labels.tolist()
+            buffer = np.frombuffer(f.read(), dtype=np.uint8)
+            # Each entry is 1 byte for label + 3072 bytes for image (3*32*32)
+            entry_size = 1 + 3072
+            num_entries = buffer.size // entry_size
+            # Extract labels (first byte of each entry)
+            labels = buffer[::entry_size]
+            # Extract image data and reshape to (N, 3, 32, 32)
+            images = np.zeros((num_entries, 3, 32, 32), dtype=np.uint8)
+            for i in range(num_entries):
+                # Skip the label byte and get image data for this entry
+                start_idx = i * entry_size + 1  # +1 to skip label
+                img_flat = buffer[start_idx : start_idx + 3072]
+                # The CIFAR format stores channels in blocks (all R, then all G, then all B)
+                # Each channel block is 1024 bytes (32x32)
+                red_channel = img_flat[0:1024].reshape(32, 32)
+                green_channel = img_flat[1024:2048].reshape(32, 32)
+                blue_channel = img_flat[2048:3072].reshape(32, 32)
+                # Stack the channels in the proper C×H×W format
+                images[i, 0] = red_channel  # Red channel
+                images[i, 1] = green_channel  # Green channel
+                images[i, 2] = blue_channel  # Blue channel
+        return images, labels
+    def _read_file(self, path: str) -> NDArray[Any]:
+        """
+        Function to grab the correct image from the loaded data.
+        Overwrite of the base `_read_file` because data is an all or nothing load.
+        """
+        index = int(path)
+        return self._loaded_data[index]

dataeval/utils/datasets/_fileio.py CHANGED Viewed

@@ -3,7 +3,6 @@ from __future__ import annotations
 __all__ = []
 import hashlib
-import shutil
 import tarfile
 import zipfile
 from pathlib import Path
@@ -15,7 +14,12 @@ ARCHIVE_ENDINGS = [".zip", ".tar", ".tgz"]
 COMPRESS_ENDINGS = [".gz", ".bz2"]
-def _validate_file(fpath, file_md5, md5: bool = False, chunk_size=65535) -> bool:
+def _print(text: str, verbose: bool) -> None:
+    if verbose:
+        print(text)
+def _validate_file(fpath: Path | str, file_md5: str, md5: bool = False, chunk_size: int = 65535) -> bool:
     hasher = hashlib.md5(usedforsecurity=False) if md5 else hashlib.sha256()
     with open(fpath, "rb") as fpath_file:
         while chunk := fpath_file.read(chunk_size):
@@ -23,7 +27,7 @@ def _validate_file(fpath, file_md5, md5: bool = False, chunk_size=65535) -> bool
     return hasher.hexdigest() == file_md5
-def _download_dataset(url: str, file_path: Path, timeout: int = 60) -> None:
+def _download_dataset(url: str, file_path: Path, timeout: int = 60, verbose: bool = False) -> None:
     """Download a single resource from its URL to the `data_folder`."""
     error_msg = "URL fetch failure on {}: {} -- {}"
     try:
@@ -36,7 +40,7 @@ def _download_dataset(url: str, file_path: Path, timeout: int = 60) -> None:
     total_size = int(response.headers.get("content-length", 0))
     block_size = 8192  # 8 KB
-    progress_bar = tqdm(total=total_size, unit="iB", unit_scale=True)
+    progress_bar = tqdm(total=total_size, unit="iB", unit_scale=True, disable=not verbose)
     with open(file_path, "wb") as f:
         for chunk in response.iter_content(block_size):
@@ -49,7 +53,7 @@ def _extract_zip_archive(file_path: Path, extract_to: Path) -> None:
     """Extracts the zip file to the given directory."""
     try:
         with zipfile.ZipFile(file_path, "r") as zip_ref:
-            zip_ref.extractall(extract_to)
+            zip_ref.extractall(extract_to)  # noqa: S202
             file_path.unlink()
     except zipfile.BadZipFile:
         raise FileNotFoundError(f"{file_path.name} is not a valid zip file, skipping extraction.")
@@ -59,36 +63,15 @@ def _extract_tar_archive(file_path: Path, extract_to: Path) -> None:
     """Extracts a tar file (or compressed tar) to the specified directory."""
     try:
         with tarfile.open(file_path, "r:*") as tar_ref:
-            tar_ref.extractall(extract_to)
+            tar_ref.extractall(extract_to)  # noqa: S202
             file_path.unlink()
     except tarfile.TarError:
         raise FileNotFoundError(f"{file_path.name} is not a valid tar file, skipping extraction.")
-def _flatten_extraction(base_directory: Path, verbose: bool = False) -> None:
-    """
-    If the extracted folder contains only directories (and no files),
-    move all its subfolders to the dataset_dir and remove the now-empty folder.
-    """
-    for child in base_directory.iterdir():
-        if child.is_dir():
-            inner_list = list(child.iterdir())
-            if all(subchild.is_dir() for subchild in inner_list):
-                for subchild in child.iterdir():
-                    if verbose:
-                        print(f"Moving {subchild.stem} to {base_directory}")
-                    shutil.move(subchild, base_directory)
-                if verbose:
-                    print(f"Removing empty folder {child.stem}")
-                child.rmdir()
-                # Checking for additional placeholder folders
-                if len(inner_list) == 1:
-                    _flatten_extraction(base_directory, verbose)
-def _archive_extraction(file_ext, file_path, directory, compression: bool = False, verbose: bool = False):
+def _extract_archive(
+    file_ext: str, file_path: Path, directory: Path, compression: bool = False, verbose: bool = False
+) -> None:
     """
     Single function to extract an archive into the given directory.
     Recursively extracts nested zip files as well.
@@ -102,14 +85,9 @@ def _archive_extraction(file_ext, file_path, directory, compression: bool = Fals
     # Does NOT extract in place - extracts everything to directory
     for child in directory.iterdir():
         if child.suffix == ".zip":
-            if verbose:
-                print(f"Extracting nested zip: {child} to {directory}")
+            _print(f"Extracting nested zip: {child} to {directory}", verbose)
             _extract_zip_archive(child, directory)
-    # Determine if there are nested folders and remove them
-    # Helps ensure there that data is at most one folder below main directory
-    _flatten_extraction(directory, verbose)
 def _ensure_exists(
     url: str,
@@ -137,18 +115,16 @@ def _ensure_exists(
     # Download file if it doesn't exist.
     if not check_path.exists() and download:
-        if verbose:
-            print(f"Downloading {filename} from {url}")
-        _download_dataset(url, check_path)
+        _print(f"Downloading {filename} from {url}", verbose)
+        _download_dataset(url, check_path, verbose=verbose)
         if not _validate_file(check_path, checksum, md5):
             raise Exception("File checksum mismatch. Remove current file and retry download.")
         # If the file is a zip, tar or tgz extract it into the designated folder.
         if file_ext in ARCHIVE_ENDINGS:
-            if verbose:
-                print(f"Extracting {filename}...")
-            _archive_extraction(file_ext, check_path, directory, compression, verbose)
+            _print(f"Extracting {filename}...", verbose)
+            _extract_archive(file_ext, check_path, directory, compression, verbose)
     elif not check_path.exists() and not download:
         raise FileNotFoundError(
@@ -159,10 +135,8 @@ def _ensure_exists(
     else:
         if not _validate_file(check_path, checksum, md5):
             raise Exception("File checksum mismatch. Remove current file and retry download.")
-        if verbose:
-            print(f"{filename} already exists, skipping download.")
+        _print(f"{filename} already exists, skipping download.", verbose)
         if file_ext in ARCHIVE_ENDINGS:
-            if verbose:
-                print(f"Extracting {filename}...")
-            _archive_extraction(file_ext, check_path, directory, compression, verbose)
+            _print(f"Extracting {filename}...", verbose)
+            _extract_archive(file_ext, check_path, directory, compression, verbose)

dataeval/utils/datasets/_milco.py CHANGED Viewed

@@ -38,7 +38,7 @@ class MILCO(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
     Parameters
     ----------
     root : str or pathlib.Path
-        Root directory of dataset where the ``milco`` folder exists.
+        Root directory where the data should be downloaded to or the ``milco`` folder of the already downloaded data.
     image_set: "train", "operational", or "base", default "train"
         If "train", then the images from 2015, 2017 and 2021 are selected,
         resulting in 315 MILCO objects and 177 NOMBO objects.
@@ -128,6 +128,7 @@ class MILCO(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
             download,
             verbose,
         )
+        self._bboxes_per_size = True
     def _load_data(self) -> tuple[list[str], list[str], dict[str, list[Any]]]:
         filepaths: list[str] = []
@@ -160,15 +161,17 @@ class MILCO(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
     def _load_data_inner(self) -> tuple[list[str], list[str], dict[str, Any]]:
         file_data = {"year": [], "image_id": [], "data_path": [], "label_path": []}
-        data_folder = self.path / self._resource.filename[:-4]
-        for entry in data_folder.iterdir():
-            if entry.is_file() and entry.suffix == ".jpg":
-                # Remove file extension and split by "_"
-                parts = entry.stem.split("_")
-                file_data["image_id"].append(parts[0])
-                file_data["year"].append(parts[1])
-                file_data["data_path"].append(str(entry))
-                file_data["label_path"].append(str(entry.parent / entry.stem) + ".txt")
+        data_folder = sorted((self.path / self._resource.filename[:-4]).glob("*.jpg"))
+        if not data_folder:
+            raise FileNotFoundError
+        for entry in data_folder:
+            # Remove file extension and split by "_"
+            parts = entry.stem.split("_")
+            file_data["image_id"].append(parts[0])
+            file_data["year"].append(parts[1])
+            file_data["data_path"].append(str(entry))
+            file_data["label_path"].append(str(entry.parent / entry.stem) + ".txt")
         data = file_data.pop("data_path")
         annotations = file_data.pop("label_path")
@@ -182,6 +185,11 @@ class MILCO(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
             for line in f.readlines():
                 out = line.strip().split(" ")
                 labels.append(int(out[0]))
-                boxes.append([float(out[1]), float(out[2]), float(out[3]), float(out[4])])
+                xcenter, ycenter, width, height = [float(out[1]), float(out[2]), float(out[3]), float(out[4])]
+                x0 = xcenter - width / 2
+                x1 = x0 + width
+                y0 = ycenter - height / 2
+                y1 = y0 + height
+                boxes.append([x0, y0, x1, y1])
         return boxes, labels, {}

dataeval/utils/datasets/_mixin.py CHANGED Viewed

@@ -34,8 +34,7 @@ class BaseDatasetNumpyMixin(BaseDatasetMixin[NDArray[Any]]):
         return encoded
     def _read_file(self, path: str) -> NDArray[Any]:
-        x = np.array(Image.open(path)).transpose(2, 0, 1)
-        return x
+        return np.array(Image.open(path)).transpose(2, 0, 1)
 class BaseDatasetTorchMixin(BaseDatasetMixin[torch.Tensor]):
@@ -52,5 +51,4 @@ class BaseDatasetTorchMixin(BaseDatasetMixin[torch.Tensor]):
         return encoded
     def _read_file(self, path: str) -> torch.Tensor:
-        x = torch.as_tensor(np.array(Image.open(path)).transpose(2, 0, 1))
-        return x
+        return torch.as_tensor(np.array(Image.open(path)).transpose(2, 0, 1))

dataeval/utils/datasets/_mnist.py CHANGED Viewed

@@ -48,7 +48,7 @@ class MNIST(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
     Parameters
     ----------
     root : str or pathlib.Path
-        Root directory of dataset where the ``mnist`` folder exists.
+        Root directory where the data should be downloaded to or the ``mnist`` folder of the already downloaded data.
     image_set : "train", "test" or "base", default "train"
         If "base", returns all of the data to allow the user to create their own splits.
     corruption : "identity", "shot_noise", "impulse_noise", "glass_blur", "motion_blur", \
@@ -154,7 +154,7 @@ class MNIST(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
     def _load_corruption(self) -> tuple[NDArray[Any], NDArray[np.uintp]]:
         """Function to load in the file paths for the data and labels for the different corrupt data formats"""
         corruption = self.corruption if self.corruption is not None else "identity"
-        base_path = self.path / corruption
+        base_path = self.path / "mnist_c" / corruption
         if self.image_set == "base":
             raw_data = []
             raw_labels = []
@@ -191,8 +191,7 @@ class MNIST(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
     def _grab_corruption_data(self, path: Path) -> NDArray[Any]:
         """Function to load in the data numpy array for the previously chosen corrupt format"""
-        x = np.load(path, allow_pickle=False)
-        return x
+        return np.load(path, allow_pickle=False)
     def _read_file(self, path: str) -> NDArray[Any]:
         """

dataeval/utils/datasets/_ships.py CHANGED Viewed

@@ -30,7 +30,7 @@ class Ships(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
     Parameters
     ----------
     root : str or pathlib.Path
-        Root directory of dataset where the ``shipdataset`` folder exists.
+        Root directory where the data should be downloaded to or the ``ships`` folder of the already downloaded data.
     transforms : Transform, Sequence[Transform] or None, default None
         Transform(s) to apply to the data.
     download : bool, default False
@@ -90,14 +90,23 @@ class Ships(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
             verbose,
         )
         self._scenes: list[str] = self._load_scenes()
+        self._remove_extraneous_json_file()
+    def _remove_extraneous_json_file(self) -> None:
+        json_path = self.path / "shipsnet.json"
+        if json_path.exists():
+            json_path.unlink()
     def _load_data_inner(self) -> tuple[list[str], list[int], dict[str, Any]]:
         """Function to load in the file paths for the data and labels"""
         file_data = {"label": [], "scene_id": [], "longitude": [], "latitude": [], "path": []}
-        data_folder = self.path / "shipsnet"
-        for entry in data_folder.iterdir():
+        data_folder = sorted((self.path / "shipsnet").glob("*.png"))
+        if not data_folder:
+            raise FileNotFoundError
+        for entry in data_folder:
             # Remove file extension and split by "__"
-            parts = entry.stem.split("__")  # Removes ".png" and splits the string
+            parts = entry.stem.split("__")
             file_data["label"].append(int(parts[0]))
             file_data["scene_id"].append(parts[1])
             lat_lon = parts[2].split("_")
@@ -110,9 +119,7 @@ class Ships(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
     def _load_scenes(self) -> list[str]:
         """Function to load in the file paths for the scene images"""
-        data_folder = self.path / "scenes"
-        scene = [str(entry) for entry in data_folder.iterdir()]
-        return scene
+        return sorted(str(entry) for entry in (self.path / "scenes").glob("*.png"))
     def get_scene(self, index: int) -> NDArray[np.uintp]:
         """

dataeval 0.86.0__py3-none-any.whl → 0.86.1__py3-none-any.whl

dataeval 0.86.0__py3-none-any.whl → 0.86.1__py3-none-any.whl