dataeval 0.86.9__py3-none-any.whl → 0.88.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/_log.py +1 -1
- dataeval/_version.py +2 -2
- dataeval/config.py +4 -19
- dataeval/data/_embeddings.py +78 -35
- dataeval/data/_images.py +41 -8
- dataeval/data/_metadata.py +348 -66
- dataeval/data/_selection.py +22 -7
- dataeval/data/_split.py +3 -2
- dataeval/data/selections/_classbalance.py +4 -3
- dataeval/data/selections/_classfilter.py +9 -8
- dataeval/data/selections/_indices.py +4 -3
- dataeval/data/selections/_prioritize.py +249 -29
- dataeval/data/selections/_reverse.py +1 -1
- dataeval/data/selections/_shuffle.py +5 -4
- dataeval/detectors/drift/_base.py +2 -1
- dataeval/detectors/drift/_mmd.py +2 -1
- dataeval/detectors/drift/_nml/_base.py +1 -1
- dataeval/detectors/drift/_nml/_chunk.py +2 -1
- dataeval/detectors/drift/_nml/_result.py +3 -2
- dataeval/detectors/drift/_nml/_thresholds.py +6 -5
- dataeval/detectors/drift/_uncertainty.py +2 -1
- dataeval/detectors/linters/duplicates.py +2 -1
- dataeval/detectors/linters/outliers.py +4 -3
- dataeval/detectors/ood/__init__.py +2 -1
- dataeval/detectors/ood/ae.py +1 -1
- dataeval/detectors/ood/base.py +39 -1
- dataeval/detectors/ood/knn.py +95 -0
- dataeval/detectors/ood/mixin.py +2 -1
- dataeval/metadata/_utils.py +1 -1
- dataeval/metrics/bias/_balance.py +29 -22
- dataeval/metrics/bias/_diversity.py +4 -4
- dataeval/metrics/bias/_parity.py +2 -2
- dataeval/metrics/stats/_base.py +3 -29
- dataeval/metrics/stats/_boxratiostats.py +2 -1
- dataeval/metrics/stats/_dimensionstats.py +2 -1
- dataeval/metrics/stats/_hashstats.py +21 -3
- dataeval/metrics/stats/_pixelstats.py +2 -1
- dataeval/metrics/stats/_visualstats.py +2 -1
- dataeval/outputs/_base.py +2 -3
- dataeval/outputs/_bias.py +2 -1
- dataeval/outputs/_estimators.py +1 -1
- dataeval/outputs/_linters.py +3 -3
- dataeval/outputs/_stats.py +3 -3
- dataeval/outputs/_utils.py +1 -1
- dataeval/outputs/_workflows.py +49 -31
- dataeval/typing.py +23 -9
- dataeval/utils/__init__.py +2 -2
- dataeval/utils/_array.py +3 -2
- dataeval/utils/_bin.py +9 -7
- dataeval/utils/_method.py +2 -3
- dataeval/utils/_multiprocessing.py +34 -0
- dataeval/utils/_plot.py +2 -1
- dataeval/utils/data/__init__.py +6 -5
- dataeval/utils/data/{metadata.py → _merge.py} +3 -2
- dataeval/utils/data/_validate.py +170 -0
- dataeval/utils/data/collate.py +2 -1
- dataeval/utils/torch/_internal.py +2 -1
- dataeval/utils/torch/trainer.py +1 -1
- dataeval/workflows/sufficiency.py +13 -9
- {dataeval-0.86.9.dist-info → dataeval-0.88.0.dist-info}/METADATA +8 -21
- dataeval-0.88.0.dist-info/RECORD +105 -0
- dataeval/utils/data/_dataset.py +0 -246
- dataeval/utils/datasets/__init__.py +0 -21
- dataeval/utils/datasets/_antiuav.py +0 -189
- dataeval/utils/datasets/_base.py +0 -266
- dataeval/utils/datasets/_cifar10.py +0 -201
- dataeval/utils/datasets/_fileio.py +0 -142
- dataeval/utils/datasets/_milco.py +0 -197
- dataeval/utils/datasets/_mixin.py +0 -54
- dataeval/utils/datasets/_mnist.py +0 -202
- dataeval/utils/datasets/_seadrone.py +0 -512
- dataeval/utils/datasets/_ships.py +0 -144
- dataeval/utils/datasets/_types.py +0 -48
- dataeval/utils/datasets/_voc.py +0 -583
- dataeval-0.86.9.dist-info/RECORD +0 -115
- {dataeval-0.86.9.dist-info → dataeval-0.88.0.dist-info}/WHEEL +0 -0
- /dataeval-0.86.9.dist-info/licenses/LICENSE.txt → /dataeval-0.88.0.dist-info/licenses/LICENSE +0 -0
@@ -1,144 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
__all__ = []
|
4
|
-
|
5
|
-
from pathlib import Path
|
6
|
-
from typing import TYPE_CHECKING, Any, Sequence
|
7
|
-
|
8
|
-
import numpy as np
|
9
|
-
from numpy.typing import NDArray
|
10
|
-
|
11
|
-
from dataeval.utils.datasets._base import BaseICDataset, DataLocation
|
12
|
-
from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
|
13
|
-
|
14
|
-
if TYPE_CHECKING:
|
15
|
-
from dataeval.typing import Transform
|
16
|
-
|
17
|
-
|
18
|
-
class Ships(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
    """
    A dataset that focuses on identifying ships from satellite images.

    The dataset comes from kaggle,
    `Ships in Satellite Imagery <https://www.kaggle.com/datasets/rhammell/ships-in-satellite-imagery>`_.
    The images come from Planet satellite imagery when they gave
    `open-access to a portion of their data <https://www.planet.com/pulse/open-california-rapideye-data/>`_.

    There are 4000 80x80x3 (HWC) images of ships, sea, and land.
    There are also 8 larger scene images similar to what would be operationally provided.

    Parameters
    ----------
    root : str or pathlib.Path
        Root directory where the data should be downloaded to or the ``ships`` folder of the already downloaded data.
    transforms : Transform, Sequence[Transform] or None, default None
        Transform(s) to apply to the data.
    download : bool, default False
        If True, downloads the dataset from the internet and puts it in root directory.
        Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
    verbose : bool, default False
        If True, outputs print statements.

    Attributes
    ----------
    path : pathlib.Path
        Location of the folder containing the data.
    image_set : "base"
        The base image set is the only available image set for the Ships dataset.
    index2label : dict[int, str]
        Dictionary which translates from class integers to the associated class strings.
    label2index : dict[str, int]
        Dictionary which translates from class strings to the associated class integers.
    metadata : DatasetMetadata
        Typed dictionary containing dataset metadata, such as `id` which returns the dataset class name.
    transforms : Sequence[Transform]
        The transforms to be applied to the data.
    size : int
        The size of the dataset.

    Note
    ----
    Data License: `CC BY-SA 4.0 <https://creativecommons.org/licenses/by-sa/4.0/>`_
    """

    # Single archive holding both the labelled chips and the scene images.
    _resources = [
        DataLocation(
            url="https://zenodo.org/record/3611230/files/ships-in-satellite-imagery.zip",
            filename="ships-in-satellite-imagery.zip",
            md5=True,
            checksum="b2e8a41ed029592b373bd72ee4b89f32",
        ),
    ]

    index2label: dict[int, str] = {
        0: "no ship",
        1: "ship",
    }

    def __init__(
        self,
        root: str | Path,
        transforms: Transform[NDArray[Any]] | Sequence[Transform[NDArray[Any]]] | None = None,
        download: bool = False,
        verbose: bool = False,
    ) -> None:
        # "base" is the only image set this dataset offers, so it is fixed here
        # rather than exposed as a constructor argument.
        super().__init__(
            root,
            "base",
            transforms,
            download,
            verbose,
        )
        self._scenes: list[str] = self._load_scenes()
        self._remove_extraneous_json_file()

    def _remove_extraneous_json_file(self) -> None:
        """Delete ``shipsnet.json`` from the dataset folder if it is present."""
        # missing_ok avoids the check-then-delete race of exists() + unlink().
        (self.path / "shipsnet.json").unlink(missing_ok=True)

    def _load_data_inner(self) -> tuple[list[str], list[int], dict[str, Any]]:
        """
        Collect the image file paths, labels and per-image metadata.

        Every ``*.png`` file in the ``shipsnet`` folder is parsed by file name;
        stems are expected to look like
        ``<label>__<scene_id>__<longitude>_<latitude>``.

        Returns
        -------
        tuple[list[str], list[int], dict[str, Any]]
            The image paths (as strings, sorted), the integer label of each
            image, and a dictionary with the ``scene_id``, ``longitude`` and
            ``latitude`` lists aligned with the paths.

        Raises
        ------
        FileNotFoundError
            If the ``shipsnet`` folder contains no PNG files.
        """
        image_dir = self.path / "shipsnet"
        image_paths = sorted(image_dir.glob("*.png"))
        if not image_paths:
            raise FileNotFoundError(f"No PNG images found in {image_dir}")

        filepaths: list[str] = []
        labels: list[int] = []
        file_data: dict[str, Any] = {"scene_id": [], "longitude": [], "latitude": []}

        for image_path in image_paths:
            # The stem (file name without extension) is split on the double
            # underscore; the last field holds both coordinates joined by "_".
            parts = image_path.stem.split("__")
            labels.append(int(parts[0]))
            file_data["scene_id"].append(parts[1])
            coordinates = parts[2].split("_")
            file_data["longitude"].append(float(coordinates[0]))
            file_data["latitude"].append(float(coordinates[1]))
            # Stored as str to honour the declared return type and to match
            # what _load_scenes produces.
            filepaths.append(str(image_path))

        return filepaths, labels, file_data

    def _load_scenes(self) -> list[str]:
        """Return the sorted paths (as strings) of the PNG files in the ``scenes`` folder."""
        return sorted(str(entry) for entry in (self.path / "scenes").glob("*.png"))

    def get_scene(self, index: int) -> NDArray[np.uintp]:
        """
        Get the desired satellite image (scene) by passing in the index of the desired file.

        Parameters
        ----------
        index : int
            Value of the desired data point

        Returns
        -------
        NDArray[np.uintp]
            Scene image

        Note
        ----
        The scene will be returned with the channel axis first.
        """
        scene = self._read_file(self._scenes[index])
        # NOTE(review): np.moveaxis returns a new view and does not modify its
        # argument, so this call has no effect on the array returned below.
        # The returned layout is therefore whatever _read_file produces
        # (defined outside this file) — confirm it is already channel-first
        # before either removing this line or returning its result.
        np.moveaxis(scene, -1, 0)
        return scene
|
@@ -1,48 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
__all__ = []
|
4
|
-
|
5
|
-
from dataclasses import dataclass
|
6
|
-
from typing import Any, Generic, TypedDict, TypeVar
|
7
|
-
|
8
|
-
from torch.utils.data import Dataset
|
9
|
-
from typing_extensions import NotRequired, Required
|
10
|
-
|
11
|
-
|
12
|
-
class DatasetMetadata(TypedDict):
    """
    Typed dictionary describing a dataset.

    Only ``id`` is required; ``index2label`` and ``split`` may be omitted.
    """

    # Identifier of the dataset; always present.
    id: Required[str]
    # Optional mapping from integer class index to class name.
    index2label: NotRequired[dict[int, str]]
    # Optional split name — presumably values like "train"/"test"; TODO confirm against producers.
    split: NotRequired[str]
|
16
|
-
|
17
|
-
|
18
|
-
# Type of a single item produced by a dataset (used as ``Dataset[_TDatum]``).
_TDatum = TypeVar("_TDatum")
# Array type shared by the image and target components of a datum.
_TArray = TypeVar("_TArray")
|
20
|
-
|
21
|
-
|
22
|
-
class AnnotatedDataset(Dataset[_TDatum]):
    """
    Torch ``Dataset`` of ``_TDatum`` items that also exposes dataset-level metadata and a length.
    """

    # Dataset-level metadata; only the ``id`` key is guaranteed to be present.
    metadata: DatasetMetadata

    # Declared without an implementation here (the body is ``...``).
    def __len__(self) -> int: ...
|
26
|
-
|
27
|
-
|
28
|
-
# Annotated dataset whose items are 3-tuples of (array, array, dict[str, Any]);
# presumably (image, label, per-datum metadata) — TODO confirm against implementers.
class ImageClassificationDataset(AnnotatedDataset[tuple[_TArray, _TArray, dict[str, Any]]]): ...
|
29
|
-
|
30
|
-
|
31
|
-
@dataclass
class ObjectDetectionTarget(Generic[_TArray]):
    """
    Target of an object detection datum: boxes, labels and scores sharing one array type.

    NOTE(review): presumably the three arrays hold one entry per annotated
    object — shapes and box format are not established here; confirm.
    """

    # Bounding boxes.
    boxes: _TArray
    # Class labels.
    labels: _TArray
    # Scores associated with the labels.
    scores: _TArray
|
36
|
-
|
37
|
-
|
38
|
-
# Annotated dataset whose items are 3-tuples of (array, ObjectDetectionTarget, dict[str, Any]);
# presumably (image, target, per-datum metadata) — TODO confirm against implementers.
class ObjectDetectionDataset(AnnotatedDataset[tuple[_TArray, ObjectDetectionTarget[_TArray], dict[str, Any]]]): ...
|
39
|
-
|
40
|
-
|
41
|
-
@dataclass
class SegmentationTarget(Generic[_TArray]):
    """
    Target of a segmentation datum: mask, labels and scores sharing one array type.

    NOTE(review): mask layout and how labels/scores align with it are not
    established here; confirm against implementers.
    """

    # Segmentation mask.
    mask: _TArray
    # Class labels.
    labels: _TArray
    # Scores associated with the labels.
    scores: _TArray
|
46
|
-
|
47
|
-
|
48
|
-
# Annotated dataset whose items are 3-tuples of (array, SegmentationTarget, dict[str, Any]);
# presumably (image, target, per-datum metadata) — TODO confirm against implementers.
class SegmentationDataset(AnnotatedDataset[tuple[_TArray, SegmentationTarget[_TArray], dict[str, Any]]]): ...
|