maite-datasets 0.0.5__tar.gz → 0.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/.gitignore +5 -1
- maite_datasets-0.0.7/PKG-INFO +181 -0
- maite_datasets-0.0.7/README.md +154 -0
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/pyproject.toml +25 -7
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/__init__.py +2 -6
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/_base.py +169 -51
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/_builder.py +46 -55
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/_collate.py +2 -3
- maite_datasets-0.0.5/src/maite_datasets/_reader/_base.py → maite_datasets-0.0.7/src/maite_datasets/_reader.py +62 -36
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/_validate.py +4 -2
- maite_datasets-0.0.7/src/maite_datasets/adapters/__init__.py +3 -0
- maite_datasets-0.0.7/src/maite_datasets/adapters/_huggingface.py +391 -0
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/image_classification/_cifar10.py +12 -7
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/image_classification/_mnist.py +15 -10
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/image_classification/_ships.py +12 -8
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/object_detection/__init__.py +4 -7
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/object_detection/_antiuav.py +11 -8
- {maite_datasets-0.0.5/src/maite_datasets/_reader → maite_datasets-0.0.7/src/maite_datasets/object_detection}/_coco.py +29 -27
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/object_detection/_milco.py +11 -9
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/object_detection/_seadrone.py +11 -9
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/object_detection/_voc.py +11 -13
- {maite_datasets-0.0.5/src/maite_datasets/_reader → maite_datasets-0.0.7/src/maite_datasets/object_detection}/_yolo.py +26 -21
- maite_datasets-0.0.7/src/maite_datasets/protocols.py +94 -0
- maite_datasets-0.0.7/src/maite_datasets/wrappers/__init__.py +8 -0
- maite_datasets-0.0.7/src/maite_datasets/wrappers/_torch.py +109 -0
- maite_datasets-0.0.5/PKG-INFO +0 -91
- maite_datasets-0.0.5/README.md +0 -65
- maite_datasets-0.0.5/src/maite_datasets/_mixin/__init__.py +0 -0
- maite_datasets-0.0.5/src/maite_datasets/_mixin/_numpy.py +0 -28
- maite_datasets-0.0.5/src/maite_datasets/_mixin/_torch.py +0 -28
- maite_datasets-0.0.5/src/maite_datasets/_protocols.py +0 -217
- maite_datasets-0.0.5/src/maite_datasets/_reader/__init__.py +0 -6
- maite_datasets-0.0.5/src/maite_datasets/_reader/_factory.py +0 -64
- maite_datasets-0.0.5/src/maite_datasets/_types.py +0 -50
- maite_datasets-0.0.5/src/maite_datasets/object_detection/_voc_torch.py +0 -65
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/LICENSE +0 -0
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/_fileio.py +0 -0
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/image_classification/__init__.py +0 -0
- {maite_datasets-0.0.5 → maite_datasets-0.0.7}/src/maite_datasets/py.typed +0 -0
@@ -0,0 +1,181 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: maite-datasets
|
3
|
+
Version: 0.0.7
|
4
|
+
Summary: A collection of Image Classification and Object Detection task datasets conforming to the MAITE protocol.
|
5
|
+
Author-email: Andrew Weng <andrew.weng@ariacoustics.com>, Ryan Wood <ryan.wood@ariacoustics.com>, Shaun Jullens <shaun.jullens@ariacoustics.com>
|
6
|
+
License-Expression: MIT
|
7
|
+
License-File: LICENSE
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
9
|
+
Classifier: Framework :: Pytest
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
11
|
+
Classifier: Operating System :: OS Independent
|
12
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
17
|
+
Requires-Python: >=3.10
|
18
|
+
Requires-Dist: defusedxml>=0.7.1
|
19
|
+
Requires-Dist: maite<0.9,>=0.7
|
20
|
+
Requires-Dist: numpy>=1.24.2
|
21
|
+
Requires-Dist: pillow>=10.3.0
|
22
|
+
Requires-Dist: requests>=2.32.3
|
23
|
+
Requires-Dist: typing-extensions>=4.12
|
24
|
+
Provides-Extra: tqdm
|
25
|
+
Requires-Dist: tqdm>=4.66; extra == 'tqdm'
|
26
|
+
Description-Content-Type: text/markdown
|
27
|
+
|
28
|
+
# MAITE Datasets
|
29
|
+
|
30
|
+
MAITE Datasets are a collection of public datasets wrapped in a [MAITE](https://mit-ll-ai-technology.github.io/maite/) compliant format.
|
31
|
+
|
32
|
+
## Installation
|
33
|
+
|
34
|
+
To install and use `maite-datasets` you can use pip:
|
35
|
+
|
36
|
+
```bash
|
37
|
+
pip install maite-datasets
|
38
|
+
```
|
39
|
+
|
40
|
+
For status bar indicators when downloading, you can include the extra `tqdm` when installing:
|
41
|
+
|
42
|
+
```bash
|
43
|
+
pip install maite-datasets[tqdm]
|
44
|
+
```
|
45
|
+
|
46
|
+
## Available Downloadable Datasets
|
47
|
+
|
48
|
+
| Task | Dataset | Description |
|
49
|
+
|----------------|------------------|---------------------------------------------------------------------|
|
50
|
+
| Classification | CIFAR10 | [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset. |
|
51
|
+
| Classification | MNIST | A dataset of hand-written digits. |
|
52
|
+
| Classification | Ships | A dataset that focuses on identifying ships from satellite images. |
|
53
|
+
| Detection | AntiUAVDetection | A UAV detection dataset in natural images with varying backgrounds. |
|
54
|
+
| Detection | MILCO | A side-scan sonar dataset focused on mine-like object detection. |
|
55
|
+
| Detection | Seadrone | A UAV dataset focused on open water object detection. |
|
56
|
+
| Detection | VOCDetection | [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) dataset. |
|
57
|
+
|
58
|
+
### Usage
|
59
|
+
|
60
|
+
Here is an example of how to import MNIST for usage with your workflow.
|
61
|
+
|
62
|
+
```python
|
63
|
+
>>> from maite_datasets.image_classification import MNIST
|
64
|
+
|
65
|
+
>>> mnist = MNIST(root="data", download=True)
|
66
|
+
>>> print(mnist)
|
67
|
+
MNIST Dataset
|
68
|
+
-------------
|
69
|
+
Corruption: None
|
70
|
+
Transforms: []
|
71
|
+
Image_set: train
|
72
|
+
Metadata: {'id': 'MNIST_train', 'index2label': {0: 'zero', 1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five', 6: 'six', 7: 'seven', 8: 'eight', 9: 'nine'}, 'split': 'train'}
|
73
|
+
Path: /home/user/maite-datasets/data/mnist
|
74
|
+
Size: 60000
|
75
|
+
|
76
|
+
>>> print("tuple("+", ".join([str(type(t)) for t in mnist[0]])+")")
|
77
|
+
tuple(<class 'numpy.ndarray'>, <class 'numpy.ndarray'>, <class 'dict'>)
|
78
|
+
```
|
79
|
+
|
80
|
+
## Dataset Wrappers
|
81
|
+
|
82
|
+
Wrappers provide a way to convert datasets to allow usage of tools within specific backend frameworks.
|
83
|
+
|
84
|
+
### Torchvision
|
85
|
+
|
86
|
+
`TorchvisionWrapper` is a convenience class that wraps any of the datasets and provides the capability to apply
|
87
|
+
`torchvision` transforms to the dataset.
|
88
|
+
|
89
|
+
**NOTE:** `TorchvisionWrapper` requires _torch_ and _torchvision_ to be installed.
|
90
|
+
|
91
|
+
```python
|
92
|
+
>>> from maite_datasets.object_detection import MILCO
|
93
|
+
|
94
|
+
>>> milco = MILCO(root="data", download=True)
|
95
|
+
>>> print(milco)
|
96
|
+
MILCO Dataset
|
97
|
+
-------------
|
98
|
+
Transforms: []
|
99
|
+
Image Set: train
|
100
|
+
Metadata: {'id': 'MILCO_train', 'index2label': {0: 'MILCO', 1: 'NOMBO'}, 'split': 'train'}
|
101
|
+
Path: /home/user/maite-datasets/data/milco
|
102
|
+
Size: 261
|
103
|
+
|
104
|
+
>>> print(f"type={milco[0][0].__class__.__name__}, shape={milco[0][0].shape}")
|
105
|
+
type=ndarray, shape=(3, 1024, 1024)
|
106
|
+
|
107
|
+
>>> print(milco[0][1].boxes[0])
|
108
|
+
[ 75. 217. 130. 247.]
|
109
|
+
|
110
|
+
>>> from maite_datasets.wrappers import TorchvisionWrapper
|
111
|
+
>>> from torchvision.transforms.v2 import Resize
|
112
|
+
|
113
|
+
>>> milco_torch = TorchvisionWrapper(milco, transforms=Resize(224))
|
114
|
+
>>> print(milco_torch)
|
115
|
+
Torchvision Wrapped MILCO Dataset
|
116
|
+
---------------------------
|
117
|
+
Transforms: Resize(size=[224], interpolation=InterpolationMode.BILINEAR, antialias=True)
|
118
|
+
|
119
|
+
MILCO Dataset
|
120
|
+
-------------
|
121
|
+
Transforms: []
|
122
|
+
Image Set: train
|
123
|
+
Metadata: {'id': 'MILCO_train', 'index2label': {0: 'MILCO', 1: 'NOMBO'}, 'split': 'train'}
|
124
|
+
Path: /home/user/maite-datasets/data/milco
|
125
|
+
Size: 261
|
126
|
+
|
127
|
+
>>> print(f"type={milco_torch[0][0].__class__.__name__}, shape={milco_torch[0][0].shape}")
|
128
|
+
type=Image, shape=torch.Size([3, 224, 224])
|
129
|
+
|
130
|
+
>>> print(milco_torch[0][1].boxes[0])
|
131
|
+
tensor([16.4062, 47.4688, 28.4375, 54.0312], dtype=torch.float64)
|
132
|
+
```
|
133
|
+
|
134
|
+
## Dataset Adapters
|
135
|
+
|
136
|
+
Adapters provide a way to read in datasets from other popular formats.
|
137
|
+
|
138
|
+
### Huggingface
|
139
|
+
|
140
|
+
Hugging Face datasets can be adapted into a MAITE-compliant format using the `from_huggingface` adapter.
|
141
|
+
|
142
|
+
```python
|
143
|
+
>>> from datasets import load_dataset
|
144
|
+
>>> from maite_datasets.adapters import from_huggingface
|
145
|
+
|
146
|
+
>>> cppe5 = load_dataset("cppe-5")
|
147
|
+
>>> m_cppe5 = from_huggingface(cppe5["train"])
|
148
|
+
>>> print(m_cppe5)
|
149
|
+
HFObjectDetection Dataset
|
150
|
+
-------------------------
|
151
|
+
Source: Dataset({
|
152
|
+
features: ['image_id', 'image', 'width', 'height', 'objects'],
|
153
|
+
num_rows: 1000
|
154
|
+
})
|
155
|
+
Metadata: {'id': 'cppe-5', 'index2label': {0: 'Coverall', 1: 'Face_Shield', 2: 'Gloves', 3: 'Goggles', 4: 'Mask'}, 'description': '', 'citation': '', 'homepage': '', 'license': '', 'features': {'image_id': Value('int64'), 'image': Image(mode=None, decode=True), 'width': Value('int32'), 'height': Value('int32'), 'objects': {'id': List(Value('int64')), 'area': List(Value('int64')), 'bbox': List(List(Value('float32'), length=4)), 'category': List(ClassLabel(names=['Coverall', 'Face_Shield', 'Gloves', 'Goggles', 'Mask']))}}, 'post_processed': None, 'supervised_keys': None, 'builder_name': 'parquet', 'dataset_name': 'cppe-5', 'config_name': 'default', 'version': 0.0.0, 'splits': {'train': SplitInfo(name='train', num_bytes=240478590, num_examples=1000, shard_lengths=None, dataset_name='cppe-5'), 'test': SplitInfo(name='test', num_bytes=4172706, num_examples=29, shard_lengths=None, dataset_name='cppe-5')}, 'download_checksums': {'hf://datasets/cppe-5@66f6a5efd474e35bd7cb94bf15dea27d4c6ad3f8/data/train-00000-of-00001.parquet': {'num_bytes': 237015519, 'checksum': None}, 'hf://datasets/cppe-5@66f6a5efd474e35bd7cb94bf15dea27d4c6ad3f8/data/test-00000-of-00001.parquet': {'num_bytes': 4137134, 'checksum': None}}, 'download_size': 241152653, 'post_processing_size': None, 'dataset_size': 244651296, 'size_in_bytes': 485803949}
|
156
|
+
|
157
|
+
>>> image = m_cppe5[0][0]
|
158
|
+
>>> print(f"type={image.__class__.__name__}, shape={image.shape}")
|
159
|
+
type=ndarray, shape=(3, 663, 943)
|
160
|
+
|
161
|
+
>>> target = m_cppe5[0][1]
|
162
|
+
>>> print(f"box={target.boxes[0]}, label={target.labels[0]}")
|
163
|
+
box=[302.0, 109.0, 73.0, 52.0], label=4
|
164
|
+
|
165
|
+
>>> print(m_cppe5[0][2])
|
166
|
+
{'id': [114, 115, 116, 117], 'image_id': 15, 'width': 943, 'height': 663, 'area': [3796, 1596, 152768, 81002]}
|
167
|
+
```
|
168
|
+
|
169
|
+
## Additional Information
|
170
|
+
|
171
|
+
For more information on the MAITE protocol, check out their [documentation](https://mit-ll-ai-technology.github.io/maite/).
|
172
|
+
|
173
|
+
## Acknowledgement
|
174
|
+
|
175
|
+
### CDAO Funding Acknowledgement
|
176
|
+
|
177
|
+
This material is based upon work supported by the Chief Digital and Artificial
|
178
|
+
Intelligence Office under Contract No. W519TC-23-9-2033. The views and
|
179
|
+
conclusions contained herein are those of the author(s) and should not be
|
180
|
+
interpreted as necessarily representing the official policies or endorsements,
|
181
|
+
either expressed or implied, of the U.S. Government.
|
@@ -0,0 +1,154 @@
|
|
1
|
+
# MAITE Datasets
|
2
|
+
|
3
|
+
MAITE Datasets are a collection of public datasets wrapped in a [MAITE](https://mit-ll-ai-technology.github.io/maite/) compliant format.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
To install and use `maite-datasets` you can use pip:
|
8
|
+
|
9
|
+
```bash
|
10
|
+
pip install maite-datasets
|
11
|
+
```
|
12
|
+
|
13
|
+
For status bar indicators when downloading, you can include the extra `tqdm` when installing:
|
14
|
+
|
15
|
+
```bash
|
16
|
+
pip install maite-datasets[tqdm]
|
17
|
+
```
|
18
|
+
|
19
|
+
## Available Downloadable Datasets
|
20
|
+
|
21
|
+
| Task | Dataset | Description |
|
22
|
+
|----------------|------------------|---------------------------------------------------------------------|
|
23
|
+
| Classification | CIFAR10 | [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset. |
|
24
|
+
| Classification | MNIST | A dataset of hand-written digits. |
|
25
|
+
| Classification | Ships | A dataset that focuses on identifying ships from satellite images. |
|
26
|
+
| Detection | AntiUAVDetection | A UAV detection dataset in natural images with varying backgrounds. |
|
27
|
+
| Detection | MILCO | A side-scan sonar dataset focused on mine-like object detection. |
|
28
|
+
| Detection | Seadrone | A UAV dataset focused on open water object detection. |
|
29
|
+
| Detection | VOCDetection | [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) dataset. |
|
30
|
+
|
31
|
+
### Usage
|
32
|
+
|
33
|
+
Here is an example of how to import MNIST for usage with your workflow.
|
34
|
+
|
35
|
+
```python
|
36
|
+
>>> from maite_datasets.image_classification import MNIST
|
37
|
+
|
38
|
+
>>> mnist = MNIST(root="data", download=True)
|
39
|
+
>>> print(mnist)
|
40
|
+
MNIST Dataset
|
41
|
+
-------------
|
42
|
+
Corruption: None
|
43
|
+
Transforms: []
|
44
|
+
Image_set: train
|
45
|
+
Metadata: {'id': 'MNIST_train', 'index2label': {0: 'zero', 1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five', 6: 'six', 7: 'seven', 8: 'eight', 9: 'nine'}, 'split': 'train'}
|
46
|
+
Path: /home/user/maite-datasets/data/mnist
|
47
|
+
Size: 60000
|
48
|
+
|
49
|
+
>>> print("tuple("+", ".join([str(type(t)) for t in mnist[0]])+")")
|
50
|
+
tuple(<class 'numpy.ndarray'>, <class 'numpy.ndarray'>, <class 'dict'>)
|
51
|
+
```
|
52
|
+
|
53
|
+
## Dataset Wrappers
|
54
|
+
|
55
|
+
Wrappers provide a way to convert datasets to allow usage of tools within specific backend frameworks.
|
56
|
+
|
57
|
+
### Torchvision
|
58
|
+
|
59
|
+
`TorchvisionWrapper` is a convenience class that wraps any of the datasets and provides the capability to apply
|
60
|
+
`torchvision` transforms to the dataset.
|
61
|
+
|
62
|
+
**NOTE:** `TorchvisionWrapper` requires _torch_ and _torchvision_ to be installed.
|
63
|
+
|
64
|
+
```python
|
65
|
+
>>> from maite_datasets.object_detection import MILCO
|
66
|
+
|
67
|
+
>>> milco = MILCO(root="data", download=True)
|
68
|
+
>>> print(milco)
|
69
|
+
MILCO Dataset
|
70
|
+
-------------
|
71
|
+
Transforms: []
|
72
|
+
Image Set: train
|
73
|
+
Metadata: {'id': 'MILCO_train', 'index2label': {0: 'MILCO', 1: 'NOMBO'}, 'split': 'train'}
|
74
|
+
Path: /home/user/maite-datasets/data/milco
|
75
|
+
Size: 261
|
76
|
+
|
77
|
+
>>> print(f"type={milco[0][0].__class__.__name__}, shape={milco[0][0].shape}")
|
78
|
+
type=ndarray, shape=(3, 1024, 1024)
|
79
|
+
|
80
|
+
>>> print(milco[0][1].boxes[0])
|
81
|
+
[ 75. 217. 130. 247.]
|
82
|
+
|
83
|
+
>>> from maite_datasets.wrappers import TorchvisionWrapper
|
84
|
+
>>> from torchvision.transforms.v2 import Resize
|
85
|
+
|
86
|
+
>>> milco_torch = TorchvisionWrapper(milco, transforms=Resize(224))
|
87
|
+
>>> print(milco_torch)
|
88
|
+
Torchvision Wrapped MILCO Dataset
|
89
|
+
---------------------------
|
90
|
+
Transforms: Resize(size=[224], interpolation=InterpolationMode.BILINEAR, antialias=True)
|
91
|
+
|
92
|
+
MILCO Dataset
|
93
|
+
-------------
|
94
|
+
Transforms: []
|
95
|
+
Image Set: train
|
96
|
+
Metadata: {'id': 'MILCO_train', 'index2label': {0: 'MILCO', 1: 'NOMBO'}, 'split': 'train'}
|
97
|
+
Path: /home/user/maite-datasets/data/milco
|
98
|
+
Size: 261
|
99
|
+
|
100
|
+
>>> print(f"type={milco_torch[0][0].__class__.__name__}, shape={milco_torch[0][0].shape}")
|
101
|
+
type=Image, shape=torch.Size([3, 224, 224])
|
102
|
+
|
103
|
+
>>> print(milco_torch[0][1].boxes[0])
|
104
|
+
tensor([16.4062, 47.4688, 28.4375, 54.0312], dtype=torch.float64)
|
105
|
+
```
|
106
|
+
|
107
|
+
## Dataset Adapters
|
108
|
+
|
109
|
+
Adapters provide a way to read in datasets from other popular formats.
|
110
|
+
|
111
|
+
### Huggingface
|
112
|
+
|
113
|
+
Hugging Face datasets can be adapted into a MAITE-compliant format using the `from_huggingface` adapter.
|
114
|
+
|
115
|
+
```python
|
116
|
+
>>> from datasets import load_dataset
|
117
|
+
>>> from maite_datasets.adapters import from_huggingface
|
118
|
+
|
119
|
+
>>> cppe5 = load_dataset("cppe-5")
|
120
|
+
>>> m_cppe5 = from_huggingface(cppe5["train"])
|
121
|
+
>>> print(m_cppe5)
|
122
|
+
HFObjectDetection Dataset
|
123
|
+
-------------------------
|
124
|
+
Source: Dataset({
|
125
|
+
features: ['image_id', 'image', 'width', 'height', 'objects'],
|
126
|
+
num_rows: 1000
|
127
|
+
})
|
128
|
+
Metadata: {'id': 'cppe-5', 'index2label': {0: 'Coverall', 1: 'Face_Shield', 2: 'Gloves', 3: 'Goggles', 4: 'Mask'}, 'description': '', 'citation': '', 'homepage': '', 'license': '', 'features': {'image_id': Value('int64'), 'image': Image(mode=None, decode=True), 'width': Value('int32'), 'height': Value('int32'), 'objects': {'id': List(Value('int64')), 'area': List(Value('int64')), 'bbox': List(List(Value('float32'), length=4)), 'category': List(ClassLabel(names=['Coverall', 'Face_Shield', 'Gloves', 'Goggles', 'Mask']))}}, 'post_processed': None, 'supervised_keys': None, 'builder_name': 'parquet', 'dataset_name': 'cppe-5', 'config_name': 'default', 'version': 0.0.0, 'splits': {'train': SplitInfo(name='train', num_bytes=240478590, num_examples=1000, shard_lengths=None, dataset_name='cppe-5'), 'test': SplitInfo(name='test', num_bytes=4172706, num_examples=29, shard_lengths=None, dataset_name='cppe-5')}, 'download_checksums': {'hf://datasets/cppe-5@66f6a5efd474e35bd7cb94bf15dea27d4c6ad3f8/data/train-00000-of-00001.parquet': {'num_bytes': 237015519, 'checksum': None}, 'hf://datasets/cppe-5@66f6a5efd474e35bd7cb94bf15dea27d4c6ad3f8/data/test-00000-of-00001.parquet': {'num_bytes': 4137134, 'checksum': None}}, 'download_size': 241152653, 'post_processing_size': None, 'dataset_size': 244651296, 'size_in_bytes': 485803949}
|
129
|
+
|
130
|
+
>>> image = m_cppe5[0][0]
|
131
|
+
>>> print(f"type={image.__class__.__name__}, shape={image.shape}")
|
132
|
+
type=ndarray, shape=(3, 663, 943)
|
133
|
+
|
134
|
+
>>> target = m_cppe5[0][1]
|
135
|
+
>>> print(f"box={target.boxes[0]}, label={target.labels[0]}")
|
136
|
+
box=[302.0, 109.0, 73.0, 52.0], label=4
|
137
|
+
|
138
|
+
>>> print(m_cppe5[0][2])
|
139
|
+
{'id': [114, 115, 116, 117], 'image_id': 15, 'width': 943, 'height': 663, 'area': [3796, 1596, 152768, 81002]}
|
140
|
+
```
|
141
|
+
|
142
|
+
## Additional Information
|
143
|
+
|
144
|
+
For more information on the MAITE protocol, check out their [documentation](https://mit-ll-ai-technology.github.io/maite/).
|
145
|
+
|
146
|
+
## Acknowledgement
|
147
|
+
|
148
|
+
### CDAO Funding Acknowledgement
|
149
|
+
|
150
|
+
This material is based upon work supported by the Chief Digital and Artificial
|
151
|
+
Intelligence Office under Contract No. W519TC-23-9-2033. The views and
|
152
|
+
conclusions contained herein are those of the author(s) and should not be
|
153
|
+
interpreted as necessarily representing the official policies or endorsements,
|
154
|
+
either expressed or implied, of the U.S. Government.
|
@@ -2,10 +2,11 @@
|
|
2
2
|
name = "maite-datasets"
|
3
3
|
description = "A collection of Image Classification and Object Detection task datasets conforming to the MAITE protocol."
|
4
4
|
readme = "README.md"
|
5
|
-
requires-python = ">=3.9"
|
5
|
+
requires-python = ">=3.10"
|
6
6
|
dynamic = ["version"]
|
7
7
|
dependencies = [
|
8
8
|
"defusedxml>=0.7.1",
|
9
|
+
"maite>=0.7,<0.9",
|
9
10
|
"numpy>=1.24.2",
|
10
11
|
"pillow>=10.3.0",
|
11
12
|
"requests>=2.32.3",
|
@@ -23,10 +24,10 @@ classifiers = [
|
|
23
24
|
"Operating System :: OS Independent",
|
24
25
|
"License :: OSI Approved :: MIT License",
|
25
26
|
"Programming Language :: Python :: 3 :: Only",
|
26
|
-
"Programming Language :: Python :: 3.9",
|
27
27
|
"Programming Language :: Python :: 3.10",
|
28
28
|
"Programming Language :: Python :: 3.11",
|
29
29
|
"Programming Language :: Python :: 3.12",
|
30
|
+
"Programming Language :: Python :: 3.13",
|
30
31
|
]
|
31
32
|
|
32
33
|
[project.optional-dependencies]
|
@@ -36,35 +37,44 @@ tqdm = [
|
|
36
37
|
|
37
38
|
[dependency-groups]
|
38
39
|
base = [
|
39
|
-
"nox
|
40
|
+
"nox>=2025.5.1",
|
41
|
+
"nox-uv>=0.6.2",
|
42
|
+
"uv>=0.8.0",
|
43
|
+
]
|
44
|
+
more = [
|
40
45
|
"torch>=2.2.0",
|
41
|
-
"
|
46
|
+
"torchvision>=0.17.0",
|
47
|
+
"tqdm>=4.66",
|
42
48
|
]
|
43
49
|
lint = [
|
50
|
+
{ include-group = "base" },
|
44
51
|
"ruff>=0.11",
|
45
52
|
"codespell[toml]>=2.3",
|
46
53
|
]
|
47
54
|
test = [
|
48
55
|
{ include-group = "base" },
|
56
|
+
{ include-group = "more" },
|
49
57
|
"pytest>=8.3",
|
50
58
|
"pytest-cov>=6.1",
|
51
59
|
"coverage[toml]>=7.6",
|
52
60
|
]
|
53
61
|
type = [
|
54
62
|
{ include-group = "base" },
|
63
|
+
{ include-group = "more" },
|
55
64
|
"pyright[nodejs]>=1.1.400",
|
56
65
|
]
|
57
66
|
dev = [
|
58
67
|
{ include-group = "base" },
|
68
|
+
{ include-group = "more" },
|
59
69
|
{ include-group = "lint" },
|
60
70
|
{ include-group = "test" },
|
61
71
|
{ include-group = "type" },
|
72
|
+
"ipykernel>=6.30.0",
|
62
73
|
]
|
63
74
|
|
64
75
|
[tool.uv.sources]
|
65
|
-
torch = [
|
66
|
-
|
67
|
-
]
|
76
|
+
torch = [{ index = "pytorch-cpu" }]
|
77
|
+
torchvision = [{ index = "pytorch-cpu" }]
|
68
78
|
|
69
79
|
[[tool.uv.index]]
|
70
80
|
name = "pytorch-cpu"
|
@@ -108,6 +118,14 @@ line-length = 120
|
|
108
118
|
indent-width = 4
|
109
119
|
target-version = "py39"
|
110
120
|
|
121
|
+
[tool.ruff.lint]
|
122
|
+
select = ["A", "ANN", "C4", "C90", "E", "F", "I", "NPY", "S", "SIM", "RET", "RUF100", "UP"]
|
123
|
+
ignore = ["ANN401", "NPY002"]
|
124
|
+
fixable = ["ALL"]
|
125
|
+
unfixable = []
|
126
|
+
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
|
127
|
+
per-file-ignores = { "!src/*" = ["ANN", "S", "RET"]}
|
128
|
+
|
111
129
|
[tool.ruff.lint.isort]
|
112
130
|
known-first-party = ["maite_datasets"]
|
113
131
|
|
@@ -1,11 +1,9 @@
|
|
1
1
|
"""Module for MAITE compliant Computer Vision datasets."""
|
2
2
|
|
3
3
|
from maite_datasets._builder import to_image_classification_dataset, to_object_detection_dataset
|
4
|
-
from maite_datasets._collate import
|
4
|
+
from maite_datasets._collate import collate_as_list, collate_as_numpy, collate_as_torch
|
5
|
+
from maite_datasets._reader import create_dataset_reader
|
5
6
|
from maite_datasets._validate import validate_dataset
|
6
|
-
from maite_datasets._reader._factory import create_dataset_reader
|
7
|
-
from maite_datasets._reader._coco import COCODatasetReader
|
8
|
-
from maite_datasets._reader._yolo import YOLODatasetReader
|
9
7
|
|
10
8
|
__all__ = [
|
11
9
|
"collate_as_list",
|
@@ -15,6 +13,4 @@ __all__ = [
|
|
15
13
|
"to_image_classification_dataset",
|
16
14
|
"to_object_detection_dataset",
|
17
15
|
"validate_dataset",
|
18
|
-
"COCODatasetReader",
|
19
|
-
"YOLODatasetReader",
|
20
16
|
]
|