iden 0.0.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. iden/__init__.py +1 -0
  2. iden/constants.py +10 -0
  3. iden/dataset/__init__.py +9 -0
  4. iden/dataset/base.py +540 -0
  5. iden/dataset/exceptions.py +17 -0
  6. iden/dataset/loader/__init__.py +17 -0
  7. iden/dataset/loader/base.py +167 -0
  8. iden/dataset/loader/vanilla.py +23 -0
  9. iden/dataset/loading.py +37 -0
  10. iden/dataset/vanilla.py +423 -0
  11. iden/io/__init__.py +52 -0
  12. iden/io/auto.py +133 -0
  13. iden/io/base.py +219 -0
  14. iden/io/json.py +132 -0
  15. iden/io/pickle.py +141 -0
  16. iden/io/safetensors/__init__.py +23 -0
  17. iden/io/safetensors/loaders.py +69 -0
  18. iden/io/safetensors/savers.py +71 -0
  19. iden/io/text.py +139 -0
  20. iden/io/torch.py +145 -0
  21. iden/io/utils.py +36 -0
  22. iden/io/yaml.py +134 -0
  23. iden/shard/__init__.py +41 -0
  24. iden/shard/base.py +43 -0
  25. iden/shard/collection/__init__.py +8 -0
  26. iden/shard/collection/dict.py +329 -0
  27. iden/shard/collection/list.py +286 -0
  28. iden/shard/comparators.py +48 -0
  29. iden/shard/dict.py +324 -0
  30. iden/shard/exceptions.py +13 -0
  31. iden/shard/file.py +145 -0
  32. iden/shard/in_memory.py +46 -0
  33. iden/shard/json.py +124 -0
  34. iden/shard/loader/__init__.py +31 -0
  35. iden/shard/loader/base.py +131 -0
  36. iden/shard/loader/dict.py +50 -0
  37. iden/shard/loader/file.py +41 -0
  38. iden/shard/loader/json.py +41 -0
  39. iden/shard/loader/pickle.py +41 -0
  40. iden/shard/loader/safetensors.py +57 -0
  41. iden/shard/loader/torch.py +47 -0
  42. iden/shard/loader/tuple.py +48 -0
  43. iden/shard/loader/yaml.py +41 -0
  44. iden/shard/loading.py +37 -0
  45. iden/shard/pickle.py +124 -0
  46. iden/shard/safetensors.py +144 -0
  47. iden/shard/torch.py +141 -0
  48. iden/shard/tuple.py +264 -0
  49. iden/shard/utils.py +111 -0
  50. iden/shard/yaml.py +124 -0
  51. iden/testing/__init__.py +7 -0
  52. iden/testing/fixtures.py +13 -0
  53. iden/utils/__init__.py +1 -0
  54. iden/utils/format.py +35 -0
  55. iden/utils/imports.py +87 -0
  56. iden/utils/path.py +39 -0
  57. iden/utils/time.py +85 -0
  58. iden-0.0.1a0.dist-info/LICENSE +28 -0
  59. iden-0.0.1a0.dist-info/METADATA +145 -0
  60. iden-0.0.1a0.dist-info/RECORD +61 -0
  61. iden-0.0.1a0.dist-info/WHEEL +4 -0
iden/__init__.py ADDED
@@ -0,0 +1 @@
1
+ r"""Root package."""
iden/constants.py ADDED
@@ -0,0 +1,10 @@
1
+ r"""Contain the main constants."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __all__ = ["ASSETS", "KWARGS", "LOADER", "SHARDS"]
6
+
7
+ ASSETS = "assets"
8
+ KWARGS = "kwargs"
9
+ LOADER = "loader"
10
+ SHARDS = "shards"
iden/dataset/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ r"""Contain dataset implementations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __all__ = ["BaseDataset", "VanillaDataset", "create_vanilla_dataset", "load_from_uri"]
6
+
7
+ from iden.dataset.base import BaseDataset
8
+ from iden.dataset.loading import load_from_uri
9
+ from iden.dataset.vanilla import VanillaDataset, create_vanilla_dataset
iden/dataset/base.py ADDED
@@ -0,0 +1,540 @@
1
+ r"""Contain the base class to implement a dataset object."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __all__ = ["BaseDataset"]
6
+
7
+ import logging
8
+ from abc import ABC, abstractmethod
9
+ from typing import TYPE_CHECKING, Any, Generic, TypeVar
10
+
11
+ if TYPE_CHECKING:
12
+
13
+ from iden.shard import BaseShard
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ T = TypeVar("T")
18
+
19
+
20
+ class BaseDataset(Generic[T], ABC):
21
+ r"""Define the base class to implement a dataset.
22
+
23
+ Note this dataset class is very different from the PyTorch dataset
24
+ class because it has a different goal. One of the goals is to help
25
+ to organize and manage shards.
26
+
27
+ Example usage:
28
+
29
+ ```pycon
30
+
31
+ >>> import tempfile
32
+ >>> from pathlib import Path
33
+ >>> from iden.dataset import VanillaDataset
34
+ >>> from iden.shard import create_json_shard, create_shard_dict, create_shard_tuple
35
+ >>> with tempfile.TemporaryDirectory() as tmpdir:
36
+ ... shards = create_shard_dict(
37
+ ... shards={
38
+ ... "train": create_shard_tuple(
39
+ ... [
40
+ ... create_json_shard(
41
+ ... [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
42
+ ... ),
43
+ ... create_json_shard(
44
+ ... [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
45
+ ... ),
46
+ ... ],
47
+ ... uri=Path(tmpdir).joinpath("uri_train").as_uri(),
48
+ ... ),
49
+ ... "val": create_shard_tuple(
50
+ ... shards=[],
51
+ ... uri=Path(tmpdir).joinpath("uri_val").as_uri(),
52
+ ... ),
53
+ ... },
54
+ ... uri=Path(tmpdir).joinpath("uri_shards").as_uri(),
55
+ ... )
56
+ ... assets = create_shard_dict(
57
+ ... shards={
58
+ ... "stats": create_json_shard(
59
+ ... [1, 2, 3], uri=Path(tmpdir).joinpath("uri_stats").as_uri()
60
+ ... )
61
+ ... },
62
+ ... uri=Path(tmpdir).joinpath("uri_asset").as_uri(),
63
+ ... )
64
+ ... dataset = VanillaDataset(
65
+ ... uri=Path(tmpdir).joinpath("uri").as_uri(), shards=shards, assets=assets
66
+ ... )
67
+ ... dataset
68
+ ...
69
+ VanillaDataset(
70
+ (uri): file:///.../uri
71
+ (shards): ShardDict(
72
+ (train): ShardTuple(
73
+ (0): JsonShard(uri=file:///.../shard/uri1)
74
+ (1): JsonShard(uri=file:///.../shard/uri2)
75
+ )
76
+ (val): ShardTuple()
77
+ )
78
+ (assets): ShardDict(
79
+ (stats): JsonShard(uri=file:///.../uri_stats)
80
+ )
81
+ )
82
+
83
+ ```
84
+ """
85
+
86
+ @abstractmethod
87
+ def equal(self, other: Any, equal_nan: bool = False) -> bool:
88
+ r"""Indicate if two datasets are equal or not.
89
+
90
+ Args:
91
+ other: The object to compare with.
92
+ equal_nan: If ``True``, then two ``NaN``s will be
93
+ considered equal.
94
+
95
+ Returns:
96
+ ``True`` if the two datasets are equal, otherwise ``False``.
97
+
98
+ Example usage:
99
+
100
+ ```pycon
101
+
102
+ >>> import tempfile
103
+ >>> from pathlib import Path
104
+ >>> from iden.dataset import VanillaDataset
105
+ >>> from iden.shard import create_json_shard, create_shard_dict, create_shard_tuple
106
+ >>> with tempfile.TemporaryDirectory() as tmpdir:
107
+ ... shards = create_shard_dict(
108
+ ... shards={
109
+ ... "train": create_shard_tuple(
110
+ ... [
111
+ ... create_json_shard(
112
+ ... [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
113
+ ... ),
114
+ ... create_json_shard(
115
+ ... [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
116
+ ... ),
117
+ ... ],
118
+ ... uri=Path(tmpdir).joinpath("uri_train").as_uri(),
119
+ ... ),
120
+ ... "val": create_shard_tuple(
121
+ ... shards=[],
122
+ ... uri=Path(tmpdir).joinpath("uri_val").as_uri(),
123
+ ... ),
124
+ ... },
125
+ ... uri=Path(tmpdir).joinpath("uri_shards").as_uri(),
126
+ ... )
127
+ ... assets = create_shard_dict(
128
+ ... shards={
129
+ ... "stats": create_json_shard(
130
+ ... [1, 2, 3], uri=Path(tmpdir).joinpath("uri_stats").as_uri()
131
+ ... )
132
+ ... },
133
+ ... uri=Path(tmpdir).joinpath("uri_asset").as_uri(),
134
+ ... )
135
+ ... dataset1 = VanillaDataset(
136
+ ... uri=Path(tmpdir).joinpath("uri").as_uri(), shards=shards, assets=assets
137
+ ... )
138
+ ... dataset2 = VanillaDataset(
139
+ ... uri=Path(tmpdir).joinpath("uri2").as_uri(), shards=shards, assets=assets
140
+ ... )
141
+ ... dataset1.equal(dataset2)
142
+ ...
143
+ False
144
+
145
+ ```
146
+ """
147
+
148
+ @abstractmethod
149
+ def get_asset(self, asset_id: str) -> Any:
150
+ r"""Get a data asset from this sharded dataset.
151
+
152
+ This method is useful to access some data variables/parameters
153
+ that are not available before loading/preprocessing the data.
154
+
155
+ Args:
156
+ asset_id: The asset ID used to find the asset.
157
+
158
+ Returns:
159
+ The asset.
160
+
161
+ Raises:
162
+ AssetNotFoundError: if the asset does not exist.
163
+
164
+ Example usage:
165
+
166
+ ```pycon
167
+
168
+ >>> import tempfile
169
+ >>> from pathlib import Path
170
+ >>> from iden.dataset import VanillaDataset
171
+ >>> from iden.shard import create_json_shard, create_shard_dict, create_shard_tuple
172
+ >>> with tempfile.TemporaryDirectory() as tmpdir:
173
+ ... shards = create_shard_dict(
174
+ ... shards={
175
+ ... "train": create_shard_tuple(
176
+ ... [
177
+ ... create_json_shard(
178
+ ... [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
179
+ ... ),
180
+ ... create_json_shard(
181
+ ... [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
182
+ ... ),
183
+ ... ],
184
+ ... uri=Path(tmpdir).joinpath("uri_train").as_uri(),
185
+ ... ),
186
+ ... "val": create_shard_tuple(
187
+ ... shards=[],
188
+ ... uri=Path(tmpdir).joinpath("uri_val").as_uri(),
189
+ ... ),
190
+ ... },
191
+ ... uri=Path(tmpdir).joinpath("uri_shards").as_uri(),
192
+ ... )
193
+ ... assets = create_shard_dict(
194
+ ... shards={
195
+ ... "stats": create_json_shard(
196
+ ... {'mean': 42}, uri=Path(tmpdir).joinpath("uri_stats").as_uri()
197
+ ... )
198
+ ... },
199
+ ... uri=Path(tmpdir).joinpath("uri_asset").as_uri(),
200
+ ... )
201
+ ... dataset = VanillaDataset(uri=Path(tmpdir).joinpath("uri").as_uri(), shards=shards, assets=assets)
202
+ ... dataset.get_asset('stats').get_data()
203
+ ...
204
+ {'mean': 42}
205
+
206
+ ```
207
+ """
208
+
209
+ @abstractmethod
210
+ def has_asset(self, asset_id: str) -> bool:
211
+ r"""Indicate if the asset exists or not.
212
+
213
+ Args:
214
+ asset_id: The asset ID used to find the asset.
215
+
216
+ Returns:
217
+ ``True`` if the asset exists, otherwise ``False``.
218
+
219
+ Example usage:
220
+
221
+ ```pycon
222
+
223
+ >>> import tempfile
224
+ >>> from pathlib import Path
225
+ >>> from iden.dataset import VanillaDataset
226
+ >>> from iden.shard import create_json_shard, create_shard_dict, create_shard_tuple
227
+ >>> with tempfile.TemporaryDirectory() as tmpdir:
228
+ ... shards = create_shard_dict(
229
+ ... shards={
230
+ ... "train": create_shard_tuple(
231
+ ... [
232
+ ... create_json_shard(
233
+ ... [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
234
+ ... ),
235
+ ... create_json_shard(
236
+ ... [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
237
+ ... ),
238
+ ... ],
239
+ ... uri=Path(tmpdir).joinpath("uri_train").as_uri(),
240
+ ... ),
241
+ ... "val": create_shard_tuple(
242
+ ... shards=[],
243
+ ... uri=Path(tmpdir).joinpath("uri_val").as_uri(),
244
+ ... ),
245
+ ... },
246
+ ... uri=Path(tmpdir).joinpath("uri_shards").as_uri(),
247
+ ... )
248
+ ... assets = create_shard_dict(
249
+ ... shards={
250
+ ... "stats": create_json_shard(
251
+ ... {'mean': 42}, uri=Path(tmpdir).joinpath("uri_stats").as_uri()
252
+ ... )
253
+ ... },
254
+ ... uri=Path(tmpdir).joinpath("uri_asset").as_uri(),
255
+ ... )
256
+ ... dataset = VanillaDataset(uri=Path(tmpdir).joinpath("uri").as_uri(), shards=shards, assets=assets)
257
+ ... dataset.has_asset('stats')
258
+ ... dataset.has_asset('missing')
259
+ ...
260
+ True
261
+ False
262
+
263
+ ```
264
+ """
265
+
266
+ @abstractmethod
267
+ def get_shards(self, split: str) -> tuple[BaseShard[T], ...]:
268
+ r"""Get the shards for a given split.
269
+
270
+ Returns:
271
+ The shards for a given split. The shards are
272
+ sorted by ascending order of URI.
273
+
274
+ Raises:
275
+ ``SplitNotFoundError``: if the split does not exist.
276
+
277
+ Example usage:
278
+
279
+ ```pycon
280
+
281
+ >>> import tempfile
282
+ >>> from pathlib import Path
283
+ >>> from iden.dataset import VanillaDataset
284
+ >>> from iden.shard import create_json_shard, create_shard_dict, create_shard_tuple
285
+ >>> with tempfile.TemporaryDirectory() as tmpdir:
286
+ ... shards = create_shard_dict(
287
+ ... shards={
288
+ ... "train": create_shard_tuple(
289
+ ... [
290
+ ... create_json_shard(
291
+ ... [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
292
+ ... ),
293
+ ... create_json_shard(
294
+ ... [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
295
+ ... ),
296
+ ... ],
297
+ ... uri=Path(tmpdir).joinpath("uri_train").as_uri(),
298
+ ... ),
299
+ ... "val": create_shard_tuple(
300
+ ... shards=[],
301
+ ... uri=Path(tmpdir).joinpath("uri_val").as_uri(),
302
+ ... ),
303
+ ... },
304
+ ... uri=Path(tmpdir).joinpath("uri_shards").as_uri(),
305
+ ... )
306
+ ... assets = create_shard_dict(
307
+ ... shards={
308
+ ... "stats": create_json_shard(
309
+ ... {'mean': 42}, uri=Path(tmpdir).joinpath("uri_stats").as_uri()
310
+ ... )
311
+ ... },
312
+ ... uri=Path(tmpdir).joinpath("uri_asset").as_uri(),
313
+ ... )
314
+ ... dataset = VanillaDataset(uri=Path(tmpdir).joinpath("uri").as_uri(), shards=shards, assets=assets)
315
+ ... dataset.get_shards('train')
316
+ ... dataset.get_shards('val')
317
+ ...
318
+ (JsonShard(uri=file:///.../uri1), JsonShard(uri=file:///.../uri2))
319
+ ()
320
+
321
+ ```
322
+ """
323
+
324
+ @abstractmethod
325
+ def get_num_shards(self, split: str) -> int:
326
+ r"""Get the number of shards for a given split.
327
+
328
+ Returns:
329
+ The number of shards in the dataset for a given split.
330
+
331
+ Raises:
332
+ ``SplitNotFoundError``: if the split does not exist.
333
+
334
+ Args:
335
+ split: The name of the dataset split whose shards are counted.
336
+
337
+ Example usage:
338
+
339
+ ```pycon
340
+
341
+ >>> import tempfile
342
+ >>> from pathlib import Path
343
+ >>> from iden.dataset import VanillaDataset
344
+ >>> from iden.shard import create_json_shard, create_shard_dict, create_shard_tuple
345
+ >>> with tempfile.TemporaryDirectory() as tmpdir:
346
+ ... shards = create_shard_dict(
347
+ ... shards={
348
+ ... "train": create_shard_tuple(
349
+ ... [
350
+ ... create_json_shard(
351
+ ... [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
352
+ ... ),
353
+ ... create_json_shard(
354
+ ... [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
355
+ ... ),
356
+ ... ],
357
+ ... uri=Path(tmpdir).joinpath("uri_train").as_uri(),
358
+ ... ),
359
+ ... "val": create_shard_tuple(
360
+ ... shards=[],
361
+ ... uri=Path(tmpdir).joinpath("uri_val").as_uri(),
362
+ ... ),
363
+ ... },
364
+ ... uri=Path(tmpdir).joinpath("uri_shards").as_uri(),
365
+ ... )
366
+ ... assets = create_shard_dict(
367
+ ... shards={
368
+ ... "stats": create_json_shard(
369
+ ... {'mean': 42}, uri=Path(tmpdir).joinpath("uri_stats").as_uri()
370
+ ... )
371
+ ... },
372
+ ... uri=Path(tmpdir).joinpath("uri_asset").as_uri(),
373
+ ... )
374
+ ... dataset = VanillaDataset(uri=Path(tmpdir).joinpath("uri").as_uri(), shards=shards, assets=assets)
375
+ ... dataset.get_num_shards('train')
376
+ ... dataset.get_num_shards('val')
377
+ ...
378
+ 2
379
+ 0
380
+
381
+ ```
382
+ """
383
+
384
+ @abstractmethod
385
+ def get_splits(self) -> set[str]:
386
+ r"""Get the available dataset splits.
387
+
388
+ Returns:
389
+ The dataset splits.
390
+
391
+ Example usage:
392
+
393
+ ```pycon
394
+
395
+ >>> import tempfile
396
+ >>> from pathlib import Path
397
+ >>> from iden.dataset import VanillaDataset
398
+ >>> from iden.shard import create_json_shard, create_shard_dict, create_shard_tuple
399
+ >>> with tempfile.TemporaryDirectory() as tmpdir:
400
+ ... shards = create_shard_dict(
401
+ ... shards={
402
+ ... "train": create_shard_tuple(
403
+ ... [
404
+ ... create_json_shard(
405
+ ... [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
406
+ ... ),
407
+ ... create_json_shard(
408
+ ... [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
409
+ ... ),
410
+ ... ],
411
+ ... uri=Path(tmpdir).joinpath("uri_train").as_uri(),
412
+ ... ),
413
+ ... "val": create_shard_tuple(
414
+ ... shards=[],
415
+ ... uri=Path(tmpdir).joinpath("uri_val").as_uri(),
416
+ ... ),
417
+ ... },
418
+ ... uri=Path(tmpdir).joinpath("uri_shards").as_uri(),
419
+ ... )
420
+ ... assets = create_shard_dict(
421
+ ... shards={
422
+ ... "stats": create_json_shard(
423
+ ... {'mean': 42}, uri=Path(tmpdir).joinpath("uri_stats").as_uri()
424
+ ... )
425
+ ... },
426
+ ... uri=Path(tmpdir).joinpath("uri_asset").as_uri(),
427
+ ... )
428
+ ... dataset = VanillaDataset(uri=Path(tmpdir).joinpath("uri").as_uri(), shards=shards, assets=assets)
429
+ ... sorted(dataset.get_splits())
430
+ ...
431
+ ['train', 'val']
432
+
433
+ ```
434
+ """
435
+
436
+ @abstractmethod
437
+ def has_split(self, split: str) -> bool:
438
+ r"""Indicate if a dataset split exists or not.
439
+
440
+ Returns:
441
+ ``True`` if the split exists, otherwise ``False``.
442
+
443
+ Example usage:
444
+
445
+ ```pycon
446
+
447
+ >>> import tempfile
448
+ >>> from pathlib import Path
449
+ >>> from iden.dataset import VanillaDataset
450
+ >>> from iden.shard import create_json_shard, create_shard_dict, create_shard_tuple
451
+ >>> with tempfile.TemporaryDirectory() as tmpdir:
452
+ ... shards = create_shard_dict(
453
+ ... shards={
454
+ ... "train": create_shard_tuple(
455
+ ... [
456
+ ... create_json_shard(
457
+ ... [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
458
+ ... ),
459
+ ... create_json_shard(
460
+ ... [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
461
+ ... ),
462
+ ... ],
463
+ ... uri=Path(tmpdir).joinpath("uri_train").as_uri(),
464
+ ... ),
465
+ ... "val": create_shard_tuple(
466
+ ... shards=[],
467
+ ... uri=Path(tmpdir).joinpath("uri_val").as_uri(),
468
+ ... ),
469
+ ... },
470
+ ... uri=Path(tmpdir).joinpath("uri_shards").as_uri(),
471
+ ... )
472
+ ... assets = create_shard_dict(
473
+ ... shards={
474
+ ... "stats": create_json_shard(
475
+ ... {'mean': 42}, uri=Path(tmpdir).joinpath("uri_stats").as_uri()
476
+ ... )
477
+ ... },
478
+ ... uri=Path(tmpdir).joinpath("uri_asset").as_uri(),
479
+ ... )
480
+ ... dataset = VanillaDataset(uri=Path(tmpdir).joinpath("uri").as_uri(), shards=shards, assets=assets)
481
+ ... dataset.has_split('train')
482
+ ... dataset.has_split('missing')
483
+ ...
484
+ True
485
+ False
486
+
487
+ ```
488
+ """
489
+
490
+ @abstractmethod
491
+ def get_uri(self) -> str:
492
+ r"""Get the Uniform Resource Identifier (URI) of the dataset.
493
+
494
+ Returns:
495
+ The dataset's URI.
496
+
497
+ Example usage:
498
+
499
+ ```pycon
500
+
501
+ >>> import tempfile
502
+ >>> from pathlib import Path
503
+ >>> from iden.dataset import VanillaDataset
504
+ >>> from iden.shard import create_json_shard, create_shard_dict, create_shard_tuple
505
+ >>> with tempfile.TemporaryDirectory() as tmpdir:
506
+ ... shards = create_shard_dict(
507
+ ... shards={
508
+ ... "train": create_shard_tuple(
509
+ ... [
510
+ ... create_json_shard(
511
+ ... [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
512
+ ... ),
513
+ ... create_json_shard(
514
+ ... [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
515
+ ... ),
516
+ ... ],
517
+ ... uri=Path(tmpdir).joinpath("uri_train").as_uri(),
518
+ ... ),
519
+ ... "val": create_shard_tuple(
520
+ ... shards=[],
521
+ ... uri=Path(tmpdir).joinpath("uri_val").as_uri(),
522
+ ... ),
523
+ ... },
524
+ ... uri=Path(tmpdir).joinpath("uri_shards").as_uri(),
525
+ ... )
526
+ ... assets = create_shard_dict(
527
+ ... shards={
528
+ ... "stats": create_json_shard(
529
+ ... {'mean': 42}, uri=Path(tmpdir).joinpath("uri_stats").as_uri()
530
+ ... )
531
+ ... },
532
+ ... uri=Path(tmpdir).joinpath("uri_asset").as_uri(),
533
+ ... )
534
+ ... dataset = VanillaDataset(uri=Path(tmpdir).joinpath("uri").as_uri(), shards=shards, assets=assets)
535
+ ... dataset.get_uri()
536
+ ...
537
+ file:///.../uri
538
+
539
+ ```
540
+ """
iden/dataset/exceptions.py ADDED
@@ -0,0 +1,17 @@
1
+ r"""Contain the definition of the exceptions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __all__ = ["AssetExistsError", "AssetNotFoundError", "SplitNotFoundError"]
6
+
7
+
8
+ class AssetExistsError(Exception):
9
+ r"""Raised when trying to add an asset that already exists."""
10
+
11
+
12
+ class AssetNotFoundError(Exception):
13
+ r"""Raised when trying to access an asset that does not exist."""
14
+
15
+
16
+ class SplitNotFoundError(Exception):
17
+ r"""Raised when trying to access a split that does not exist."""
iden/dataset/loader/__init__.py ADDED
@@ -0,0 +1,17 @@
1
+ r"""Contain dataset loader implementations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __all__ = [
6
+ "BaseDatasetLoader",
7
+ "VanillaDatasetLoader",
8
+ "is_dataset_loader_config",
9
+ "setup_dataset_loader",
10
+ ]
11
+
12
+ from iden.dataset.loader.base import (
13
+ BaseDatasetLoader,
14
+ is_dataset_loader_config,
15
+ setup_dataset_loader,
16
+ )
17
+ from iden.dataset.loader.vanilla import VanillaDatasetLoader