datamaestro 1.7.0__tar.gz → 1.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {datamaestro-1.7.0 → datamaestro-1.7.2}/PKG-INFO +1 -1
  2. datamaestro-1.7.2/release-notes.md +5 -0
  3. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/definitions.py +29 -4
  4. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/download/__init__.py +23 -5
  5. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/download/links.py +10 -2
  6. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/test/test_resource.py +269 -0
  7. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/version.py +1 -1
  8. datamaestro-1.7.0/release-notes.md +0 -5
  9. {datamaestro-1.7.0 → datamaestro-1.7.2}/.coverage +0 -0
  10. {datamaestro-1.7.0 → datamaestro-1.7.2}/.flake8 +0 -0
  11. {datamaestro-1.7.0 → datamaestro-1.7.2}/.github/workflows/pytest.yml +0 -0
  12. {datamaestro-1.7.0 → datamaestro-1.7.2}/.github/workflows/python-publish.yml +0 -0
  13. {datamaestro-1.7.0 → datamaestro-1.7.2}/.gitignore +0 -0
  14. {datamaestro-1.7.0 → datamaestro-1.7.2}/.pre-commit-config.yaml +0 -0
  15. {datamaestro-1.7.0 → datamaestro-1.7.2}/.python-version +0 -0
  16. {datamaestro-1.7.0 → datamaestro-1.7.2}/.readthedocs.yml +0 -0
  17. {datamaestro-1.7.0 → datamaestro-1.7.2}/CHANGELOG.md +0 -0
  18. {datamaestro-1.7.0 → datamaestro-1.7.2}/LICENSE +0 -0
  19. {datamaestro-1.7.0 → datamaestro-1.7.2}/MANIFEST.in +0 -0
  20. {datamaestro-1.7.0 → datamaestro-1.7.2}/README.md +0 -0
  21. {datamaestro-1.7.0 → datamaestro-1.7.2}/TODO.md +0 -0
  22. {datamaestro-1.7.0 → datamaestro-1.7.2}/cliff.toml +0 -0
  23. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/Makefile +0 -0
  24. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/make.bat +0 -0
  25. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/requirements.txt +0 -0
  26. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/source/api/data.md +0 -0
  27. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/source/api/download.rst +0 -0
  28. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/source/api/index.md +0 -0
  29. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/source/api/records.rst +0 -0
  30. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/source/cli.md +0 -0
  31. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/source/conf.py +0 -0
  32. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/source/configuration.md +0 -0
  33. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/source/datasets.rst +0 -0
  34. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/source/developping.md +0 -0
  35. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/source/getting-started.md +0 -0
  36. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/source/index.md +0 -0
  37. {datamaestro-1.7.0 → datamaestro-1.7.2}/docs/source/style.css +0 -0
  38. {datamaestro-1.7.0 → datamaestro-1.7.2}/pyproject.toml +0 -0
  39. {datamaestro-1.7.0 → datamaestro-1.7.2}/pytest.ini +0 -0
  40. {datamaestro-1.7.0 → datamaestro-1.7.2}/requirements-dev.txt +0 -0
  41. {datamaestro-1.7.0 → datamaestro-1.7.2}/requirements.txt +0 -0
  42. {datamaestro-1.7.0 → datamaestro-1.7.2}/schema.yaml +0 -0
  43. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/__init__.py +0 -0
  44. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/__main__.py +0 -0
  45. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/annotations/__init__.py +0 -0
  46. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/annotations/agreement.py +0 -0
  47. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/commands/__init__.py +0 -0
  48. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/commands/mainstyle.css +0 -0
  49. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/commands/site.py +0 -0
  50. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/context.py +0 -0
  51. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/data/__init__.py +0 -0
  52. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/data/csv.py +0 -0
  53. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/data/huggingface.py +0 -0
  54. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/data/ml.py +0 -0
  55. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/data/tensor.py +0 -0
  56. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/download/archive.py +0 -0
  57. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/download/custom.py +0 -0
  58. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/download/huggingface.py +0 -0
  59. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/download/manual.py +0 -0
  60. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/download/multiple.py +0 -0
  61. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/download/single.py +0 -0
  62. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/download/sync.py +0 -0
  63. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/download/todo.py +0 -0
  64. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/download/wayback.py +0 -0
  65. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/record.py +0 -0
  66. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/registry.py +0 -0
  67. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/search.py +0 -0
  68. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/settings.py +0 -0
  69. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/sphinx.py +0 -0
  70. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/stream/__init__.py +0 -0
  71. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/stream/compress.py +0 -0
  72. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/stream/lines.py +0 -0
  73. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/templates/dataset.py +0 -0
  74. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/test/__init__.py +0 -0
  75. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/test/checks.py +0 -0
  76. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/test/conftest.py +0 -0
  77. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/test/test_annotations.py +0 -0
  78. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/test/test_download_handlers.py +0 -0
  79. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/test/test_record.py +0 -0
  80. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/utils.py +0 -0
  81. {datamaestro-1.7.0 → datamaestro-1.7.2}/src/datamaestro/v2.md +0 -0
  82. {datamaestro-1.7.0 → datamaestro-1.7.2}/tox.ini +0 -0
  83. {datamaestro-1.7.0 → datamaestro-1.7.2}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamaestro
3
- Version: 1.7.0
3
+ Version: 1.7.2
4
4
  Summary: Add your description here
5
5
  Author-email: Benjamin Piwowarski <benjamin@piwowarski.fr>
6
6
  License-File: LICENSE
@@ -0,0 +1,5 @@
1
+ ## [1.7.2] - 2026-01-29
2
+
3
+ ### Bug Fixes
4
+ - Guard _resolve_reference with isinstance check for class types ([0e9f2bb](https://github.com/experimaestro/experimaestro-python/commit/0e9f2bbcce5beb5d09aaa4f2eff017b559178620))
5
+
@@ -6,6 +6,7 @@ from __future__ import annotations
6
6
 
7
7
  import logging
8
8
  import inspect
9
+ import re as _re
9
10
  import shutil
10
11
  from pathlib import Path
11
12
  from itertools import chain
@@ -33,7 +34,7 @@ from typing import Type as TypingType # noqa: F401 (re-exports)
33
34
  from experimaestro.core.types import Type # noqa: F401 (re-exports)
34
35
 
35
36
  if TYPE_CHECKING:
36
- from .data import Base, Dataset
37
+ from .data import Base
37
38
  from .context import Repository, Context, DatafolderPath # noqa: F401 (re-exports)
38
39
  from datamaestro.download import Download, Resource
39
40
 
@@ -130,6 +131,21 @@ def _move_path(src: Path, dst: Path) -> None:
130
131
  shutil.move(str(src), str(dst))
131
132
 
132
133
 
134
+ _CAMEL_RE1 = _re.compile(r"([A-Z]+)([A-Z][a-z])")
135
+ _CAMEL_RE2 = _re.compile(r"([a-z0-9])([A-Z])")
136
+
137
+
138
+ def _camel_to_snake(name: str) -> str:
139
+ """Convert CamelCase to snake_case, then lowercase.
140
+
141
+ Examples: ProcessedMNIST -> processed_mnist, MyData -> my_data,
142
+ MNIST -> mnist, simple -> simple
143
+ """
144
+ s = _CAMEL_RE1.sub(r"\1_\2", name)
145
+ s = _CAMEL_RE2.sub(r"\1_\2", s)
146
+ return s.lower()
147
+
148
+
133
149
  # --- Objects holding information into classes/function
134
150
 
135
151
 
@@ -196,7 +212,12 @@ class DataDefinition(AbstractData):
196
212
  if components[0] == "datamaestro":
197
213
  longest_ix = 0
198
214
 
199
- return repository, [s.lower() for s in components[(longest_ix + 1) :]]
215
+ parts = components[(longest_ix + 1) :]
216
+ # Module components: just lowercase
217
+ # Last component (class/function name): CamelCase → snake_case
218
+ if parts:
219
+ parts = [s.lower() for s in parts[:-1]] + [_camel_to_snake(parts[-1])]
220
+ return repository, parts
200
221
 
201
222
  def ancestors(self):
202
223
  ancestors = []
@@ -594,6 +615,10 @@ class DatasetWrapper(AbstractDataset):
594
615
  if self.base is self.t:
595
616
  self.config = self.base.__create_dataset__(self)
596
617
 
618
+ elif hasattr(self.t, "__create_dataset__"):
619
+ # Class-based dataset with metadataset or different base
620
+ self.config = self.t.__create_dataset__(self)
621
+
597
622
  else:
598
623
  # Construct the object
599
624
  if self.as_prepare:
@@ -715,8 +740,8 @@ class DatasetAnnotation:
715
740
  def __call__(self, dataset: AbstractDataset):
716
741
  if isinstance(dataset, AbstractDataset):
717
742
  self.annotate(dataset)
718
- elif issubclass(dataset, Dataset):
719
- self.annotate(dataset.__datamaestro__)
743
+ elif hasattr(dataset, "__dataset__"):
744
+ self.annotate(dataset.__dataset__)
720
745
  else:
721
746
  raise RuntimeError(
722
747
  f"Only datasets can be annotated with {self}, "
@@ -638,14 +638,32 @@ class reference(Resource):
638
638
  assert reference is not None, "Reference cannot be null"
639
639
  self.reference = reference
640
640
 
641
+ def _resolve_reference(self):
642
+ """Resolve the reference to a DatasetWrapper.
643
+
644
+ For class-based datasets, the reference is the class itself with
645
+ a __dataset__ attribute pointing to the DatasetWrapper.
646
+ For function-based datasets, the reference is already a DatasetWrapper.
647
+ """
648
+ ref = self.reference
649
+ if isinstance(ref, type) and hasattr(ref, "__dataset__"):
650
+ return ref.__dataset__
651
+ return ref
652
+
641
653
  def prepare(self):
642
- v = self.reference.prepare()
643
- if isinstance(v, AbstractDataset):
644
- return v().prepare()
645
- return v
654
+ resolved = self._resolve_reference()
655
+ if isinstance(resolved, AbstractDataset):
656
+ return resolved._prepare()
657
+ return resolved.prepare()
646
658
 
647
659
  def download(self, force=False):
648
- self.reference.__datamaestro__.download(force)
660
+ resolved = self._resolve_reference()
661
+ if isinstance(resolved, AbstractDataset):
662
+ resolved.download(force)
663
+ elif hasattr(resolved, "__datamaestro__"):
664
+ resolved.__datamaestro__.download(force)
665
+ else:
666
+ resolved.download(force)
649
667
 
650
668
  def has_files(self):
651
669
  # We don't really have files
@@ -55,9 +55,17 @@ class links(Resource):
55
55
  def download(self, force=False):
56
56
  self.path.mkdir(exist_ok=True, parents=True)
57
57
  for key, value in self.links.items():
58
- value.download(force)
58
+ # Resolve class-based datasets
59
+ if hasattr(value, "__dataset__"):
60
+ wrapper = value.__dataset__
61
+ wrapper.download(force)
62
+ path = wrapper()
63
+ elif hasattr(value, "download"):
64
+ value.download(force)
65
+ path = value()
66
+ else:
67
+ path = value # Already a path
59
68
 
60
- path = value()
61
69
  dest = self.path / key
62
70
 
63
71
  if not dest.exists():
@@ -22,6 +22,8 @@ import pytest
22
22
 
23
23
  from datamaestro.definitions import (
24
24
  AbstractDataset,
25
+ DataDefinition,
26
+ DatasetWrapper,
25
27
  topological_sort,
26
28
  _compute_dependents,
27
29
  _bind_class_resources,
@@ -1109,6 +1111,134 @@ class TestReferenceResource:
1109
1111
  reference(varname="ref", reference=None)
1110
1112
 
1111
1113
 
1114
+ # ==== Reference with Class-Based Datasets ====
1115
+
1116
+
1117
+ class TestReferenceClassBased:
1118
+ """Tests for `reference` used with class-based datasets.
1119
+
1120
+ When a class-based dataset is decorated with @dataset, the class
1121
+ gets a __dataset__ attribute pointing to the DatasetWrapper.
1122
+ The reference resource must resolve through that attribute.
1123
+ """
1124
+
1125
+ def _make_base_dataset(self, context):
1126
+ """Create a minimal class-based dataset to use as a reference target."""
1127
+ from datamaestro.data import Base
1128
+ from datamaestro.definitions import dataset as dataset_dec
1129
+
1130
+ class BaseData(Base):
1131
+ """Base test dataset."""
1132
+
1133
+ DATA = DummyFileResource("base.txt")
1134
+
1135
+ @classmethod
1136
+ def __create_dataset__(cls, dataset: AbstractDataset):
1137
+ return cls.C(id="test.base")
1138
+
1139
+ BaseData.__module__ = "datamaestro.config.test"
1140
+
1141
+ # Apply the @dataset decorator (sets __dataset__ on the class)
1142
+ dataset_dec(base=BaseData, url="http://test.com")(BaseData)
1143
+ return BaseData
1144
+
1145
+ def test_resolve_via_dataset_attr(self, context):
1146
+ """_resolve_reference follows __dataset__ for class-based targets."""
1147
+ BaseData = self._make_base_dataset(context)
1148
+
1149
+ ref = reference(varname="base", reference=BaseData)
1150
+ resolved = ref._resolve_reference()
1151
+
1152
+ assert resolved is BaseData.__dataset__
1153
+
1154
+ def test_prepare_delegates_to_class_dataset(self, context):
1155
+ """prepare() calls _prepare() on the referenced DatasetWrapper."""
1156
+ BaseData = self._make_base_dataset(context)
1157
+
1158
+ ref = reference(varname="base", reference=BaseData)
1159
+
1160
+ # Mock the DatasetWrapper._prepare to avoid full experimaestro
1161
+ # Config construction (which rejects classes defined in functions)
1162
+ sentinel = object()
1163
+ BaseData.__dataset__._prepare = MagicMock(return_value=sentinel)
1164
+
1165
+ result = ref.prepare()
1166
+ BaseData.__dataset__._prepare.assert_called_once()
1167
+ assert result is sentinel
1168
+
1169
+ def test_download_delegates_to_class_dataset(self, context):
1170
+ """download() calls download() on the referenced DatasetWrapper."""
1171
+ BaseData = self._make_base_dataset(context)
1172
+
1173
+ ref = reference(varname="base", reference=BaseData)
1174
+
1175
+ # Mock the DatasetWrapper.download to verify delegation
1176
+ BaseData.__dataset__.download = MagicMock()
1177
+
1178
+ ref.download(force=True)
1179
+ BaseData.__dataset__.download.assert_called_once_with(True)
1180
+
1181
+ def test_download_no_force(self, context):
1182
+ """download(force=False) passes force=False to the target."""
1183
+ BaseData = self._make_base_dataset(context)
1184
+
1185
+ ref = reference(varname="base", reference=BaseData)
1186
+ BaseData.__dataset__.download = MagicMock()
1187
+
1188
+ ref.download(force=False)
1189
+ BaseData.__dataset__.download.assert_called_once_with(False)
1190
+
1191
+ def test_has_files_false(self, context):
1192
+ """reference has_files() is always False."""
1193
+ BaseData = self._make_base_dataset(context)
1194
+
1195
+ ref = reference(varname="base", reference=BaseData)
1196
+ assert ref.has_files() is False
1197
+
1198
+ def test_bound_in_class_based_dataset(self, context):
1199
+ """reference works as a class attribute bound via
1200
+ _bind_class_resources."""
1201
+ BaseData = self._make_base_dataset(context)
1202
+
1203
+ repository = MyRepository(context)
1204
+ ds = SimpleDataset(repository, context.datapath / "derived_test")
1205
+
1206
+ ref = reference(varname="base", reference=BaseData)
1207
+ ref.bind("BASE", ds)
1208
+
1209
+ assert "base" in ds.resources
1210
+ assert ds.resources["base"] is ref
1211
+ assert ref.has_files() is False
1212
+
1213
+ def test_full_class_attribute_integration(self, context):
1214
+ """reference as a class attribute in a full class-based dataset."""
1215
+ from datamaestro.data import Base
1216
+
1217
+ BaseData = self._make_base_dataset(context)
1218
+
1219
+ class DerivedData(Base):
1220
+ """Derived dataset referencing the base."""
1221
+
1222
+ BASE = reference(varname="base", reference=BaseData)
1223
+
1224
+ @classmethod
1225
+ def __create_dataset__(cls, dataset: AbstractDataset):
1226
+ cls.BASE.prepare()
1227
+ return cls.C(id="test.derived")
1228
+
1229
+ repository = MyRepository(context)
1230
+ ds = SimpleDataset(repository, context.datapath / "derived_full")
1231
+
1232
+ _bind_class_resources(DerivedData, ds)
1233
+
1234
+ assert "base" in ds.resources
1235
+ assert isinstance(ds.resources["base"], reference)
1236
+
1237
+ # The reference should resolve to the base dataset
1238
+ resolved = ds.resources["base"]._resolve_reference()
1239
+ assert resolved is BaseData.__dataset__
1240
+
1241
+
1112
1242
  # ==== Links Resource Tests ====
1113
1243
 
1114
1244
 
@@ -1386,3 +1516,142 @@ class TestMultiple:
1386
1516
  from datamaestro.download.multiple import Datasets
1387
1517
 
1388
1518
  assert issubclass(Datasets, Download)
1519
+
1520
+
1521
+ # ==== Dataset ID Inference Tests ====
1522
+
1523
+
1524
+ class TestDatasetIDInference:
1525
+ """Integration tests for dataset ID inference stability.
1526
+
1527
+ Verifies that DataDefinition.repository_relpath correctly derives
1528
+ path components from type modules and names, including the
1529
+ CamelCase → snake_case conversion for the final component
1530
+ (class/function name).
1531
+ """
1532
+
1533
+ @staticmethod
1534
+ def _make_type(module, name):
1535
+ """Create a mock type with given __module__ and __name__."""
1536
+ t = type(name, (), {})
1537
+ t.__module__ = module
1538
+ return t
1539
+
1540
+ def test_all_caps_class(self, context):
1541
+ """All-caps class name (e.g. MNIST) becomes lowercase."""
1542
+ t = self._make_type("datamaestro.config.lecun", "MNIST")
1543
+ _, parts = DataDefinition.repository_relpath(t)
1544
+ assert parts == ["config", "lecun", "mnist"]
1545
+
1546
+ def test_camel_case_class(self, context):
1547
+ """CamelCase class name becomes snake_case."""
1548
+ t = self._make_type("datamaestro.config.lecun", "ProcessedMNIST")
1549
+ _, parts = DataDefinition.repository_relpath(t)
1550
+ assert parts == ["config", "lecun", "processed_mnist"]
1551
+
1552
+ def test_multi_word_camel_case(self, context):
1553
+ """Multi-word CamelCase is split with underscores."""
1554
+ t = self._make_type("datamaestro.config.data", "ImageClassification")
1555
+ _, parts = DataDefinition.repository_relpath(t)
1556
+ assert parts == ["config", "data", "image_classification"]
1557
+
1558
+ def test_lowercase_function_name(self, context):
1559
+ """Lowercase function names stay as-is."""
1560
+ t = self._make_type("datamaestro.config.lecun", "mnist")
1561
+ _, parts = DataDefinition.repository_relpath(t)
1562
+ assert parts == ["config", "lecun", "mnist"]
1563
+
1564
+ def test_name_with_digits(self, context):
1565
+ """Names with trailing digits are handled correctly."""
1566
+ t = self._make_type("datamaestro.config.trec", "Robust2005")
1567
+ _, parts = DataDefinition.repository_relpath(t)
1568
+ assert parts == ["config", "trec", "robust2005"]
1569
+
1570
+ def test_acronym_then_word(self, context):
1571
+ """Acronym followed by word splits correctly."""
1572
+ t = self._make_type("datamaestro.config.web", "HTTPSConnection")
1573
+ _, parts = DataDefinition.repository_relpath(t)
1574
+ assert parts == ["config", "web", "https_connection"]
1575
+
1576
+ def test_digit_to_upper_boundary(self, context):
1577
+ """Digit-to-uppercase boundary inserts underscore."""
1578
+ t = self._make_type("datamaestro.config.data", "V2Data")
1579
+ _, parts = DataDefinition.repository_relpath(t)
1580
+ assert parts == ["config", "data", "v2_data"]
1581
+
1582
+ def test_snake_case_passthrough(self, context):
1583
+ """Already snake_case names are unchanged."""
1584
+ t = self._make_type("datamaestro.config.lecun", "my_data")
1585
+ _, parts = DataDefinition.repository_relpath(t)
1586
+ assert parts == ["config", "lecun", "my_data"]
1587
+
1588
+ def test_module_components_lowercased(self, context):
1589
+ """Module path components are lowercased, not snake_cased."""
1590
+ t = self._make_type("datamaestro.config.LeCun.SubDir", "MNIST")
1591
+ _, parts = DataDefinition.repository_relpath(t)
1592
+ assert parts == ["config", "lecun", "subdir", "mnist"]
1593
+
1594
+ def test_only_last_component_snake_cased(self, context):
1595
+ """Only the last component gets CamelCase→snake_case;
1596
+ module components are simply lowercased."""
1597
+ t = self._make_type("datamaestro.config.MyModule.SubPkg", "ProcessedData")
1598
+ _, parts = DataDefinition.repository_relpath(t)
1599
+ # MyModule/SubPkg → lowercased; ProcessedData → snake_cased
1600
+ assert parts == [
1601
+ "config",
1602
+ "mymodule",
1603
+ "subpkg",
1604
+ "processed_data",
1605
+ ]
1606
+
1607
+ def test_full_id_class_based(self, context):
1608
+ """Full dataset ID for a class-based dataset."""
1609
+ from datamaestro.data import Base
1610
+ from datamaestro.definitions import dataset as dataset_dec
1611
+
1612
+ class ProcessedMNIST(Base):
1613
+ """Test dataset."""
1614
+
1615
+ pass
1616
+
1617
+ ProcessedMNIST.__module__ = "datamaestro.config.lecun"
1618
+
1619
+ ann = dataset_dec(base=ProcessedMNIST, url="http://test.com")
1620
+ dw = DatasetWrapper(ann, ProcessedMNIST)
1621
+ assert dw.id == "lecun.processed_mnist"
1622
+
1623
+ def test_full_id_function_based(self, context):
1624
+ """Full dataset ID for a function-based (lowercase) dataset."""
1625
+ from datamaestro.data import Base
1626
+
1627
+ class MyData(Base):
1628
+ pass
1629
+
1630
+ from datamaestro.definitions import dataset as dataset_dec
1631
+
1632
+ def mnist() -> MyData:
1633
+ pass
1634
+
1635
+ mnist.__module__ = "datamaestro.config.lecun"
1636
+
1637
+ ann = dataset_dec(url="http://test.com")
1638
+ # Infer base from return annotation
1639
+ ann.base = MyData
1640
+ dw = DatasetWrapper(ann, mnist)
1641
+ assert dw.id == "lecun.mnist"
1642
+
1643
+ def test_full_id_nested_module(self, context):
1644
+ """Full dataset ID with nested module path."""
1645
+ from datamaestro.data import Base
1646
+ from datamaestro.definitions import dataset as dataset_dec
1647
+
1648
+ class Squad(Base):
1649
+ """Test dataset."""
1650
+
1651
+ pass
1652
+
1653
+ Squad.__module__ = "datamaestro.config.stanford.qa"
1654
+
1655
+ ann = dataset_dec(base=Squad, url="http://test.com")
1656
+ dw = DatasetWrapper(ann, Squad)
1657
+ assert dw.id == "stanford.qa.squad"
@@ -1,4 +1,4 @@
1
1
  # This file is auto-generated by Hatchling. As such, do not:
2
2
  # - modify
3
3
  # - track in version control e.g. be sure to add to .gitignore
4
- __version__ = VERSION = '1.7.0'
4
+ __version__ = VERSION = '1.7.2'
@@ -1,5 +0,0 @@
1
- ## [1.7.0] - 2026-01-29
2
-
3
- ### Features
4
- - Redesign Resource interface with DAG pipelines, two-path downloads, and state tracking ([dae5e06](https://github.com/experimaestro/experimaestro-python/commit/dae5e06a6de38e7ce02c2c406a305993157b1fdb))
5
-
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes