datamaestro 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamaestro/utils.py CHANGED
@@ -159,9 +159,9 @@ def downloadURL(url: str, path: Path, resume: bool = False, size: int = None):
159
159
  response = requests.get(url, stream=True)
160
160
 
161
161
  # Valid response
162
- assert (
163
- response.status_code >= 200 and response.status_code < 300
164
- ), f"Status code is not 2XX ({response.status_code})"
162
+ assert response.status_code >= 200 and response.status_code < 300, (
163
+ f"Status code is not 2XX ({response.status_code})"
164
+ )
165
165
 
166
166
  # Get the total size (or use the provided one)
167
167
  total_size = int(response.headers.get("content-length", size or 0))
@@ -171,9 +171,10 @@ def downloadURL(url: str, path: Path, resume: bool = False, size: int = None):
171
171
  total_size += pos
172
172
 
173
173
  CHUNK_SIZE = 1024
174
- with path.open("ab") as f, tqdm(
175
- initial=pos, total=total_size, unit_scale=True, unit="B"
176
- ) as t:
174
+ with (
175
+ path.open("ab") as f,
176
+ tqdm(initial=pos, total=total_size, unit_scale=True, unit="B") as t,
177
+ ):
177
178
  for data in response.iter_content(chunk_size=CHUNK_SIZE):
178
179
  f.write(data)
179
180
  t.update(len(data))
datamaestro/v2.md ADDED
@@ -0,0 +1,301 @@
1
+ # Resource Interface (v2)
2
+
3
+ ## Overview
4
+
5
+ Resources represent steps in a dataset preparation pipeline. They form a
6
+ directed acyclic graph (DAG) where each resource can depend on other resources.
7
+
8
+ Key concepts:
9
+
10
+ - **Two-path system**: resources write to `transient_path` during download,
11
+ then the framework moves data to `path` and marks the resource as COMPLETE.
12
+ - **Three states**: NONE, PARTIAL, COMPLETE (persisted in `.state.json`)
13
+ - **Transient resources**: intermediate resources that can be deleted after all
14
+ dependents are COMPLETE (eager cleanup)
15
+ - **`can_recover` property**: subclasses override to preserve PARTIAL data on error
16
+
17
+ ## Modern API: Class-based datasets (preferred)
18
+
19
+ ```python
20
+ from datamaestro.definitions import dataset
21
+ from datamaestro.download.single import FileDownloader
22
+
23
+ @dataset(url="http://yann.lecun.com/exdb/mnist/")
24
+ class ProcessedMNIST(ImageClassification):
25
+ """The MNIST database of handwritten digits."""
26
+
27
+ # Resources are class attributes — no decorators needed
28
+ TRAIN_IMAGES = FileDownloader(
29
+ "train_images.idx",
30
+ "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
31
+ transient=True,
32
+ )
33
+ TRAIN_IMAGES_NP = NumpyTensorFile.from_idx(TRAIN_IMAGES)
34
+
35
+ TRAIN_LABELS = FileDownloader(
36
+ "train_labels.idx",
37
+ "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
38
+ transient=True,
39
+ )
40
+ TRAIN_LABELS_NP = NumpyTensorFile.from_idx(TRAIN_LABELS)
41
+
42
+ TEST_IMAGES = FileDownloader(
43
+ "test_images.idx",
44
+ "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
45
+ transient=True,
46
+ )
47
+ TEST_IMAGES_NP = NumpyTensorFile.from_idx(TEST_IMAGES)
48
+
49
+ TEST_LABELS = FileDownloader(
50
+ "test_labels.idx",
51
+ "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
52
+ transient=True,
53
+ )
54
+ TEST_LABELS_NP = NumpyTensorFile.from_idx(TEST_LABELS)
55
+
56
+ @classmethod
57
+ def __create_dataset__(cls, dataset: AbstractDataset):
58
+ return cls.C(
59
+ train=LabelledImages(
60
+ images=NumpyTensorFile(path=cls.TRAIN_IMAGES_NP.path),
61
+ labels=NumpyTensorFile(path=cls.TRAIN_LABELS_NP.path),
62
+ ),
63
+ test=LabelledImages(
64
+ images=NumpyTensorFile(path=cls.TEST_IMAGES_NP.path),
65
+ labels=NumpyTensorFile(path=cls.TEST_LABELS_NP.path),
66
+ ),
67
+ )
68
+ ```
69
+
70
+ Advantages:
71
+
72
+ 1. **Explicit pipeline** — dependencies between resources are visible
73
+ 2. **Transient intermediaries** — intermediate files can be deleted after processing
74
+ 3. **No varname** — resource names are auto-detected from class attribute names
75
+ 4. **Two-path safety** — incomplete downloads never appear at the final path
76
+
77
+ ## Resource hierarchy
78
+
79
+ ```
80
+ Resource (ABC)
81
+ ├── FileResource — produces a single file
82
+ ├── FolderResource — produces a directory
83
+ ├── ValueResource — produces an in-memory value (no files)
84
+ ├── reference — references another dataset
85
+ └── Download — (deprecated alias for Resource)
86
+ ```
87
+
88
+ ### `ResourceState`
89
+
90
+ ```python
91
+ class ResourceState(str, Enum):
92
+ NONE = "none" # Not started
93
+ PARTIAL = "partial" # Started but incomplete
94
+ COMPLETE = "complete" # Fully available
95
+ ```
96
+
97
+ ### `Resource` base class
98
+
99
+ | Property / Method | Description |
100
+ |---|---|
101
+ | `name: str` | Resource name (auto-set from class attribute name) |
102
+ | `dataset` | Back-reference to the owning `AbstractDataset` |
103
+ | `transient: bool` | Whether data can be deleted after dependents complete |
104
+ | `can_recover: bool` | Property. If True, PARTIAL data is preserved on error |
105
+ | `dependencies` | List of resources that must be COMPLETE first |
106
+ | `dependents` | Computed inverse of dependencies |
107
+ | `path: Path` | Final storage path (after COMPLETE) |
108
+ | `transient_path: Path` | Temp path where `download()` writes |
109
+ | `state: ResourceState` | Current state (from `.state.json` metadata file) |
110
+ | `download(force)` | Abstract. Execute download/processing step |
111
+ | `prepare()` | Abstract. Return value for dataset construction |
112
+ | `cleanup()` | Remove data from disk, set state to NONE |
113
+ | `has_files() -> bool` | Whether this resource produces files on disk |
114
+ | `bind(name, dataset)` | Bind to a dataset (called by framework) |
115
+ | `stream() -> IO \| None` | (FileResource only) Return byte stream or None |
116
+
117
+ ### `FileResource`
118
+
119
+ Base for resources that produce a single file. Subclasses implement
120
+ `_download(destination: Path)`.
121
+
122
+ ```python
123
+ class MyFileResource(FileResource):
124
+ def __init__(self, filename, url, **kw):
125
+ super().__init__(filename, **kw)
126
+ self.url = url
127
+
128
+ def _download(self, destination: Path):
129
+ # Write to destination (which is self.transient_path)
130
+ ...
131
+ ```
132
+
133
+ ### `FolderResource`
134
+
135
+ Base for resources that produce a directory. Subclasses implement
136
+ `_download(destination: Path)`.
137
+
138
+ ### `ValueResource`
139
+
140
+ Base for resources that produce in-memory values (no files on disk).
141
+ `has_files()` returns False.
142
+
143
+ ## Custom resource handlers (modern)
144
+
145
+ ```python
146
+ from datamaestro.download import FileResource
147
+
148
+ class MyProcessor(FileResource):
149
+ """Process a source file into a numpy array."""
150
+
151
+ @property
152
+ def can_recover(self) -> bool:
153
+ return False # or True for resumable downloads
154
+
155
+ def __init__(self, filename, source, **kw):
156
+ super().__init__(filename, **kw)
157
+ self._dependencies = [source]
158
+
159
+ def _download(self, destination):
160
+ # Read from dependency, write to destination
161
+ source_path = self.dependencies[0].path
162
+ data = load(source_path)
163
+ save(process(data), destination)
164
+
165
+ @classmethod
166
+ def from_source(cls, source):
167
+ return cls("processed.npy", source)
168
+
169
+ # Factory alias
170
+ my_processor = MyProcessor.from_source
171
+ ```
172
+
173
+ ## Built-in resource types
174
+
175
+ | Class | Module | Factory alias | Base |
176
+ |---|---|---|---|
177
+ | `FileDownloader` | `download.single` | `filedownloader` | `FileResource` |
178
+ | `ConcatDownloader` | `download.single` | `concatdownload` | `FileResource` |
179
+ | `ZipDownloader` | `download.archive` | `zipdownloader` | `FolderResource` |
180
+ | `TarDownloader` | `download.archive` | `tardownloader` | `FolderResource` |
181
+ | `HFDownloader` | `download.huggingface` | `hf_download` | `ValueResource` |
182
+ | `custom_download` | `download.custom` | — | `Resource` |
183
+ | `links` | `download.links` | — | `Resource` |
184
+ | `linkfolder` | `download.links` | — | `Resource` |
185
+ | `linkfile` | `download.links` | — | `Resource` |
186
+ | `reference` | `download` | — | `Resource` |
187
+
188
+ ## Two-path download flow
189
+
190
+ The framework (in `AbstractDataset.download()`) orchestrates:
191
+
192
+ ```
193
+ 1. Topological sort resources by dependencies
194
+ 2. For each resource:
195
+ a. COMPLETE and not force → skip
196
+ b. PARTIAL and not can_recover → delete transient_path, set NONE
197
+ c. Call resource.download(force)
198
+ → Resource writes to transient_path
199
+ d. On success: move transient_path → path, set COMPLETE
200
+ e. On failure: if can_recover → set PARTIAL, else delete → NONE
201
+ f. Eager cleanup: for each transient dependency with all
202
+ dependents COMPLETE → cleanup
203
+ ```
204
+
205
+ ## State metadata file
206
+
207
+ Location: `<dataset.datapath>/.downloads/.state.json`
208
+
209
+ ```json
210
+ {
211
+ "version": 1,
212
+ "resources": {
213
+ "TRAIN_IMAGES": {"state": "complete"},
214
+ "TRAIN_LABELS": {"state": "partial"}
215
+ }
216
+ }
217
+ ```
218
+
219
+ ---
220
+
221
+ ## Deprecated: decorator-based datasets
222
+
223
+ > **Deprecated.** The decorator-based API still works but emits deprecation
224
+ > warnings. Migrate to the class-based approach above.
225
+
226
+ ```python
227
+ # DEPRECATED — use class-based approach instead
228
+ @filedownloader("train_images.idx", "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")
229
+ @filedownloader("train_labels.idx", "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz")
230
+ @filedownloader("test_images.idx", "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz")
231
+ @filedownloader("test_labels.idx", "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz")
232
+ @dataset(
233
+ ImageClassification,
234
+ url="http://yann.lecun.com/exdb/mnist/",
235
+ )
236
+ def MNIST(train_images, train_labels, test_images, test_labels):
237
+ """The MNIST database"""
238
+ return {
239
+ "train": LabelledImages(
240
+ images=IDX(path=train_images),
241
+ labels=IDX(path=train_labels)
242
+ ),
243
+ "test": LabelledImages(
244
+ images=IDX(path=test_images),
245
+ labels=IDX(path=test_labels)
246
+ ),
247
+ }
248
+ ```
249
+
250
+ ### Deprecated names
251
+
252
+ | Deprecated | Replacement |
253
+ |---|---|
254
+ | `Download` (base class) | `Resource` |
255
+ | `hasfiles()` | `has_files()` |
256
+ | `Resource.definition` | `Resource.dataset` |
257
+ | `Resource.varname` | `Resource.name` |
258
+ | `@filedownloader(...)` (decorator) | `FileDownloader(...)` (class attr) |
259
+ | `SingleDownload` | `FileDownloader` |
260
+
261
+ ### Deprecated custom handler pattern
262
+
263
+ ```python
264
+ # DEPRECATED
265
+ class MyDownload(Download):
266
+ def __init__(self, varname, custom_param):
267
+ super().__init__(varname)
268
+ self.custom_param = custom_param
269
+
270
+ def prepare(self):
271
+ return self._download_and_process()
272
+
273
+ def download(self, force=False):
274
+ if force or not self._is_cached():
275
+ self._do_download()
276
+
277
+ def hasfiles(self) -> bool:
278
+ return True
279
+
280
+ def mydownloader(varname, custom_param):
281
+ def decorator(dataset):
282
+ download = MyDownload(varname, custom_param)
283
+ download.register(dataset)
284
+ return dataset
285
+ return decorator
286
+ ```
287
+
288
+ Modern equivalent:
289
+
290
+ ```python
291
+ class MyDownload(FileResource):
292
+ def __init__(self, filename, custom_param, **kw):
293
+ super().__init__(filename, **kw)
294
+ self.custom_param = custom_param
295
+
296
+ def _download(self, destination):
297
+ # Write output to destination (self.transient_path)
298
+ self._do_download(destination)
299
+
300
+ mydownloader = MyDownload.apply
301
+ ```
datamaestro/version.py CHANGED
@@ -1,21 +1,4 @@
1
- # file generated by setuptools-scm
2
- # don't change, don't track in version control
3
-
4
- __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
-
6
- TYPE_CHECKING = False
7
- if TYPE_CHECKING:
8
- from typing import Tuple
9
- from typing import Union
10
-
11
- VERSION_TUPLE = Tuple[Union[int, str], ...]
12
- else:
13
- VERSION_TUPLE = object
14
-
15
- version: str
16
- __version__: str
17
- __version_tuple__: VERSION_TUPLE
18
- version_tuple: VERSION_TUPLE
19
-
20
- __version__ = version = '1.5.0'
21
- __version_tuple__ = version_tuple = (1, 5, 0)
1
+ # This file is auto-generated by Hatchling. As such, do not:
2
+ # - modify
3
+ # - track in version control e.g. be sure to add to .gitignore
4
+ __version__ = VERSION = '1.7.0'
@@ -1,42 +1,37 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamaestro
3
- Version: 1.5.0
4
- Summary: "Dataset management command line and API"
5
- Home-page: https://github.com/experimaestro/datamaestro
6
- Author: Benjamin Piwowarski
7
- Author-email: benjamin@piwowarski.fr
8
- License: GPL-3
9
- Keywords: dataset manager
10
- Platform: any
3
+ Version: 1.7.0
4
+ Summary: Dataset management command line and API
5
+ Author-email: Benjamin Piwowarski <benjamin@piwowarski.fr>
6
+ License-File: LICENSE
11
7
  Classifier: Development Status :: 4 - Beta
12
8
  Classifier: Intended Audience :: Science/Research
13
9
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
14
10
  Classifier: Operating System :: OS Independent
15
11
  Classifier: Programming Language :: Python
16
- Classifier: Programming Language :: Python :: 3.9
17
- Classifier: Programming Language :: Python :: 3.10
18
- Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3
19
13
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
- Requires-Python: >=3.8
21
- Description-Content-Type: text/markdown
22
- License-File: LICENSE
23
- Requires-Dist: click
24
- Requires-Dist: tqdm
25
- Requires-Dist: urllib3
26
- Requires-Dist: marshmallow
27
- Requires-Dist: cached_property
28
- Requires-Dist: requests
29
- Requires-Dist: bitmath
14
+ Requires-Python: >=3.10
15
+ Requires-Dist: bitmath>=1.3.3.1
16
+ Requires-Dist: cached-property>=2.0.1
17
+ Requires-Dist: click>=8.2.1
18
+ Requires-Dist: docstring-parser>=0.16
30
19
  Requires-Dist: experimaestro>=1.8.9
31
- Requires-Dist: mkdocs
32
- Requires-Dist: pymdown-extensions
33
- Requires-Dist: mkdocs-material
34
- Requires-Dist: docstring_parser
20
+ Requires-Dist: marshmallow>=3.26.1
21
+ Requires-Dist: mkdocs-material>=9.6.15
22
+ Requires-Dist: mkdocs>=1.6.1
35
23
  Requires-Dist: numpy
36
- Provides-Extra: test
37
- Requires-Dist: tox; extra == "test"
38
- Dynamic: license-file
39
- Dynamic: requires-dist
24
+ Requires-Dist: pymdown-extensions>=10.16
25
+ Requires-Dist: requests>=2.32.4
26
+ Requires-Dist: tqdm>=4.67.1
27
+ Requires-Dist: urllib3>=2.5.0
28
+ Provides-Extra: docs
29
+ Requires-Dist: myst-parser>0.18; extra == 'docs'
30
+ Requires-Dist: sphinx-codeautolink>=0.15; extra == 'docs'
31
+ Requires-Dist: sphinx-rtd-theme==1.2.2; extra == 'docs'
32
+ Requires-Dist: sphinx-toolbox>=4.1.2; extra == 'docs'
33
+ Requires-Dist: sphinx>=4.2; extra == 'docs'
34
+ Description-Content-Type: text/markdown
40
35
 
41
36
  [![PyPI version](https://badge.fury.io/py/datamaestro.svg)](https://badge.fury.io/py/datamaestro) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) [![DOI](https://zenodo.org/badge/4573876.svg)](https://zenodo.org/badge/latestdoi/4573876)
42
37
 
@@ -138,76 +133,50 @@ Out[3]: (dtype('uint8'), (60000, 28, 28))
138
133
 
139
134
  ## Python definition of datasets
140
135
 
141
- Each dataset (or a set of related datasets) is described in Python using a mix of declarative
142
- and imperative statements. This allows to quickly define how to download dataset using the
143
- datamaestro declarative API; the imperative part is used when creating the JSON output,
144
- and is integrated with [experimaestro](http://experimaestro.github.io/experimaestro-python).
136
+ Datasets are defined as Python classes with resource attributes that describe how
137
+ to download and process data. The framework automatically builds a dependency graph
138
+ and handles downloads with two-path safety and state tracking.
145
139
 
146
- Its syntax is described in the [documentation](https://datamaestro.readthedocs.io).
140
+ ```python
141
+ from datamaestro_image.data import ImageClassification, LabelledImages
142
+ from datamaestro.data.tensor import IDX
143
+ from datamaestro.download.single import FileDownloader
144
+ from datamaestro.definitions import AbstractDataset, dataset
147
145
 
148
146
 
149
- For instance, the MNIST dataset can be described by the following
147
+ @dataset(url="http://yann.lecun.com/exdb/mnist/")
148
+ class MNIST(ImageClassification):
149
+ """The MNIST database of handwritten digits."""
150
150
 
151
- ```python
152
- from datamaestro import dataset
153
- from datamaestro.download.single import download_file
154
- from datamaestro_image.data import ImageClassification, LabelledImages, IDXImage
155
-
156
-
157
- @filedownloader("train_images.idx", "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")
158
- @filedownloader("train_labels.idx", "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz")
159
- @filedownloader("test_images.idx", "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz")
160
- @filedownloader("test_labels.idx", "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz")
161
- @dataset(
162
- ImageClassification,
163
- url="http://yann.lecun.com/exdb/mnist/",
164
- )
165
-
166
- return ImageClassification(
167
- train=LabelledImages(
168
- images=IDXImage(path=train_images), labels=IDXImage(path=train_labels)
169
- ),
170
- test=LabelledImages(
171
- images=IDXImage(path=test_images), labels=IDXImage(path=test_labels)
172
- ),
151
+ TRAIN_IMAGES = FileDownloader(
152
+ "train_images.idx",
153
+ "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
154
+ )
155
+ TRAIN_LABELS = FileDownloader(
156
+ "train_labels.idx",
157
+ "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
158
+ )
159
+ TEST_IMAGES = FileDownloader(
160
+ "test_images.idx",
161
+ "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
162
+ )
163
+ TEST_LABELS = FileDownloader(
164
+ "test_labels.idx",
165
+ "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
173
166
  )
174
- ```
175
167
 
176
- When building dataset modules, some extra documentation can be provided:
177
-
178
- ```yaml
179
- ids: [com.lecun.mnist]
180
- entry_point: "datamaestro_image.config.com.lecun:mnist"
181
- title: The MNIST database
182
- url: http://yann.lecun.com/exdb/mnist/
183
- groups: [image-classification]
184
- description: |
185
- The MNIST database of handwritten digits, available from this page,
186
- has a training set of 60,000 examples, and a test set of 10,000
187
- examples. It is a subset of a larger set available from NIST. The
188
- digits have been size-normalized and centered in a fixed-size image.
168
+ @classmethod
169
+ def __create_dataset__(cls, dataset: AbstractDataset):
170
+ return cls.C(
171
+ train=LabelledImages(
172
+ images=IDX(path=cls.TRAIN_IMAGES.path),
173
+ labels=IDX(path=cls.TRAIN_LABELS.path),
174
+ ),
175
+ test=LabelledImages(
176
+ images=IDX(path=cls.TEST_IMAGES.path),
177
+ labels=IDX(path=cls.TEST_LABELS.path),
178
+ ),
179
+ )
189
180
  ```
190
181
 
191
- This will allow to
192
-
193
- 1. Document the dataset
194
- 2. Allow to use the command line interface to manipulate it (download resources, etc.)
195
-
196
- # 0.8.0
197
-
198
- - Integration with other repositories: abstracting away the notion of dataset
199
- - Repository prefix
200
- - Set sub-datasets IDs automatically
201
-
202
- # 0.7.3
203
-
204
- - Updates for new experimaestro (0.8.5)
205
- - Search types with "type:..."
206
-
207
- # 0.6.17
208
-
209
- - Allow remote access through rpyc
210
-
211
- # 0.6.9
212
-
213
- `version` command
182
+ The full dataset definition syntax is described in the [documentation](https://datamaestro.readthedocs.io).
@@ -0,0 +1,49 @@
1
+ datamaestro/__init__.py,sha256=oh9M4VODuvTc9EFHKirtDxpCJkLUANzpzBOIwzHc_mw,246
2
+ datamaestro/__main__.py,sha256=22v54rQoO2umL1frFO2FOQuuRljr-Jw-ER-OATTpVxw,9218
3
+ datamaestro/context.py,sha256=AL2BTi6dLA8rDGBE0PFyfV9ua29JHvBgx6_w6hDj9Dg,13977
4
+ datamaestro/definitions.py,sha256=kIwyrXZWg1tZw3G1PuUyGJ13ZPunocmu0wuxydVesbQ,27167
5
+ datamaestro/record.py,sha256=e5fjRV3ni7ZxXwYH45bVDB_jpD-n9quvh4ie4uI-MM4,7140
6
+ datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
7
+ datamaestro/search.py,sha256=bRT-91-2VJJ2JSfNaS1mzaVfqq_HMVBVs-RBj0w-ypM,2906
8
+ datamaestro/settings.py,sha256=NuUbe_C31GDlzdio2ryz7tPzuo4hsmmdCM5Cyuhqbzs,1294
9
+ datamaestro/sphinx.py,sha256=WWXB63gd0ZgEwFr_YwO2Hmuly5OoiFlu9mDvJSHFYuY,6966
10
+ datamaestro/utils.py,sha256=JUrvtVYnjNKRo0_ZypmXSQ9R4uOyImDjW1GZ14MYzKM,6547
11
+ datamaestro/v2.md,sha256=pLCxQUdfVkd4CM9Ie0ZxCnxUntqoA7k_0m7x1etcr7Y,9801
12
+ datamaestro/version.py,sha256=aCGW8aYYQ-ZQNfHZo9TrCX1MKqWbHUjj3X57h-DmRAs,171
13
+ datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
14
+ datamaestro/annotations/agreement.py,sha256=xEH0ddZxdJ_oG_150PoOa-WjY_OaeQja3FzMzY5IB6k,955
15
+ datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ datamaestro/commands/mainstyle.css,sha256=EAWq6hKWjLYZ-gUrGV-z3L8LtkubD7mLoYdSIC7kLOo,465
17
+ datamaestro/commands/site.py,sha256=mVU5JKhwx9LTMf5FddcWgGh41qXtANJqB8qWKSKa-70,14432
18
+ datamaestro/data/__init__.py,sha256=s81ZxT8MQrBGkcu45xr4NaInIsMeunHOLnkLrJE47So,1496
19
+ datamaestro/data/csv.py,sha256=jcXFVBOEQoSi3YL60bqtwjCf2YXHboaMpUmiXZpzuPM,2506
20
+ datamaestro/data/huggingface.py,sha256=rCMiMqVgNI9zRAgm9PYnbwb7musYryBoIP3HuJmH4sg,691
21
+ datamaestro/data/ml.py,sha256=4PlH6FJFZwtfTEStkOjOucV8t8yY8LFaPsnDBvEqAPs,710
22
+ datamaestro/data/tensor.py,sha256=in36UQz4cdUEVmCS62pInu9RNekohRON667Z_JqNdhk,2254
23
+ datamaestro/download/__init__.py,sha256=qbmSLtzo4zTLuc1cAVSAKDdbIJROsJa6BMP6ksVJWvU,19375
24
+ datamaestro/download/archive.py,sha256=fz1ElRggB9gYb6F7fek0Tkw9eAj6Glotc_Mit9OcCZU,6986
25
+ datamaestro/download/custom.py,sha256=dxyvwbweVuz0xveExtvta8xycoqTjpDZz_P98ucintA,1287
26
+ datamaestro/download/huggingface.py,sha256=inZbB5EdVvczW9CfM59SqL1Nl-H4y3bWxv1SWjrYeOs,1996
27
+ datamaestro/download/links.py,sha256=NCCpBFAIYznaskJV5NSFX3NoorqHDKyAiRCWSnEnb9E,4364
28
+ datamaestro/download/manual.py,sha256=-T2QWxKAiN3ZbSujjQUVeWDEDFonw9VnlzCfBIHcLao,190
29
+ datamaestro/download/multiple.py,sha256=iX3gtgQT1eskHok0pAecU_mgd56of1Sadz7_o95ItaA,2736
30
+ datamaestro/download/single.py,sha256=nFWBH1LeGO-WmMUBbdV6bzkd6Lfe74uhfcWeLZkCC3M,5737
31
+ datamaestro/download/sync.py,sha256=QlpoOkamiX9yE-4P8-ppCZ_wgA2P4oBSOQCX98gWnCc,784
32
+ datamaestro/download/todo.py,sha256=d-mfi_gJlrOvAoa7dXN2ecXYY-cgB-NHzU1J-dzkEkI,444
33
+ datamaestro/download/wayback.py,sha256=wpbrTtE321AwsO8Poj1a4qwEKy1kE0wEbxWgMEf5nLo,5489
34
+ datamaestro/stream/__init__.py,sha256=Angu_Yg9rNKXb8s4at-DXYcnE-OTgSMLfUEfrL6APD8,896
35
+ datamaestro/stream/compress.py,sha256=0ViFGpJc6pdvZGUNERE-3XV8jAOTSvhJurb2t0NW2eU,260
36
+ datamaestro/stream/lines.py,sha256=DhptjIqhhAJ1tu3e-uoOepHHNALSXS8qz8ASUAyaSkM,2074
37
+ datamaestro/templates/dataset.py,sha256=5065rTMAIl4gtzQ96GFiV1_46tY08miIx3WspTP8yGA,346
38
+ datamaestro/test/__init__.py,sha256=9xXqLvUgiIn74AY6k8qyYX7rq6hWz7dOJFBrUgwuX88,61
39
+ datamaestro/test/checks.py,sha256=1eTkz4YJhAPOcnQSsz4vPnvzwwfrEnpn6H_s1ADISpo,1704
40
+ datamaestro/test/conftest.py,sha256=z8rF0OIKVuCgIYJ-4fQxQL8KhgIfg_4kfkIZNETfNJ0,793
41
+ datamaestro/test/test_annotations.py,sha256=XUjDWb3FJimSD91wcItJ0lLwTBmvN4wVu_EgTKSvV2c,278
42
+ datamaestro/test/test_download_handlers.py,sha256=-Gofr89zqIyeI8C4rZqfYR3JfiZVImdcSz9s6q361zQ,641
43
+ datamaestro/test/test_record.py,sha256=hNZ3uo2i5FZ0VsOHRwvLO1Z6Zce92PdipAF65UptPB8,1156
44
+ datamaestro/test/test_resource.py,sha256=meUCDaoPg5XT3gWIToqXvaofE1vrZq_qG7gZtOHIOfQ,41044
45
+ datamaestro-1.7.0.dist-info/METADATA,sha256=tutvO9o9gHY7DLbF7zliiwcz2ajn7jnufkmotlA-cDQ,7433
46
+ datamaestro-1.7.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
47
+ datamaestro-1.7.0.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
48
+ datamaestro-1.7.0.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
49
+ datamaestro-1.7.0.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
@@ -1,48 +0,0 @@
1
- datamaestro/__init__.py,sha256=LR8nx7H3Fo97O0gJXV2PxQezsmSTDLAg_nQEXB5QAjc,322
2
- datamaestro/__main__.py,sha256=2p36ZcJcZAL9NZBUkMaYRUhKyqhheVPXMGw6K1KNwhk,9196
3
- datamaestro/context.py,sha256=KsXYNTt4xX4zEVrnd2hciP7PVCh1StRzjU1Ih6VeCtU,13532
4
- datamaestro/definitions.py,sha256=XFiKNHXQdZdeZWIJwGwyb_PT42r3kKpeEl_aJxQyfEc,19484
5
- datamaestro/record.py,sha256=IxxcrSIf99iluohtpnuMBTFkqeHRe5S-T_hWEqBgeME,5812
6
- datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
7
- datamaestro/search.py,sha256=bRT-91-2VJJ2JSfNaS1mzaVfqq_HMVBVs-RBj0w-ypM,2906
8
- datamaestro/settings.py,sha256=HYSElTUYZ6DZocBb9o3ifm6WW9knRO64XJUwxGIpvwQ,1304
9
- datamaestro/sphinx.py,sha256=bp7x_2BFoTSwTqcVZDM8R8cWa7G2pz0Zb8GS054lLYM,6996
10
- datamaestro/utils.py,sha256=9m-AVVww6InAZfGFiGy6XJzfExpYNqH1fhWQEezjafA,6536
11
- datamaestro/version.py,sha256=qEW4HoWHYDkBguijNs9nZzHd38qlKSeRTDG2QQbYrGY,511
12
- datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
13
- datamaestro/annotations/agreement.py,sha256=xEH0ddZxdJ_oG_150PoOa-WjY_OaeQja3FzMzY5IB6k,955
14
- datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- datamaestro/commands/mainstyle.css,sha256=EAWq6hKWjLYZ-gUrGV-z3L8LtkubD7mLoYdSIC7kLOo,465
16
- datamaestro/commands/site.py,sha256=7H9c-ZlXt7bUlldHn8fMebzDKS7B7ijPNKrxHXMG-Lk,14233
17
- datamaestro/data/__init__.py,sha256=s81ZxT8MQrBGkcu45xr4NaInIsMeunHOLnkLrJE47So,1496
18
- datamaestro/data/csv.py,sha256=jcXFVBOEQoSi3YL60bqtwjCf2YXHboaMpUmiXZpzuPM,2506
19
- datamaestro/data/huggingface.py,sha256=rCMiMqVgNI9zRAgm9PYnbwb7musYryBoIP3HuJmH4sg,691
20
- datamaestro/data/ml.py,sha256=7Rv4Tb9g17HDj8mOBJpIDjgolGQAd5Wrb0mHlnm-bPE,709
21
- datamaestro/data/tensor.py,sha256=in36UQz4cdUEVmCS62pInu9RNekohRON667Z_JqNdhk,2254
22
- datamaestro/download/__init__.py,sha256=EBoAcw2wErS8ymEYs7LJKez4UO-Gwhe4YgqRAysOxRY,2865
23
- datamaestro/download/archive.py,sha256=G-2gzepknqT7Us3naMGAApGVGJMeHQIxM-tSpaa9ark,5608
24
- datamaestro/download/custom.py,sha256=DUjDVAWuHC6sV_apMQb44Yjd6HUXkHY6Ob52FQY3t-M,587
25
- datamaestro/download/huggingface.py,sha256=b4Y437ATYrugdkvqZrPQmqiXXSrmYyqEKDVI0wnIGDE,1125
26
- datamaestro/download/links.py,sha256=GFnq_AzI_uen7JBuGWD9qveeC9QFBWDrSnj7pOcwWwM,3352
27
- datamaestro/download/manual.py,sha256=-T2QWxKAiN3ZbSujjQUVeWDEDFonw9VnlzCfBIHcLao,190
28
- datamaestro/download/multiple.py,sha256=Mrr0ObHM5cE1CPSHE9PKIrox3qZVgxwRyxLzNXp0LqM,2159
29
- datamaestro/download/single.py,sha256=fCIfZdR14YN09MQTgcxL21PWu5CjELfIClgWjFpR5mg,4148
30
- datamaestro/download/sync.py,sha256=Z_LsXj4kbZWIYKTVJZEhfdpYiv6wXOOIyw8LahmEcqs,836
31
- datamaestro/download/todo.py,sha256=y3YnmWC_i-u23ce-vreIwIXZcoO-uA0HXErgJPThnco,256
32
- datamaestro/download/wayback.py,sha256=7XuWoLkmHR65wVDv3YnL3fiMtSrjKelk3UDI9ua_t8c,5504
33
- datamaestro/stream/__init__.py,sha256=Angu_Yg9rNKXb8s4at-DXYcnE-OTgSMLfUEfrL6APD8,896
34
- datamaestro/stream/compress.py,sha256=0ViFGpJc6pdvZGUNERE-3XV8jAOTSvhJurb2t0NW2eU,260
35
- datamaestro/stream/lines.py,sha256=UNGcyZlZxN0Q7kw717jbhZFdDVmtfJfkJZCgK7xzF9A,1996
36
- datamaestro/templates/dataset.py,sha256=5065rTMAIl4gtzQ96GFiV1_46tY08miIx3WspTP8yGA,346
37
- datamaestro/test/__init__.py,sha256=8-oxS68ufD45pv_HldE4S4rSWFF6L-UB_Cms-72DD2M,22
38
- datamaestro/test/checks.py,sha256=1eTkz4YJhAPOcnQSsz4vPnvzwwfrEnpn6H_s1ADISpo,1704
39
- datamaestro/test/conftest.py,sha256=it4S5Qq1CA_U8qM0pr4m7v-1dhLj5Y49WjVg5Ee3mpM,767
40
- datamaestro/test/test_annotations.py,sha256=XUjDWb3FJimSD91wcItJ0lLwTBmvN4wVu_EgTKSvV2c,278
41
- datamaestro/test/test_download_handlers.py,sha256=-Gofr89zqIyeI8C4rZqfYR3JfiZVImdcSz9s6q361zQ,641
42
- datamaestro/test/test_record.py,sha256=hNZ3uo2i5FZ0VsOHRwvLO1Z6Zce92PdipAF65UptPB8,1156
43
- datamaestro-1.5.0.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
44
- datamaestro-1.5.0.dist-info/METADATA,sha256=7gLdwMZ5Ah2tyIDq7SHbqyXzH7ZyHXtTgU4Y7fykJPI,8191
45
- datamaestro-1.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
46
- datamaestro-1.5.0.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
47
- datamaestro-1.5.0.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
48
- datamaestro-1.5.0.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- datamaestro