datamaestro 1.6.2__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamaestro/utils.py CHANGED
@@ -159,9 +159,9 @@ def downloadURL(url: str, path: Path, resume: bool = False, size: int = None):
159
159
  response = requests.get(url, stream=True)
160
160
 
161
161
  # Valid response
162
- assert (
163
- response.status_code >= 200 and response.status_code < 300
164
- ), f"Status code is not 2XX ({response.status_code})"
162
+ assert response.status_code >= 200 and response.status_code < 300, (
163
+ f"Status code is not 2XX ({response.status_code})"
164
+ )
165
165
 
166
166
  # Get the total size (or use the provided one)
167
167
  total_size = int(response.headers.get("content-length", size or 0))
@@ -171,9 +171,10 @@ def downloadURL(url: str, path: Path, resume: bool = False, size: int = None):
171
171
  total_size += pos
172
172
 
173
173
  CHUNK_SIZE = 1024
174
- with path.open("ab") as f, tqdm(
175
- initial=pos, total=total_size, unit_scale=True, unit="B"
176
- ) as t:
174
+ with (
175
+ path.open("ab") as f,
176
+ tqdm(initial=pos, total=total_size, unit_scale=True, unit="B") as t,
177
+ ):
177
178
  for data in response.iter_content(chunk_size=CHUNK_SIZE):
178
179
  f.write(data)
179
180
  t.update(len(data))
datamaestro/v2.md ADDED
@@ -0,0 +1,301 @@
1
+ # Resource Interface (v2)
2
+
3
+ ## Overview
4
+
5
+ Resources represent steps in a dataset preparation pipeline. They form a
6
+ directed acyclic graph (DAG) where each resource can depend on other resources.
7
+
8
+ Key concepts:
9
+
10
+ - **Two-path system**: resources write to `transient_path` during download,
11
+ then the framework moves data to `path` and marks the resource as COMPLETE.
12
+ - **Three states**: NONE, PARTIAL, COMPLETE (persisted in `.state.json`)
13
+ - **Transient resources**: intermediate resources that can be deleted after all
14
+ dependents are COMPLETE (eager cleanup)
15
+ - **`can_recover` property**: subclasses override to preserve PARTIAL data on error
16
+
17
+ ## Modern API: Class-based datasets (preferred)
18
+
19
+ ```python
20
+ from datamaestro.definitions import dataset
21
+ from datamaestro.download.single import FileDownloader
22
+
23
+ @dataset(url="http://yann.lecun.com/exdb/mnist/")
24
+ class ProcessedMNIST(ImageClassification):
25
+ """The MNIST database of handwritten digits."""
26
+
27
+ # Resources are class attributes — no decorators needed
28
+ TRAIN_IMAGES = FileDownloader(
29
+ "train_images.idx",
30
+ "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
31
+ transient=True,
32
+ )
33
+ TRAIN_IMAGES_NP = NumpyTensorFile.from_idx(TRAIN_IMAGES)
34
+
35
+ TRAIN_LABELS = FileDownloader(
36
+ "train_labels.idx",
37
+ "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
38
+ transient=True,
39
+ )
40
+ TRAIN_LABELS_NP = NumpyTensorFile.from_idx(TRAIN_LABELS)
41
+
42
+ TEST_IMAGES = FileDownloader(
43
+ "test_images.idx",
44
+ "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
45
+ transient=True,
46
+ )
47
+ TEST_IMAGES_NP = NumpyTensorFile.from_idx(TEST_IMAGES)
48
+
49
+ TEST_LABELS = FileDownloader(
50
+ "test_labels.idx",
51
+ "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
52
+ transient=True,
53
+ )
54
+ TEST_LABELS_NP = NumpyTensorFile.from_idx(TEST_LABELS)
55
+
56
+ @classmethod
57
+ def __create_dataset__(cls, dataset: AbstractDataset):
58
+ return cls.C(
59
+ train=LabelledImages(
60
+ images=NumpyTensorFile(path=cls.TRAIN_IMAGES_NP.path),
61
+ labels=NumpyTensorFile(path=cls.TRAIN_LABELS_NP.path),
62
+ ),
63
+ test=LabelledImages(
64
+ images=NumpyTensorFile(path=cls.TEST_IMAGES_NP.path),
65
+ labels=NumpyTensorFile(path=cls.TEST_LABELS_NP.path),
66
+ ),
67
+ )
68
+ ```
69
+
70
+ Advantages:
71
+
72
+ 1. **Explicit pipeline** — dependencies between resources are visible
73
+ 2. **Transient intermediaries** — intermediate files can be deleted after processing
74
+ 3. **No varname** — resource names are auto-detected from class attribute names
75
+ 4. **Two-path safety** — incomplete downloads never appear at the final path
76
+
77
+ ## Resource hierarchy
78
+
79
+ ```
80
+ Resource (ABC)
81
+ ├── FileResource — produces a single file
82
+ ├── FolderResource — produces a directory
83
+ ├── ValueResource — produces an in-memory value (no files)
84
+ ├── reference — references another dataset
85
+ └── Download — (deprecated alias for Resource)
86
+ ```
87
+
88
+ ### `ResourceState`
89
+
90
+ ```python
91
+ class ResourceState(str, Enum):
92
+ NONE = "none" # Not started
93
+ PARTIAL = "partial" # Started but incomplete
94
+ COMPLETE = "complete" # Fully available
95
+ ```
96
+
97
+ ### `Resource` base class
98
+
99
+ | Property / Method | Description |
100
+ |---|---|
101
+ | `name: str` | Resource name (auto-set from class attribute name) |
102
+ | `dataset` | Back-reference to the owning `AbstractDataset` |
103
+ | `transient: bool` | Whether data can be deleted after dependents complete |
104
+ | `can_recover: bool` | Property. If True, PARTIAL data is preserved on error |
105
+ | `dependencies` | List of resources that must be COMPLETE first |
106
+ | `dependents` | Computed inverse of dependencies |
107
+ | `path: Path` | Final storage path (after COMPLETE) |
108
+ | `transient_path: Path` | Temp path where `download()` writes |
109
+ | `state: ResourceState` | Current state (from `.state.json` metadata file) |
110
+ | `download(force)` | Abstract. Execute download/processing step |
111
+ | `prepare()` | Abstract. Return value for dataset construction |
112
+ | `cleanup()` | Remove data from disk, set state to NONE |
113
+ | `has_files() -> bool` | Whether this resource produces files on disk |
114
+ | `bind(name, dataset)` | Bind to a dataset (called by framework) |
115
+ | `stream() -> IO \| None` | (FileResource only) Return byte stream or None |
116
+
117
+ ### `FileResource`
118
+
119
+ Base for resources that produce a single file. Subclasses implement
120
+ `_download(destination: Path)`.
121
+
122
+ ```python
123
+ class MyFileResource(FileResource):
124
+ def __init__(self, filename, url, **kw):
125
+ super().__init__(filename, **kw)
126
+ self.url = url
127
+
128
+ def _download(self, destination: Path):
129
+ # Write to destination (which is self.transient_path)
130
+ ...
131
+ ```
132
+
133
+ ### `FolderResource`
134
+
135
+ Base for resources that produce a directory. Subclasses implement
136
+ `_download(destination: Path)`.
137
+
138
+ ### `ValueResource`
139
+
140
+ Base for resources that produce in-memory values (no files on disk).
141
+ `has_files()` returns False.
142
+
143
+ ## Custom resource handlers (modern)
144
+
145
+ ```python
146
+ from datamaestro.download import FileResource
147
+
148
+ class MyProcessor(FileResource):
149
+ """Process a source file into a numpy array."""
150
+
151
+ @property
152
+ def can_recover(self) -> bool:
153
+ return False # or True for resumable downloads
154
+
155
+ def __init__(self, filename, source, **kw):
156
+ super().__init__(filename, **kw)
157
+ self._dependencies = [source]
158
+
159
+ def _download(self, destination):
160
+ # Read from dependency, write to destination
161
+ source_path = self.dependencies[0].path
162
+ data = load(source_path)
163
+ save(process(data), destination)
164
+
165
+ @classmethod
166
+ def from_source(cls, source):
167
+ return cls("processed.npy", source)
168
+
169
+ # Factory alias
170
+ my_processor = MyProcessor.from_source
171
+ ```
172
+
173
+ ## Built-in resource types
174
+
175
+ | Class | Module | Factory alias | Base |
176
+ |---|---|---|---|
177
+ | `FileDownloader` | `download.single` | `filedownloader` | `FileResource` |
178
+ | `ConcatDownloader` | `download.single` | `concatdownload` | `FileResource` |
179
+ | `ZipDownloader` | `download.archive` | `zipdownloader` | `FolderResource` |
180
+ | `TarDownloader` | `download.archive` | `tardownloader` | `FolderResource` |
181
+ | `HFDownloader` | `download.huggingface` | `hf_download` | `ValueResource` |
182
+ | `custom_download` | `download.custom` | — | `Resource` |
183
+ | `links` | `download.links` | — | `Resource` |
184
+ | `linkfolder` | `download.links` | — | `Resource` |
185
+ | `linkfile` | `download.links` | — | `Resource` |
186
+ | `reference` | `download` | — | `Resource` |
187
+
188
+ ## Two-path download flow
189
+
190
+ The framework (in `AbstractDataset.download()`) orchestrates:
191
+
192
+ ```
193
+ 1. Topological sort resources by dependencies
194
+ 2. For each resource:
195
+ a. COMPLETE and not force → skip
196
+ b. PARTIAL and not can_recover → delete transient_path, set NONE
197
+ c. Call resource.download(force)
198
+ → Resource writes to transient_path
199
+ d. On success: move transient_path → path, set COMPLETE
200
+ e. On failure: if can_recover → set PARTIAL, else delete → NONE
201
+ f. Eager cleanup: for each transient dependency with all
202
+ dependents COMPLETE → cleanup
203
+ ```
204
+
205
+ ## State metadata file
206
+
207
+ Location: `<dataset.datapath>/.downloads/.state.json`
208
+
209
+ ```json
210
+ {
211
+ "version": 1,
212
+ "resources": {
213
+ "TRAIN_IMAGES": {"state": "complete"},
214
+ "TRAIN_LABELS": {"state": "partial"}
215
+ }
216
+ }
217
+ ```
218
+
219
+ ---
220
+
221
+ ## Deprecated: decorator-based datasets
222
+
223
+ > **Deprecated.** The decorator-based API still works but emits deprecation
224
+ > warnings. Migrate to the class-based approach above.
225
+
226
+ ```python
227
+ # DEPRECATED — use class-based approach instead
228
+ @filedownloader("train_images.idx", "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")
229
+ @filedownloader("train_labels.idx", "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz")
230
+ @filedownloader("test_images.idx", "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz")
231
+ @filedownloader("test_labels.idx", "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz")
232
+ @dataset(
233
+ ImageClassification,
234
+ url="http://yann.lecun.com/exdb/mnist/",
235
+ )
236
+ def MNIST(train_images, train_labels, test_images, test_labels):
237
+ """The MNIST database"""
238
+ return {
239
+ "train": LabelledImages(
240
+ images=IDX(path=train_images),
241
+ labels=IDX(path=train_labels)
242
+ ),
243
+ "test": LabelledImages(
244
+ images=IDX(path=test_images),
245
+ labels=IDX(path=test_labels)
246
+ ),
247
+ }
248
+ ```
249
+
250
+ ### Deprecated names
251
+
252
+ | Deprecated | Replacement |
253
+ |---|---|
254
+ | `Download` (base class) | `Resource` |
255
+ | `hasfiles()` | `has_files()` |
256
+ | `Resource.definition` | `Resource.dataset` |
257
+ | `Resource.varname` | `Resource.name` |
258
+ | `@filedownloader(...)` (decorator) | `FileDownloader(...)` (class attr) |
259
+ | `SingleDownload` | `FileDownloader` |
260
+
261
+ ### Deprecated custom handler pattern
262
+
263
+ ```python
264
+ # DEPRECATED
265
+ class MyDownload(Download):
266
+ def __init__(self, varname, custom_param):
267
+ super().__init__(varname)
268
+ self.custom_param = custom_param
269
+
270
+ def prepare(self):
271
+ return self._download_and_process()
272
+
273
+ def download(self, force=False):
274
+ if force or not self._is_cached():
275
+ self._do_download()
276
+
277
+ def hasfiles(self) -> bool:
278
+ return True
279
+
280
+ def mydownloader(varname, custom_param):
281
+ def decorator(dataset):
282
+ download = MyDownload(varname, custom_param)
283
+ download.register(dataset)
284
+ return dataset
285
+ return decorator
286
+ ```
287
+
288
+ Modern equivalent:
289
+
290
+ ```python
291
+ class MyDownload(FileResource):
292
+ def __init__(self, filename, custom_param, **kw):
293
+ super().__init__(filename, **kw)
294
+ self.custom_param = custom_param
295
+
296
+ def _download(self, destination):
297
+ # Write output to destination (self.transient_path)
298
+ self._do_download(destination)
299
+
300
+ mydownloader = MyDownload.apply
301
+ ```
datamaestro/version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # This file is auto-generated by Hatchling. As such, do not:
2
2
  # - modify
3
3
  # - track in version control e.g. be sure to add to .gitignore
4
- __version__ = VERSION = '1.6.2'
4
+ __version__ = VERSION = '1.7.1'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamaestro
3
- Version: 1.6.2
3
+ Version: 1.7.1
4
4
  Summary: Add your description here
5
5
  Author-email: Benjamin Piwowarski <benjamin@piwowarski.fr>
6
6
  License-File: LICENSE
@@ -25,6 +25,12 @@ Requires-Dist: pymdown-extensions>=10.16
25
25
  Requires-Dist: requests>=2.32.4
26
26
  Requires-Dist: tqdm>=4.67.1
27
27
  Requires-Dist: urllib3>=2.5.0
28
+ Provides-Extra: docs
29
+ Requires-Dist: myst-parser>0.18; extra == 'docs'
30
+ Requires-Dist: sphinx-codeautolink>=0.15; extra == 'docs'
31
+ Requires-Dist: sphinx-rtd-theme==1.2.2; extra == 'docs'
32
+ Requires-Dist: sphinx-toolbox>=4.1.2; extra == 'docs'
33
+ Requires-Dist: sphinx>=4.2; extra == 'docs'
28
34
  Description-Content-Type: text/markdown
29
35
 
30
36
  [![PyPI version](https://badge.fury.io/py/datamaestro.svg)](https://badge.fury.io/py/datamaestro) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) [![DOI](https://zenodo.org/badge/4573876.svg)](https://zenodo.org/badge/latestdoi/4573876)
@@ -127,57 +133,50 @@ Out[3]: (dtype('uint8'), (60000, 28, 28))
127
133
 
128
134
  ## Python definition of datasets
129
135
 
130
- Each dataset (or a set of related datasets) is described in Python using a mix of declarative
131
- and imperative statements. This allows to quickly define how to download dataset using the
132
- datamaestro declarative API; the imperative part is used when creating the JSON output,
133
- and is integrated with [experimaestro](http://experimaestro.github.io/experimaestro-python).
136
+ Datasets are defined as Python classes with resource attributes that describe how
137
+ to download and process data. The framework automatically builds a dependency graph
138
+ and handles downloads safely: data is first written to a temporary path and only moved to its final path once complete, with per-resource state tracking.
134
139
 
135
- Its syntax is described in the [documentation](https://datamaestro.readthedocs.io).
140
+ ```python
141
+ from datamaestro_image.data import ImageClassification, LabelledImages
142
+ from datamaestro.data.tensor import IDX
143
+ from datamaestro.download.single import FileDownloader
144
+ from datamaestro.definitions import AbstractDataset, dataset
136
145
 
137
146
 
138
- For instance, the MNIST dataset can be described by the following
147
+ @dataset(url="http://yann.lecun.com/exdb/mnist/")
148
+ class MNIST(ImageClassification):
149
+ """The MNIST database of handwritten digits."""
139
150
 
140
- ```python
141
- from datamaestro import dataset
142
- from datamaestro.download.single import download_file
143
- from datamaestro_image.data import ImageClassification, LabelledImages, IDXImage
144
-
145
-
146
- @filedownloader("train_images.idx", "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")
147
- @filedownloader("train_labels.idx", "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz")
148
- @filedownloader("test_images.idx", "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz")
149
- @filedownloader("test_labels.idx", "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz")
150
- @dataset(
151
- ImageClassification,
152
- url="http://yann.lecun.com/exdb/mnist/",
153
- )
154
-
155
- return ImageClassification(
156
- train=LabelledImages(
157
- images=IDXImage(path=train_images), labels=IDXImage(path=train_labels)
158
- ),
159
- test=LabelledImages(
160
- images=IDXImage(path=test_images), labels=IDXImage(path=test_labels)
161
- ),
151
+ TRAIN_IMAGES = FileDownloader(
152
+ "train_images.idx",
153
+ "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
154
+ )
155
+ TRAIN_LABELS = FileDownloader(
156
+ "train_labels.idx",
157
+ "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
158
+ )
159
+ TEST_IMAGES = FileDownloader(
160
+ "test_images.idx",
161
+ "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
162
+ )
163
+ TEST_LABELS = FileDownloader(
164
+ "test_labels.idx",
165
+ "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
162
166
  )
163
- ```
164
167
 
165
- When building dataset modules, some extra documentation can be provided:
166
-
167
- ```yaml
168
- ids: [com.lecun.mnist]
169
- entry_point: "datamaestro_image.config.com.lecun:mnist"
170
- title: The MNIST database
171
- url: http://yann.lecun.com/exdb/mnist/
172
- groups: [image-classification]
173
- description: |
174
- The MNIST database of handwritten digits, available from this page,
175
- has a training set of 60,000 examples, and a test set of 10,000
176
- examples. It is a subset of a larger set available from NIST. The
177
- digits have been size-normalized and centered in a fixed-size image.
168
+ @classmethod
169
+ def __create_dataset__(cls, dataset: AbstractDataset):
170
+ return cls.C(
171
+ train=LabelledImages(
172
+ images=IDX(path=cls.TRAIN_IMAGES.path),
173
+ labels=IDX(path=cls.TRAIN_LABELS.path),
174
+ ),
175
+ test=LabelledImages(
176
+ images=IDX(path=cls.TEST_IMAGES.path),
177
+ labels=IDX(path=cls.TEST_LABELS.path),
178
+ ),
179
+ )
178
180
  ```
179
181
 
180
- This will allow to
181
-
182
- 1. Document the dataset
183
- 2. Allow to use the command line interface to manipulate it (download resources, etc.)
182
+ The full dataset definition syntax is described in the [documentation](https://datamaestro.readthedocs.io).
@@ -0,0 +1,49 @@
1
+ datamaestro/__init__.py,sha256=oh9M4VODuvTc9EFHKirtDxpCJkLUANzpzBOIwzHc_mw,246
2
+ datamaestro/__main__.py,sha256=22v54rQoO2umL1frFO2FOQuuRljr-Jw-ER-OATTpVxw,9218
3
+ datamaestro/context.py,sha256=AL2BTi6dLA8rDGBE0PFyfV9ua29JHvBgx6_w6hDj9Dg,13977
4
+ datamaestro/definitions.py,sha256=xo-MhpQHcUPNFJtkdWOEp1jC-7pbv0TREJKVS0iDVh8,27979
5
+ datamaestro/record.py,sha256=e5fjRV3ni7ZxXwYH45bVDB_jpD-n9quvh4ie4uI-MM4,7140
6
+ datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
7
+ datamaestro/search.py,sha256=bRT-91-2VJJ2JSfNaS1mzaVfqq_HMVBVs-RBj0w-ypM,2906
8
+ datamaestro/settings.py,sha256=NuUbe_C31GDlzdio2ryz7tPzuo4hsmmdCM5Cyuhqbzs,1294
9
+ datamaestro/sphinx.py,sha256=WWXB63gd0ZgEwFr_YwO2Hmuly5OoiFlu9mDvJSHFYuY,6966
10
+ datamaestro/utils.py,sha256=JUrvtVYnjNKRo0_ZypmXSQ9R4uOyImDjW1GZ14MYzKM,6547
11
+ datamaestro/v2.md,sha256=pLCxQUdfVkd4CM9Ie0ZxCnxUntqoA7k_0m7x1etcr7Y,9801
12
+ datamaestro/version.py,sha256=Hy65VR_YBBs2cTGjk4KdU_bIDh0FtKY39zpOkoQNGIE,171
13
+ datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
14
+ datamaestro/annotations/agreement.py,sha256=xEH0ddZxdJ_oG_150PoOa-WjY_OaeQja3FzMzY5IB6k,955
15
+ datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ datamaestro/commands/mainstyle.css,sha256=EAWq6hKWjLYZ-gUrGV-z3L8LtkubD7mLoYdSIC7kLOo,465
17
+ datamaestro/commands/site.py,sha256=mVU5JKhwx9LTMf5FddcWgGh41qXtANJqB8qWKSKa-70,14432
18
+ datamaestro/data/__init__.py,sha256=s81ZxT8MQrBGkcu45xr4NaInIsMeunHOLnkLrJE47So,1496
19
+ datamaestro/data/csv.py,sha256=jcXFVBOEQoSi3YL60bqtwjCf2YXHboaMpUmiXZpzuPM,2506
20
+ datamaestro/data/huggingface.py,sha256=rCMiMqVgNI9zRAgm9PYnbwb7musYryBoIP3HuJmH4sg,691
21
+ datamaestro/data/ml.py,sha256=4PlH6FJFZwtfTEStkOjOucV8t8yY8LFaPsnDBvEqAPs,710
22
+ datamaestro/data/tensor.py,sha256=in36UQz4cdUEVmCS62pInu9RNekohRON667Z_JqNdhk,2254
23
+ datamaestro/download/__init__.py,sha256=az_H9i5ynY_tsnjYiBiFMzTzeaOoEMaxh-knj57tSLc,20087
24
+ datamaestro/download/archive.py,sha256=fz1ElRggB9gYb6F7fek0Tkw9eAj6Glotc_Mit9OcCZU,6986
25
+ datamaestro/download/custom.py,sha256=dxyvwbweVuz0xveExtvta8xycoqTjpDZz_P98ucintA,1287
26
+ datamaestro/download/huggingface.py,sha256=inZbB5EdVvczW9CfM59SqL1Nl-H4y3bWxv1SWjrYeOs,1996
27
+ datamaestro/download/links.py,sha256=m5KX93Xp7WDFEgELvAG1PbBGCIrs401u7KMZwVHrlp0,4688
28
+ datamaestro/download/manual.py,sha256=-T2QWxKAiN3ZbSujjQUVeWDEDFonw9VnlzCfBIHcLao,190
29
+ datamaestro/download/multiple.py,sha256=iX3gtgQT1eskHok0pAecU_mgd56of1Sadz7_o95ItaA,2736
30
+ datamaestro/download/single.py,sha256=nFWBH1LeGO-WmMUBbdV6bzkd6Lfe74uhfcWeLZkCC3M,5737
31
+ datamaestro/download/sync.py,sha256=QlpoOkamiX9yE-4P8-ppCZ_wgA2P4oBSOQCX98gWnCc,784
32
+ datamaestro/download/todo.py,sha256=d-mfi_gJlrOvAoa7dXN2ecXYY-cgB-NHzU1J-dzkEkI,444
33
+ datamaestro/download/wayback.py,sha256=wpbrTtE321AwsO8Poj1a4qwEKy1kE0wEbxWgMEf5nLo,5489
34
+ datamaestro/stream/__init__.py,sha256=Angu_Yg9rNKXb8s4at-DXYcnE-OTgSMLfUEfrL6APD8,896
35
+ datamaestro/stream/compress.py,sha256=0ViFGpJc6pdvZGUNERE-3XV8jAOTSvhJurb2t0NW2eU,260
36
+ datamaestro/stream/lines.py,sha256=DhptjIqhhAJ1tu3e-uoOepHHNALSXS8qz8ASUAyaSkM,2074
37
+ datamaestro/templates/dataset.py,sha256=5065rTMAIl4gtzQ96GFiV1_46tY08miIx3WspTP8yGA,346
38
+ datamaestro/test/__init__.py,sha256=9xXqLvUgiIn74AY6k8qyYX7rq6hWz7dOJFBrUgwuX88,61
39
+ datamaestro/test/checks.py,sha256=1eTkz4YJhAPOcnQSsz4vPnvzwwfrEnpn6H_s1ADISpo,1704
40
+ datamaestro/test/conftest.py,sha256=z8rF0OIKVuCgIYJ-4fQxQL8KhgIfg_4kfkIZNETfNJ0,793
41
+ datamaestro/test/test_annotations.py,sha256=XUjDWb3FJimSD91wcItJ0lLwTBmvN4wVu_EgTKSvV2c,278
42
+ datamaestro/test/test_download_handlers.py,sha256=-Gofr89zqIyeI8C4rZqfYR3JfiZVImdcSz9s6q361zQ,641
43
+ datamaestro/test/test_record.py,sha256=hNZ3uo2i5FZ0VsOHRwvLO1Z6Zce92PdipAF65UptPB8,1156
44
+ datamaestro/test/test_resource.py,sha256=QbwmZkGv_8O_jI0CKcatJSUs3IKbMfBrk0T_aTC1KcE,51124
45
+ datamaestro-1.7.1.dist-info/METADATA,sha256=7voV0DURyp-8ShRiMFK1wGE5SoMrTHL4U6if3dYvB9I,7433
46
+ datamaestro-1.7.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
47
+ datamaestro-1.7.1.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
48
+ datamaestro-1.7.1.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
49
+ datamaestro-1.7.1.dist-info/RECORD,,
@@ -1,47 +0,0 @@
1
- datamaestro/__init__.py,sha256=oh9M4VODuvTc9EFHKirtDxpCJkLUANzpzBOIwzHc_mw,246
2
- datamaestro/__main__.py,sha256=jbwzt-8Yhu4KjCkbBqsGX0yUx67IOE3Nmrc6qlGdGjs,9206
3
- datamaestro/context.py,sha256=AL2BTi6dLA8rDGBE0PFyfV9ua29JHvBgx6_w6hDj9Dg,13977
4
- datamaestro/definitions.py,sha256=byJyuh1AJ03zcaeEYOcbJQwjVI8cYJK2rrA_vAE8O1s,19776
5
- datamaestro/record.py,sha256=IxxcrSIf99iluohtpnuMBTFkqeHRe5S-T_hWEqBgeME,5812
6
- datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
7
- datamaestro/search.py,sha256=bRT-91-2VJJ2JSfNaS1mzaVfqq_HMVBVs-RBj0w-ypM,2906
8
- datamaestro/settings.py,sha256=HYSElTUYZ6DZocBb9o3ifm6WW9knRO64XJUwxGIpvwQ,1304
9
- datamaestro/sphinx.py,sha256=bp7x_2BFoTSwTqcVZDM8R8cWa7G2pz0Zb8GS054lLYM,6996
10
- datamaestro/utils.py,sha256=9m-AVVww6InAZfGFiGy6XJzfExpYNqH1fhWQEezjafA,6536
11
- datamaestro/version.py,sha256=_c7uZJ1tNg6l2QN9t8gbOsmSW56keOdPFS_09TZT714,171
12
- datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
13
- datamaestro/annotations/agreement.py,sha256=xEH0ddZxdJ_oG_150PoOa-WjY_OaeQja3FzMzY5IB6k,955
14
- datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- datamaestro/commands/mainstyle.css,sha256=EAWq6hKWjLYZ-gUrGV-z3L8LtkubD7mLoYdSIC7kLOo,465
16
- datamaestro/commands/site.py,sha256=7H9c-ZlXt7bUlldHn8fMebzDKS7B7ijPNKrxHXMG-Lk,14233
17
- datamaestro/data/__init__.py,sha256=s81ZxT8MQrBGkcu45xr4NaInIsMeunHOLnkLrJE47So,1496
18
- datamaestro/data/csv.py,sha256=jcXFVBOEQoSi3YL60bqtwjCf2YXHboaMpUmiXZpzuPM,2506
19
- datamaestro/data/huggingface.py,sha256=rCMiMqVgNI9zRAgm9PYnbwb7musYryBoIP3HuJmH4sg,691
20
- datamaestro/data/ml.py,sha256=7Rv4Tb9g17HDj8mOBJpIDjgolGQAd5Wrb0mHlnm-bPE,709
21
- datamaestro/data/tensor.py,sha256=in36UQz4cdUEVmCS62pInu9RNekohRON667Z_JqNdhk,2254
22
- datamaestro/download/__init__.py,sha256=EBoAcw2wErS8ymEYs7LJKez4UO-Gwhe4YgqRAysOxRY,2865
23
- datamaestro/download/archive.py,sha256=G-2gzepknqT7Us3naMGAApGVGJMeHQIxM-tSpaa9ark,5608
24
- datamaestro/download/custom.py,sha256=DUjDVAWuHC6sV_apMQb44Yjd6HUXkHY6Ob52FQY3t-M,587
25
- datamaestro/download/huggingface.py,sha256=b4Y437ATYrugdkvqZrPQmqiXXSrmYyqEKDVI0wnIGDE,1125
26
- datamaestro/download/links.py,sha256=GFnq_AzI_uen7JBuGWD9qveeC9QFBWDrSnj7pOcwWwM,3352
27
- datamaestro/download/manual.py,sha256=-T2QWxKAiN3ZbSujjQUVeWDEDFonw9VnlzCfBIHcLao,190
28
- datamaestro/download/multiple.py,sha256=Mrr0ObHM5cE1CPSHE9PKIrox3qZVgxwRyxLzNXp0LqM,2159
29
- datamaestro/download/single.py,sha256=fCIfZdR14YN09MQTgcxL21PWu5CjELfIClgWjFpR5mg,4148
30
- datamaestro/download/sync.py,sha256=Z_LsXj4kbZWIYKTVJZEhfdpYiv6wXOOIyw8LahmEcqs,836
31
- datamaestro/download/todo.py,sha256=y3YnmWC_i-u23ce-vreIwIXZcoO-uA0HXErgJPThnco,256
32
- datamaestro/download/wayback.py,sha256=7XuWoLkmHR65wVDv3YnL3fiMtSrjKelk3UDI9ua_t8c,5504
33
- datamaestro/stream/__init__.py,sha256=Angu_Yg9rNKXb8s4at-DXYcnE-OTgSMLfUEfrL6APD8,896
34
- datamaestro/stream/compress.py,sha256=0ViFGpJc6pdvZGUNERE-3XV8jAOTSvhJurb2t0NW2eU,260
35
- datamaestro/stream/lines.py,sha256=UNGcyZlZxN0Q7kw717jbhZFdDVmtfJfkJZCgK7xzF9A,1996
36
- datamaestro/templates/dataset.py,sha256=5065rTMAIl4gtzQ96GFiV1_46tY08miIx3WspTP8yGA,346
37
- datamaestro/test/__init__.py,sha256=8-oxS68ufD45pv_HldE4S4rSWFF6L-UB_Cms-72DD2M,22
38
- datamaestro/test/checks.py,sha256=1eTkz4YJhAPOcnQSsz4vPnvzwwfrEnpn6H_s1ADISpo,1704
39
- datamaestro/test/conftest.py,sha256=it4S5Qq1CA_U8qM0pr4m7v-1dhLj5Y49WjVg5Ee3mpM,767
40
- datamaestro/test/test_annotations.py,sha256=XUjDWb3FJimSD91wcItJ0lLwTBmvN4wVu_EgTKSvV2c,278
41
- datamaestro/test/test_download_handlers.py,sha256=-Gofr89zqIyeI8C4rZqfYR3JfiZVImdcSz9s6q361zQ,641
42
- datamaestro/test/test_record.py,sha256=hNZ3uo2i5FZ0VsOHRwvLO1Z6Zce92PdipAF65UptPB8,1156
43
- datamaestro-1.6.2.dist-info/METADATA,sha256=A1NXq-dTeuS-JIBCe9-kgoydkfuxIDeO9G-Imf0t-5w,7635
44
- datamaestro-1.6.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
45
- datamaestro-1.6.2.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
46
- datamaestro-1.6.2.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
47
- datamaestro-1.6.2.dist-info/RECORD,,