datamaestro 1.4.0__tar.gz → 1.4.1__tar.gz
This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {datamaestro-1.4.0 → datamaestro-1.4.1}/PKG-INFO +1 -1
- {datamaestro-1.4.0 → datamaestro-1.4.1}/docs/source/api/download.rst +2 -2
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/definitions.py +18 -4
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/version.py +2 -2
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro.egg-info/PKG-INFO +1 -1
- {datamaestro-1.4.0 → datamaestro-1.4.1}/.coverage +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/.github/workflows/pytest.yml +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/.github/workflows/python-publish.yml +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/.gitignore +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/.pre-commit-config.yaml +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/.readthedocs.yml +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/CHANGELOG.md +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/LICENSE +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/MANIFEST.in +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/README.md +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/TODO.md +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/docs/Makefile +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/docs/make.bat +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/docs/requirements.txt +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/docs/source/api/data.md +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/docs/source/api/index.md +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/docs/source/api/records.rst +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/docs/source/conf.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/docs/source/datasets.rst +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/docs/source/developping.md +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/docs/source/index.md +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/docs/source/style.css +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/mkdocs.yml +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/pyproject.toml +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/pytest.ini +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/requirements-dev.txt +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/requirements.txt +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/schema.yaml +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/setup.cfg +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/setup.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/__init__.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/__main__.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/annotations/__init__.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/annotations/agreement.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/commands/__init__.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/commands/mainstyle.css +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/commands/site.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/context.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/data/__init__.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/data/csv.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/data/huggingface.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/data/ml.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/data/tensor.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/download/__init__.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/download/archive.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/download/custom.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/download/huggingface.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/download/links.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/download/manual.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/download/multiple.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/download/single.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/download/sync.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/download/todo.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/download/wayback.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/record.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/registry.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/search.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/settings.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/sphinx.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/stream/__init__.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/stream/compress.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/stream/lines.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/templates/dataset.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/test/__init__.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/test/checks.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/test/conftest.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/test/test_annotations.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/test/test_download_handlers.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/test/test_record.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro/utils.py +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro.egg-info/SOURCES.txt +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro.egg-info/dependency_links.txt +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro.egg-info/entry_points.txt +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro.egg-info/not-zip-safe +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro.egg-info/requires.txt +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/src/datamaestro.egg-info/top_level.txt +0 -0
- {datamaestro-1.4.0 → datamaestro-1.4.1}/tox.ini +0 -0
|
@@ -67,5 +67,5 @@ File hashes can be checked with the following checker
|
|
|
67
67
|
Custom
|
|
68
68
|
======
|
|
69
69
|
|
|
70
|
-
..
|
|
71
|
-
..
|
|
70
|
+
.. autoclass:: datamaestro.download.custom.Downloader
|
|
71
|
+
.. autofunction:: datamaestro.download.custom.custom_download
|
|
@@ -102,7 +102,7 @@ class DataDefinition(AbstractData):
|
|
|
102
102
|
if components[0] == "datamaestro":
|
|
103
103
|
longest_ix = 0
|
|
104
104
|
|
|
105
|
-
return repository, components[(longest_ix + 1) :]
|
|
105
|
+
return repository, [s.lower() for s in components[(longest_ix + 1) :]]
|
|
106
106
|
|
|
107
107
|
def ancestors(self):
|
|
108
108
|
ancestors = []
|
|
@@ -293,7 +293,11 @@ class DatasetWrapper(AbstractDataset):
|
|
|
293
293
|
|
|
294
294
|
# Builds the ID:
|
|
295
295
|
# Removes module_name.config prefix
|
|
296
|
-
if
|
|
296
|
+
if (
|
|
297
|
+
(annotation.id is None)
|
|
298
|
+
or (annotation.id == "")
|
|
299
|
+
or ("." not in annotation.id)
|
|
300
|
+
):
|
|
297
301
|
# Computes an ID
|
|
298
302
|
assert (
|
|
299
303
|
# id is empty string = use the module id
|
|
@@ -303,7 +307,15 @@ class DatasetWrapper(AbstractDataset):
|
|
|
303
307
|
"A @dataset without `id` should be in the "
|
|
304
308
|
f".config module (not {t.__module__})"
|
|
305
309
|
)
|
|
306
|
-
|
|
310
|
+
|
|
311
|
+
if annotation.id is None:
|
|
312
|
+
# There is nothing, use the full path
|
|
313
|
+
path = ".".join(components[1:])
|
|
314
|
+
else:
|
|
315
|
+
# Replace
|
|
316
|
+
path = ".".join(components[1:-1])
|
|
317
|
+
if annotation.id != "":
|
|
318
|
+
path = f"{path}.{annotation.id}"
|
|
307
319
|
|
|
308
320
|
self.id = path
|
|
309
321
|
else:
|
|
@@ -557,13 +569,15 @@ class dataset:
|
|
|
557
569
|
timestamp {bool} -- If the dataset evolves, specify its timestamp
|
|
558
570
|
(default: None)
|
|
559
571
|
|
|
560
|
-
id {[type]} -- [description] (default: {None})
|
|
572
|
+
id {[type]} -- [description] (default: {None}) Gives the full ID of
|
|
573
|
+
the dataset if it contains a ., or just the last component otherwise
|
|
561
574
|
|
|
562
575
|
url {[type]} -- [description] (default: {None})
|
|
563
576
|
|
|
564
577
|
size {str} -- The size (should be a parsable format)
|
|
565
578
|
|
|
566
579
|
doi {str} -- The DOI of the corresponding paper
|
|
580
|
+
|
|
567
581
|
"""
|
|
568
582
|
if hasattr(base, "__datamaestro__") and isinstance(
|
|
569
583
|
base.__datamaestro__, metadataset
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|