datamaestro 1.4.0__tar.gz → 1.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {datamaestro-1.4.0 → datamaestro-1.4.2}/PKG-INFO +1 -1
  2. {datamaestro-1.4.0 → datamaestro-1.4.2}/docs/source/api/download.rst +2 -2
  3. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/definitions.py +45 -28
  4. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/record.py +11 -2
  5. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/version.py +2 -2
  6. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro.egg-info/PKG-INFO +1 -1
  7. {datamaestro-1.4.0 → datamaestro-1.4.2}/.coverage +0 -0
  8. {datamaestro-1.4.0 → datamaestro-1.4.2}/.github/workflows/pytest.yml +0 -0
  9. {datamaestro-1.4.0 → datamaestro-1.4.2}/.github/workflows/python-publish.yml +0 -0
  10. {datamaestro-1.4.0 → datamaestro-1.4.2}/.gitignore +0 -0
  11. {datamaestro-1.4.0 → datamaestro-1.4.2}/.pre-commit-config.yaml +0 -0
  12. {datamaestro-1.4.0 → datamaestro-1.4.2}/.readthedocs.yml +0 -0
  13. {datamaestro-1.4.0 → datamaestro-1.4.2}/CHANGELOG.md +0 -0
  14. {datamaestro-1.4.0 → datamaestro-1.4.2}/LICENSE +0 -0
  15. {datamaestro-1.4.0 → datamaestro-1.4.2}/MANIFEST.in +0 -0
  16. {datamaestro-1.4.0 → datamaestro-1.4.2}/README.md +0 -0
  17. {datamaestro-1.4.0 → datamaestro-1.4.2}/TODO.md +0 -0
  18. {datamaestro-1.4.0 → datamaestro-1.4.2}/docs/Makefile +0 -0
  19. {datamaestro-1.4.0 → datamaestro-1.4.2}/docs/make.bat +0 -0
  20. {datamaestro-1.4.0 → datamaestro-1.4.2}/docs/requirements.txt +0 -0
  21. {datamaestro-1.4.0 → datamaestro-1.4.2}/docs/source/api/data.md +0 -0
  22. {datamaestro-1.4.0 → datamaestro-1.4.2}/docs/source/api/index.md +0 -0
  23. {datamaestro-1.4.0 → datamaestro-1.4.2}/docs/source/api/records.rst +0 -0
  24. {datamaestro-1.4.0 → datamaestro-1.4.2}/docs/source/conf.py +0 -0
  25. {datamaestro-1.4.0 → datamaestro-1.4.2}/docs/source/datasets.rst +0 -0
  26. {datamaestro-1.4.0 → datamaestro-1.4.2}/docs/source/developping.md +0 -0
  27. {datamaestro-1.4.0 → datamaestro-1.4.2}/docs/source/index.md +0 -0
  28. {datamaestro-1.4.0 → datamaestro-1.4.2}/docs/source/style.css +0 -0
  29. {datamaestro-1.4.0 → datamaestro-1.4.2}/mkdocs.yml +0 -0
  30. {datamaestro-1.4.0 → datamaestro-1.4.2}/pyproject.toml +0 -0
  31. {datamaestro-1.4.0 → datamaestro-1.4.2}/pytest.ini +0 -0
  32. {datamaestro-1.4.0 → datamaestro-1.4.2}/requirements-dev.txt +0 -0
  33. {datamaestro-1.4.0 → datamaestro-1.4.2}/requirements.txt +0 -0
  34. {datamaestro-1.4.0 → datamaestro-1.4.2}/schema.yaml +0 -0
  35. {datamaestro-1.4.0 → datamaestro-1.4.2}/setup.cfg +0 -0
  36. {datamaestro-1.4.0 → datamaestro-1.4.2}/setup.py +0 -0
  37. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/__init__.py +0 -0
  38. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/__main__.py +0 -0
  39. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/annotations/__init__.py +0 -0
  40. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/annotations/agreement.py +0 -0
  41. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/commands/__init__.py +0 -0
  42. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/commands/mainstyle.css +0 -0
  43. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/commands/site.py +0 -0
  44. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/context.py +0 -0
  45. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/data/__init__.py +0 -0
  46. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/data/csv.py +0 -0
  47. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/data/huggingface.py +0 -0
  48. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/data/ml.py +0 -0
  49. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/data/tensor.py +0 -0
  50. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/download/__init__.py +0 -0
  51. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/download/archive.py +0 -0
  52. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/download/custom.py +0 -0
  53. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/download/huggingface.py +0 -0
  54. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/download/links.py +0 -0
  55. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/download/manual.py +0 -0
  56. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/download/multiple.py +0 -0
  57. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/download/single.py +0 -0
  58. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/download/sync.py +0 -0
  59. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/download/todo.py +0 -0
  60. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/download/wayback.py +0 -0
  61. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/registry.py +0 -0
  62. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/search.py +0 -0
  63. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/settings.py +0 -0
  64. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/sphinx.py +0 -0
  65. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/stream/__init__.py +0 -0
  66. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/stream/compress.py +0 -0
  67. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/stream/lines.py +0 -0
  68. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/templates/dataset.py +0 -0
  69. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/test/__init__.py +0 -0
  70. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/test/checks.py +0 -0
  71. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/test/conftest.py +0 -0
  72. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/test/test_annotations.py +0 -0
  73. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/test/test_download_handlers.py +0 -0
  74. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/test/test_record.py +0 -0
  75. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro/utils.py +0 -0
  76. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro.egg-info/SOURCES.txt +0 -0
  77. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro.egg-info/dependency_links.txt +0 -0
  78. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro.egg-info/entry_points.txt +0 -0
  79. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro.egg-info/not-zip-safe +0 -0
  80. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro.egg-info/requires.txt +0 -0
  81. {datamaestro-1.4.0 → datamaestro-1.4.2}/src/datamaestro.egg-info/top_level.txt +0 -0
  82. {datamaestro-1.4.0 → datamaestro-1.4.2}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamaestro
3
- Version: 1.4.0
3
+ Version: 1.4.2
4
4
  Summary: "Dataset management command line and API"
5
5
  Home-page: https://github.com/experimaestro/datamaestro
6
6
  Author: Benjamin Piwowarski
@@ -67,5 +67,5 @@ File hashes can be checked with the following checker
67
67
  Custom
68
68
  ======
69
69
 
70
- .. autofunction:: datamaestro.download.custom.Downloader
71
- .. autoclass:: datamaestro.download.custom.custom_download
70
+ .. autoclass:: datamaestro.download.custom.Downloader
71
+ .. autofunction:: datamaestro.download.custom.custom_download
@@ -102,7 +102,7 @@ class DataDefinition(AbstractData):
102
102
  if components[0] == "datamaestro":
103
103
  longest_ix = 0
104
104
 
105
- return repository, components[(longest_ix + 1) :]
105
+ return repository, [s.lower() for s in components[(longest_ix + 1) :]]
106
106
 
107
107
  def ancestors(self):
108
108
  ancestors = []
@@ -293,7 +293,11 @@ class DatasetWrapper(AbstractDataset):
293
293
 
294
294
  # Builds the ID:
295
295
  # Removes module_name.config prefix
296
- if annotation.id is None or annotation.id == "":
296
+ if (
297
+ (annotation.id is None)
298
+ or (annotation.id == "")
299
+ or ("." not in annotation.id)
300
+ ):
297
301
  # Computes an ID
298
302
  assert (
299
303
  # id is empty string = use the module id
@@ -303,7 +307,15 @@ class DatasetWrapper(AbstractDataset):
303
307
  "A @dataset without `id` should be in the "
304
308
  f".config module (not {t.__module__})"
305
309
  )
306
- path = ".".join(components[1:-1])
310
+
311
+ if annotation.id is None:
312
+ # There is nothing, use the full path
313
+ path = ".".join(components[1:])
314
+ else:
315
+ # Replace
316
+ path = ".".join(components[1:-1])
317
+ if annotation.id != "":
318
+ path = f"{path}.{annotation.id}"
307
319
 
308
320
  self.id = path
309
321
  else:
@@ -369,33 +381,36 @@ class DatasetWrapper(AbstractDataset):
369
381
  if self.base is self.t:
370
382
  self.config = self.base.__create_dataset__(self)
371
383
 
372
- # Construct the object
373
- resources = {key: value.prepare() for key, value in self.resources.items()}
374
-
375
- result = self.t(**resources)
384
+ else:
385
+ # Construct the object
386
+ resources = {key: value.prepare() for key, value in self.resources.items()}
376
387
 
377
- # Download resources
378
- logging.debug("Building with data type %s and dataset %s", self.base, self.t)
379
- for hook in self.hooks["pre-use"]:
380
- hook(self)
388
+ result = self.t(**resources)
381
389
 
382
- if result is None:
383
- name = self.t.__name__
384
- filename = inspect.getfile(self.t)
385
- raise Exception(
386
- f"The dataset method {name} defined in "
387
- f"{filename} returned a null object"
388
- )
389
-
390
- if isinstance(result, dict):
391
- self.config = self.base(**result)
392
- elif isinstance(result, self.base):
393
- self.config = result
394
- else:
395
- raise RuntimeError(
396
- f"The dataset method {name} defined in "
397
- f"{filename} returned an object of type {type(dict)}"
390
+ # Download resources
391
+ logging.debug(
392
+ "Building with data type %s and dataset %s", self.base, self.t
398
393
  )
394
+ for hook in self.hooks["pre-use"]:
395
+ hook(self)
396
+
397
+ if result is None:
398
+ name = self.t.__name__
399
+ filename = inspect.getfile(self.t)
400
+ raise Exception(
401
+ f"The dataset method {name} defined in "
402
+ f"{filename} returned a null object"
403
+ )
404
+
405
+ if isinstance(result, dict):
406
+ self.config = self.base(**result)
407
+ elif isinstance(result, self.base):
408
+ self.config = result
409
+ else:
410
+ raise RuntimeError(
411
+ f"The dataset method {name} defined in "
412
+ f"{filename} returned an object of type {type(dict)}"
413
+ )
399
414
 
400
415
  # Setup ourself
401
416
  self.config.__datamaestro_dataset__ = self
@@ -557,13 +572,15 @@ class dataset:
557
572
  timestamp {bool} -- If the dataset evolves, specify its timestamp
558
573
  (default: None)
559
574
 
560
- id {[type]} -- [description] (default: {None})
575
+ id {[type]} -- [description] (default: {None}) Gives the full ID of
576
+ the dataset if it contains a ., or just the last component otherwise
561
577
 
562
578
  url {[type]} -- [description] (default: {None})
563
579
 
564
580
  size {str} -- The size (should be a parsable format)
565
581
 
566
582
  doi {str} -- The DOI of the corresponding paper
583
+
567
584
  """
568
585
  if hasattr(base, "__datamaestro__") and isinstance(
569
586
  base.__datamaestro__, metadataset
@@ -160,11 +160,20 @@ class Record:
160
160
  def __getitem__(self, key: Type[T]) -> T:
161
161
  """Get an item given its type"""
162
162
  base = key.__get_base__()
163
- entry = self.items[base]
163
+ try:
164
+ entry = self.items[base]
165
+ except KeyError:
166
+ raise KeyError(
167
+ f"""No entry with type {key}: """
168
+ f"""{",".join(str(s) for s in self.items.keys())}"""
169
+ )
164
170
 
165
171
  # Check if this matches the expected class
166
172
  if not isinstance(entry, key):
167
- raise KeyError(f"No entry with type {key}")
173
+ raise KeyError(
174
+ f"""No entry with type {key}: """
175
+ f"""{",".join(str(s) for s in self.items.keys())}"""
176
+ )
168
177
  return entry
169
178
 
170
179
  def update(self, *items: T, target: RecordType = None) -> "Record":
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '1.4.0'
21
- __version_tuple__ = version_tuple = (1, 4, 0)
20
+ __version__ = version = '1.4.2'
21
+ __version_tuple__ = version_tuple = (1, 4, 2)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamaestro
3
- Version: 1.4.0
3
+ Version: 1.4.2
4
4
  Summary: "Dataset management command line and API"
5
5
  Home-page: https://github.com/experimaestro/datamaestro
6
6
  Author: Benjamin Piwowarski
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes