datamaestro 1.4.2__py3-none-any.whl → 1.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,7 @@ from mkdocs.structure.pages import Page as MkdocPage
18
18
  from docstring_parser import parse as docstring_parse
19
19
 
20
20
  import experimaestro
21
+ import experimaestro.mkdocs.base
21
22
  from experimaestro.core.types import ObjectType
22
23
 
23
24
  from ..context import Context, Repository, Datasets
@@ -97,7 +98,7 @@ def document_data(datatype: ObjectType):
97
98
  if doc.long_description:
98
99
  s += doc.long_description + "\n"
99
100
  s += method_documentation(doc, method.__annotations__)
100
- except Exception as e:
101
+ except Exception:
101
102
  logging.error(
102
103
  "Error while parsing documetnation of %s (%s)",
103
104
  method,
@@ -108,8 +109,6 @@ def document_data(datatype: ObjectType):
108
109
 
109
110
 
110
111
  def document_object(object):
111
- from datamaestro.data import Base
112
-
113
112
  try:
114
113
  name = object.__name__
115
114
  # Get the documentation
@@ -141,7 +140,7 @@ def document_object(object):
141
140
 
142
141
  return s
143
142
 
144
- except Exception as e:
143
+ except Exception:
145
144
  logging.exception(
146
145
  "Exception while generating the documentation for %s" % object.__name__
147
146
  )
@@ -159,7 +158,7 @@ def document(match):
159
158
  module = importlib.import_module(modulename)
160
159
  try:
161
160
  object = getattr(module, name)
162
- except:
161
+ except Exception:
163
162
  return "<div class='error'>Cannot find %s in %s</div>" % (name, modulename)
164
163
 
165
164
  if ismodule(object):
@@ -182,7 +181,7 @@ class Classification:
182
181
 
183
182
  def add(self, name, value):
184
183
  key = name.lower()
185
- if not key in self.map:
184
+ if key not in self.map:
186
185
  self.map[key] = ClassificationItem(name)
187
186
  self.map[key].values.append(value)
188
187
 
@@ -201,7 +200,6 @@ class Classification:
201
200
  )
202
201
 
203
202
  def match(self, path):
204
-
205
203
  if path == "datamaestro/%s.md" % self.id:
206
204
  r = io.StringIO()
207
205
  r.write("# List of %s\n\n" % self.name)
@@ -275,7 +273,7 @@ class DatasetGenerator(mkdocs.plugins.BasePlugin):
275
273
  def parse_nav(self, nav):
276
274
  for entry in nav:
277
275
  assert len(entry) == 1
278
- key, value = *entry.keys(), *entry.values()
276
+ _, value = *entry.keys(), *entry.values()
279
277
  if isinstance(value, list):
280
278
  for value in self.parse_nav(value):
281
279
  yield value
@@ -382,7 +380,7 @@ class DatasetGenerator(mkdocs.plugins.BasePlugin):
382
380
  builder()
383
381
 
384
382
  logging.info("Watching %s", path)
385
- server.watch(path, rebuild)
383
+ # server.watch(path, rebuild)
386
384
 
387
385
  def on_page_markdown(self, markdown, page, config, **kwargs):
388
386
  if page.url.startswith("api/"):
@@ -420,7 +418,10 @@ class DatasetGenerator(mkdocs.plugins.BasePlugin):
420
418
  r.write("## List of datasets\n\n")
421
419
  for ds in df:
422
420
  r.write(
423
- """<div class="dataset-entry"><div class='dataset-id'>%s<a name="%s"></a></div>\n\n"""
421
+ (
422
+ """<div class="dataset-entry"><div class='dataset-id'>"""
423
+ """%s<a name="%s"></a></div>\n\n"""
424
+ )
424
425
  % (ds.id, ds.id)
425
426
  )
426
427
  if ds.name:
@@ -297,6 +297,7 @@ class DatasetWrapper(AbstractDataset):
297
297
  (annotation.id is None)
298
298
  or (annotation.id == "")
299
299
  or ("." not in annotation.id)
300
+ or (annotation.id[0] == ".")
300
301
  ):
301
302
  # Computes an ID
302
303
  assert (
@@ -403,7 +404,7 @@ class DatasetWrapper(AbstractDataset):
403
404
  )
404
405
 
405
406
  if isinstance(result, dict):
406
- self.config = self.base(**result)
407
+ self.config = self.base.C(**result)
407
408
  elif isinstance(result, self.base):
408
409
  self.config = result
409
410
  else:
@@ -550,38 +551,30 @@ class metadata:
550
551
 
551
552
 
552
553
  class dataset:
554
+ """Dataset decorator
555
+
556
+ Meta-datasets are not associated with any base type.
557
+
558
+ :param base: The base type (or None if inferred from type annotation).
559
+ :param timestamp: If the dataset evolves, specify its timestamp.
560
+ :param id: Gives the full ID of the dataset if it contains a '.',
561
+ the last component if not containing a '.', or the last components
562
+ if starting with '.'
563
+ :param url: The URL associated with the dataset.
564
+ :param size: The size of the dataset (should be a parsable format).
565
+ :param doi: The DOI of the corresponding paper.
566
+ """
567
+
553
568
  def __init__(
554
569
  self,
555
570
  base=None,
556
571
  *,
557
- timestamp=None,
558
- id=None,
559
- url=None,
560
- size=None,
561
- doi=None,
572
+ timestamp: str | None = None,
573
+ id: None | str = None,
574
+ url: None | str = None,
575
+ size: None | int | str = None,
576
+ doi: None | str = None,
562
577
  ):
563
- """Creates a new (meta)dataset
564
-
565
- Meta-datasets are not associated with any base type
566
-
567
- Arguments:
568
- base {[type]} -- The base type (or None if infered from type
569
- annotation)
570
-
571
- Keyword Arguments:
572
- timestamp {bool} -- If the dataset evolves, specify its timestamp
573
- (default: None)
574
-
575
- id {[type]} -- [description] (default: {None}) Gives the full ID of
576
- the dataset if it contains a ., or just the last component otherwise
577
-
578
- url {[type]} -- [description] (default: {None})
579
-
580
- size {str} -- The size (should be a parsable format)
581
-
582
- doi {str} -- The DOI of the corresponding paper
583
-
584
- """
585
578
  if hasattr(base, "__datamaestro__") and isinstance(
586
579
  base.__datamaestro__, metadataset
587
580
  ):
@@ -142,13 +142,13 @@ class wayback_documents(Resource):
142
142
  return True
143
143
 
144
144
  # Reads the URLs
145
- logging.info("Retrieving URLs from wayback")
145
+ logging.info("Retrieving URLs from wayback into %s", destination)
146
146
  pos = 0
147
147
  urls = set()
148
148
  with destination.open("at+") as fp:
149
149
  fp.seek(0)
150
150
  try:
151
- for line in fp:
151
+ while line := fp.readline():
152
152
  pos = fp.tell()
153
153
  urls.add(json.loads(line)["url"])
154
154
  except json.JSONDecodeError:
@@ -157,7 +157,8 @@ class wayback_documents(Resource):
157
157
 
158
158
  # Get the remaining ones
159
159
  for url in tqdm(self.urls_fn()):
160
- fp.write(json.dumps(download_link(url, self.timestamp)))
160
+ if url not in urls:
161
+ fp.write(json.dumps(download_link(url, self.timestamp)))
161
162
 
162
163
  # Everything is fine
163
164
  done_path.touch()
datamaestro/version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '1.4.2'
21
- __version_tuple__ = version_tuple = (1, 4, 2)
20
+ __version__ = version = '1.4.4'
21
+ __version_tuple__ = version_tuple = (1, 4, 4)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamaestro
3
- Version: 1.4.2
3
+ Version: 1.4.4
4
4
  Summary: "Dataset management command line and API"
5
5
  Home-page: https://github.com/experimaestro/datamaestro
6
6
  Author: Benjamin Piwowarski
@@ -27,7 +27,7 @@ Requires-Dist: marshmallow
27
27
  Requires-Dist: cached_property
28
28
  Requires-Dist: requests
29
29
  Requires-Dist: bitmath
30
- Requires-Dist: experimaestro>=1.6
30
+ Requires-Dist: experimaestro>=1.8.9
31
31
  Requires-Dist: mkdocs
32
32
  Requires-Dist: pymdown-extensions
33
33
  Requires-Dist: mkdocs-material
@@ -1,19 +1,19 @@
1
1
  datamaestro/__init__.py,sha256=LR8nx7H3Fo97O0gJXV2PxQezsmSTDLAg_nQEXB5QAjc,322
2
2
  datamaestro/__main__.py,sha256=2p36ZcJcZAL9NZBUkMaYRUhKyqhheVPXMGw6K1KNwhk,9196
3
3
  datamaestro/context.py,sha256=KsXYNTt4xX4zEVrnd2hciP7PVCh1StRzjU1Ih6VeCtU,13532
4
- datamaestro/definitions.py,sha256=BfdIbhm5jhUTLmV4Hz4aJxX3jSRVZt0iwC542-EG0d8,18961
4
+ datamaestro/definitions.py,sha256=XUUSTrZIyHIbOwRd0pR2FXqGQjLOZVkFqPkcaZGW4-Q,18954
5
5
  datamaestro/record.py,sha256=IxxcrSIf99iluohtpnuMBTFkqeHRe5S-T_hWEqBgeME,5812
6
6
  datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
7
7
  datamaestro/search.py,sha256=bRT-91-2VJJ2JSfNaS1mzaVfqq_HMVBVs-RBj0w-ypM,2906
8
8
  datamaestro/settings.py,sha256=HYSElTUYZ6DZocBb9o3ifm6WW9knRO64XJUwxGIpvwQ,1304
9
9
  datamaestro/sphinx.py,sha256=bp7x_2BFoTSwTqcVZDM8R8cWa7G2pz0Zb8GS054lLYM,6996
10
10
  datamaestro/utils.py,sha256=9m-AVVww6InAZfGFiGy6XJzfExpYNqH1fhWQEezjafA,6536
11
- datamaestro/version.py,sha256=Ls_J-pNiuTKX1KU6pZ6tPQXAMcmLAzW_HHHgGVQNEd0,511
11
+ datamaestro/version.py,sha256=phFUvw_LOCVKjBZIXauhRRMU1pbI044HMjZGo_n3unc,511
12
12
  datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
13
13
  datamaestro/annotations/agreement.py,sha256=xEH0ddZxdJ_oG_150PoOa-WjY_OaeQja3FzMzY5IB6k,955
14
14
  datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  datamaestro/commands/mainstyle.css,sha256=EAWq6hKWjLYZ-gUrGV-z3L8LtkubD7mLoYdSIC7kLOo,465
16
- datamaestro/commands/site.py,sha256=nnz4tOwKcgUmsLfPcQVo2SgFIC3OShYfJ8S2N6vuzAw,14173
16
+ datamaestro/commands/site.py,sha256=7H9c-ZlXt7bUlldHn8fMebzDKS7B7ijPNKrxHXMG-Lk,14233
17
17
  datamaestro/data/__init__.py,sha256=Z1qZnliJwS5sRaLznK5YBVJCjvAlPbmJjbRvvLv_UVI,1547
18
18
  datamaestro/data/csv.py,sha256=jcXFVBOEQoSi3YL60bqtwjCf2YXHboaMpUmiXZpzuPM,2506
19
19
  datamaestro/data/huggingface.py,sha256=rCMiMqVgNI9zRAgm9PYnbwb7musYryBoIP3HuJmH4sg,691
@@ -29,7 +29,7 @@ datamaestro/download/multiple.py,sha256=Mrr0ObHM5cE1CPSHE9PKIrox3qZVgxwRyxLzNXp0
29
29
  datamaestro/download/single.py,sha256=fCIfZdR14YN09MQTgcxL21PWu5CjELfIClgWjFpR5mg,4148
30
30
  datamaestro/download/sync.py,sha256=Z_LsXj4kbZWIYKTVJZEhfdpYiv6wXOOIyw8LahmEcqs,836
31
31
  datamaestro/download/todo.py,sha256=y3YnmWC_i-u23ce-vreIwIXZcoO-uA0HXErgJPThnco,256
32
- datamaestro/download/wayback.py,sha256=B9X1P9jElvd_qnUs9aX0TAO-NrNyvuHLYDAcpNq354w,5430
32
+ datamaestro/download/wayback.py,sha256=7XuWoLkmHR65wVDv3YnL3fiMtSrjKelk3UDI9ua_t8c,5504
33
33
  datamaestro/stream/__init__.py,sha256=Angu_Yg9rNKXb8s4at-DXYcnE-OTgSMLfUEfrL6APD8,896
34
34
  datamaestro/stream/compress.py,sha256=0ViFGpJc6pdvZGUNERE-3XV8jAOTSvhJurb2t0NW2eU,260
35
35
  datamaestro/stream/lines.py,sha256=UNGcyZlZxN0Q7kw717jbhZFdDVmtfJfkJZCgK7xzF9A,1996
@@ -40,9 +40,9 @@ datamaestro/test/conftest.py,sha256=it4S5Qq1CA_U8qM0pr4m7v-1dhLj5Y49WjVg5Ee3mpM,
40
40
  datamaestro/test/test_annotations.py,sha256=XUjDWb3FJimSD91wcItJ0lLwTBmvN4wVu_EgTKSvV2c,278
41
41
  datamaestro/test/test_download_handlers.py,sha256=-Gofr89zqIyeI8C4rZqfYR3JfiZVImdcSz9s6q361zQ,641
42
42
  datamaestro/test/test_record.py,sha256=hNZ3uo2i5FZ0VsOHRwvLO1Z6Zce92PdipAF65UptPB8,1156
43
- datamaestro-1.4.2.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
44
- datamaestro-1.4.2.dist-info/METADATA,sha256=hrUkhcdWUHsKQKfHuXvcpdiT8RRRlCP-G0b9k4s5BMo,8189
45
- datamaestro-1.4.2.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
46
- datamaestro-1.4.2.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
47
- datamaestro-1.4.2.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
48
- datamaestro-1.4.2.dist-info/RECORD,,
43
+ datamaestro-1.4.4.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
44
+ datamaestro-1.4.4.dist-info/METADATA,sha256=uKnq6WmuKjAVVe2nO3I3sozPV3ef4WJAp0zjNcn19jE,8191
45
+ datamaestro-1.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
46
+ datamaestro-1.4.4.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
47
+ datamaestro-1.4.4.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
48
+ datamaestro-1.4.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.4.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5