datamaestro 1.4.2__py3-none-any.whl → 1.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamaestro/commands/site.py +11 -10
- datamaestro/definitions.py +21 -28
- datamaestro/download/wayback.py +4 -3
- datamaestro/version.py +2 -2
- {datamaestro-1.4.2.dist-info → datamaestro-1.4.4.dist-info}/METADATA +2 -2
- {datamaestro-1.4.2.dist-info → datamaestro-1.4.4.dist-info}/RECORD +10 -10
- {datamaestro-1.4.2.dist-info → datamaestro-1.4.4.dist-info}/WHEEL +1 -1
- {datamaestro-1.4.2.dist-info → datamaestro-1.4.4.dist-info}/entry_points.txt +0 -0
- {datamaestro-1.4.2.dist-info → datamaestro-1.4.4.dist-info}/licenses/LICENSE +0 -0
- {datamaestro-1.4.2.dist-info → datamaestro-1.4.4.dist-info}/top_level.txt +0 -0
datamaestro/commands/site.py
CHANGED
|
@@ -18,6 +18,7 @@ from mkdocs.structure.pages import Page as MkdocPage
|
|
|
18
18
|
from docstring_parser import parse as docstring_parse
|
|
19
19
|
|
|
20
20
|
import experimaestro
|
|
21
|
+
import experimaestro.mkdocs.base
|
|
21
22
|
from experimaestro.core.types import ObjectType
|
|
22
23
|
|
|
23
24
|
from ..context import Context, Repository, Datasets
|
|
@@ -97,7 +98,7 @@ def document_data(datatype: ObjectType):
|
|
|
97
98
|
if doc.long_description:
|
|
98
99
|
s += doc.long_description + "\n"
|
|
99
100
|
s += method_documentation(doc, method.__annotations__)
|
|
100
|
-
except Exception
|
|
101
|
+
except Exception:
|
|
101
102
|
logging.error(
|
|
102
103
|
"Error while parsing documetnation of %s (%s)",
|
|
103
104
|
method,
|
|
@@ -108,8 +109,6 @@ def document_data(datatype: ObjectType):
|
|
|
108
109
|
|
|
109
110
|
|
|
110
111
|
def document_object(object):
|
|
111
|
-
from datamaestro.data import Base
|
|
112
|
-
|
|
113
112
|
try:
|
|
114
113
|
name = object.__name__
|
|
115
114
|
# Get the documentation
|
|
@@ -141,7 +140,7 @@ def document_object(object):
|
|
|
141
140
|
|
|
142
141
|
return s
|
|
143
142
|
|
|
144
|
-
except Exception
|
|
143
|
+
except Exception:
|
|
145
144
|
logging.exception(
|
|
146
145
|
"Exception while generating the documentation for %s" % object.__name__
|
|
147
146
|
)
|
|
@@ -159,7 +158,7 @@ def document(match):
|
|
|
159
158
|
module = importlib.import_module(modulename)
|
|
160
159
|
try:
|
|
161
160
|
object = getattr(module, name)
|
|
162
|
-
except:
|
|
161
|
+
except Exception:
|
|
163
162
|
return "<div class='error'>Cannot find %s in %s</div>" % (name, modulename)
|
|
164
163
|
|
|
165
164
|
if ismodule(object):
|
|
@@ -182,7 +181,7 @@ class Classification:
|
|
|
182
181
|
|
|
183
182
|
def add(self, name, value):
|
|
184
183
|
key = name.lower()
|
|
185
|
-
if not
|
|
184
|
+
if key not in self.map:
|
|
186
185
|
self.map[key] = ClassificationItem(name)
|
|
187
186
|
self.map[key].values.append(value)
|
|
188
187
|
|
|
@@ -201,7 +200,6 @@ class Classification:
|
|
|
201
200
|
)
|
|
202
201
|
|
|
203
202
|
def match(self, path):
|
|
204
|
-
|
|
205
203
|
if path == "datamaestro/%s.md" % self.id:
|
|
206
204
|
r = io.StringIO()
|
|
207
205
|
r.write("# List of %s\n\n" % self.name)
|
|
@@ -275,7 +273,7 @@ class DatasetGenerator(mkdocs.plugins.BasePlugin):
|
|
|
275
273
|
def parse_nav(self, nav):
|
|
276
274
|
for entry in nav:
|
|
277
275
|
assert len(entry) == 1
|
|
278
|
-
|
|
276
|
+
_, value = *entry.keys(), *entry.values()
|
|
279
277
|
if isinstance(value, list):
|
|
280
278
|
for value in self.parse_nav(value):
|
|
281
279
|
yield value
|
|
@@ -382,7 +380,7 @@ class DatasetGenerator(mkdocs.plugins.BasePlugin):
|
|
|
382
380
|
builder()
|
|
383
381
|
|
|
384
382
|
logging.info("Watching %s", path)
|
|
385
|
-
server.watch(path, rebuild)
|
|
383
|
+
# server.watch(path, rebuild)
|
|
386
384
|
|
|
387
385
|
def on_page_markdown(self, markdown, page, config, **kwargs):
|
|
388
386
|
if page.url.startswith("api/"):
|
|
@@ -420,7 +418,10 @@ class DatasetGenerator(mkdocs.plugins.BasePlugin):
|
|
|
420
418
|
r.write("## List of datasets\n\n")
|
|
421
419
|
for ds in df:
|
|
422
420
|
r.write(
|
|
423
|
-
|
|
421
|
+
(
|
|
422
|
+
"""<div class="dataset-entry"><div class='dataset-id'>"""
|
|
423
|
+
"""%s<a name="%s"></a></div>\n\n"""
|
|
424
|
+
)
|
|
424
425
|
% (ds.id, ds.id)
|
|
425
426
|
)
|
|
426
427
|
if ds.name:
|
datamaestro/definitions.py
CHANGED
|
@@ -297,6 +297,7 @@ class DatasetWrapper(AbstractDataset):
|
|
|
297
297
|
(annotation.id is None)
|
|
298
298
|
or (annotation.id == "")
|
|
299
299
|
or ("." not in annotation.id)
|
|
300
|
+
or (annotation.id[0] == ".")
|
|
300
301
|
):
|
|
301
302
|
# Computes an ID
|
|
302
303
|
assert (
|
|
@@ -403,7 +404,7 @@ class DatasetWrapper(AbstractDataset):
|
|
|
403
404
|
)
|
|
404
405
|
|
|
405
406
|
if isinstance(result, dict):
|
|
406
|
-
self.config = self.base(**result)
|
|
407
|
+
self.config = self.base.C(**result)
|
|
407
408
|
elif isinstance(result, self.base):
|
|
408
409
|
self.config = result
|
|
409
410
|
else:
|
|
@@ -550,38 +551,30 @@ class metadata:
|
|
|
550
551
|
|
|
551
552
|
|
|
552
553
|
class dataset:
|
|
554
|
+
"""Dataset decorator
|
|
555
|
+
|
|
556
|
+
Meta-datasets are not associated with any base type.
|
|
557
|
+
|
|
558
|
+
:param base: The base type (or None if inferred from type annotation).
|
|
559
|
+
:param timestamp: If the dataset evolves, specify its timestamp.
|
|
560
|
+
:param id: Gives the full ID of the dataset if it contains a '.',
|
|
561
|
+
the last component if not containing a '.', or the last components
|
|
562
|
+
if starting with '.'
|
|
563
|
+
:param url: The URL associated with the dataset.
|
|
564
|
+
:param size: The size of the dataset (should be a parsable format).
|
|
565
|
+
:param doi: The DOI of the corresponding paper.
|
|
566
|
+
"""
|
|
567
|
+
|
|
553
568
|
def __init__(
|
|
554
569
|
self,
|
|
555
570
|
base=None,
|
|
556
571
|
*,
|
|
557
|
-
timestamp=None,
|
|
558
|
-
id=None,
|
|
559
|
-
url=None,
|
|
560
|
-
size=None,
|
|
561
|
-
doi=None,
|
|
572
|
+
timestamp: str | None = None,
|
|
573
|
+
id: None | str = None,
|
|
574
|
+
url: None | str = None,
|
|
575
|
+
size: None | int | str = None,
|
|
576
|
+
doi: None | str = None,
|
|
562
577
|
):
|
|
563
|
-
"""Creates a new (meta)dataset
|
|
564
|
-
|
|
565
|
-
Meta-datasets are not associated with any base type
|
|
566
|
-
|
|
567
|
-
Arguments:
|
|
568
|
-
base {[type]} -- The base type (or None if infered from type
|
|
569
|
-
annotation)
|
|
570
|
-
|
|
571
|
-
Keyword Arguments:
|
|
572
|
-
timestamp {bool} -- If the dataset evolves, specify its timestamp
|
|
573
|
-
(default: None)
|
|
574
|
-
|
|
575
|
-
id {[type]} -- [description] (default: {None}) Gives the full ID of
|
|
576
|
-
the dataset if it contains a ., or just the last component otherwise
|
|
577
|
-
|
|
578
|
-
url {[type]} -- [description] (default: {None})
|
|
579
|
-
|
|
580
|
-
size {str} -- The size (should be a parsable format)
|
|
581
|
-
|
|
582
|
-
doi {str} -- The DOI of the corresponding paper
|
|
583
|
-
|
|
584
|
-
"""
|
|
585
578
|
if hasattr(base, "__datamaestro__") and isinstance(
|
|
586
579
|
base.__datamaestro__, metadataset
|
|
587
580
|
):
|
datamaestro/download/wayback.py
CHANGED
|
@@ -142,13 +142,13 @@ class wayback_documents(Resource):
|
|
|
142
142
|
return True
|
|
143
143
|
|
|
144
144
|
# Reads the URLs
|
|
145
|
-
logging.info("Retrieving URLs from wayback")
|
|
145
|
+
logging.info("Retrieving URLs from wayback into %s", destination)
|
|
146
146
|
pos = 0
|
|
147
147
|
urls = set()
|
|
148
148
|
with destination.open("at+") as fp:
|
|
149
149
|
fp.seek(0)
|
|
150
150
|
try:
|
|
151
|
-
|
|
151
|
+
while line := fp.readline():
|
|
152
152
|
pos = fp.tell()
|
|
153
153
|
urls.add(json.loads(line)["url"])
|
|
154
154
|
except json.JSONDecodeError:
|
|
@@ -157,7 +157,8 @@ class wayback_documents(Resource):
|
|
|
157
157
|
|
|
158
158
|
# Get the remaining ones
|
|
159
159
|
for url in tqdm(self.urls_fn()):
|
|
160
|
-
|
|
160
|
+
if url not in urls:
|
|
161
|
+
fp.write(json.dumps(download_link(url, self.timestamp)))
|
|
161
162
|
|
|
162
163
|
# Everything is fine
|
|
163
164
|
done_path.touch()
|
datamaestro/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datamaestro
|
|
3
|
-
Version: 1.4.
|
|
3
|
+
Version: 1.4.4
|
|
4
4
|
Summary: "Dataset management command line and API"
|
|
5
5
|
Home-page: https://github.com/experimaestro/datamaestro
|
|
6
6
|
Author: Benjamin Piwowarski
|
|
@@ -27,7 +27,7 @@ Requires-Dist: marshmallow
|
|
|
27
27
|
Requires-Dist: cached_property
|
|
28
28
|
Requires-Dist: requests
|
|
29
29
|
Requires-Dist: bitmath
|
|
30
|
-
Requires-Dist: experimaestro>=1.
|
|
30
|
+
Requires-Dist: experimaestro>=1.8.9
|
|
31
31
|
Requires-Dist: mkdocs
|
|
32
32
|
Requires-Dist: pymdown-extensions
|
|
33
33
|
Requires-Dist: mkdocs-material
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
datamaestro/__init__.py,sha256=LR8nx7H3Fo97O0gJXV2PxQezsmSTDLAg_nQEXB5QAjc,322
|
|
2
2
|
datamaestro/__main__.py,sha256=2p36ZcJcZAL9NZBUkMaYRUhKyqhheVPXMGw6K1KNwhk,9196
|
|
3
3
|
datamaestro/context.py,sha256=KsXYNTt4xX4zEVrnd2hciP7PVCh1StRzjU1Ih6VeCtU,13532
|
|
4
|
-
datamaestro/definitions.py,sha256=
|
|
4
|
+
datamaestro/definitions.py,sha256=XUUSTrZIyHIbOwRd0pR2FXqGQjLOZVkFqPkcaZGW4-Q,18954
|
|
5
5
|
datamaestro/record.py,sha256=IxxcrSIf99iluohtpnuMBTFkqeHRe5S-T_hWEqBgeME,5812
|
|
6
6
|
datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
|
|
7
7
|
datamaestro/search.py,sha256=bRT-91-2VJJ2JSfNaS1mzaVfqq_HMVBVs-RBj0w-ypM,2906
|
|
8
8
|
datamaestro/settings.py,sha256=HYSElTUYZ6DZocBb9o3ifm6WW9knRO64XJUwxGIpvwQ,1304
|
|
9
9
|
datamaestro/sphinx.py,sha256=bp7x_2BFoTSwTqcVZDM8R8cWa7G2pz0Zb8GS054lLYM,6996
|
|
10
10
|
datamaestro/utils.py,sha256=9m-AVVww6InAZfGFiGy6XJzfExpYNqH1fhWQEezjafA,6536
|
|
11
|
-
datamaestro/version.py,sha256=
|
|
11
|
+
datamaestro/version.py,sha256=phFUvw_LOCVKjBZIXauhRRMU1pbI044HMjZGo_n3unc,511
|
|
12
12
|
datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
|
|
13
13
|
datamaestro/annotations/agreement.py,sha256=xEH0ddZxdJ_oG_150PoOa-WjY_OaeQja3FzMzY5IB6k,955
|
|
14
14
|
datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
datamaestro/commands/mainstyle.css,sha256=EAWq6hKWjLYZ-gUrGV-z3L8LtkubD7mLoYdSIC7kLOo,465
|
|
16
|
-
datamaestro/commands/site.py,sha256=
|
|
16
|
+
datamaestro/commands/site.py,sha256=7H9c-ZlXt7bUlldHn8fMebzDKS7B7ijPNKrxHXMG-Lk,14233
|
|
17
17
|
datamaestro/data/__init__.py,sha256=Z1qZnliJwS5sRaLznK5YBVJCjvAlPbmJjbRvvLv_UVI,1547
|
|
18
18
|
datamaestro/data/csv.py,sha256=jcXFVBOEQoSi3YL60bqtwjCf2YXHboaMpUmiXZpzuPM,2506
|
|
19
19
|
datamaestro/data/huggingface.py,sha256=rCMiMqVgNI9zRAgm9PYnbwb7musYryBoIP3HuJmH4sg,691
|
|
@@ -29,7 +29,7 @@ datamaestro/download/multiple.py,sha256=Mrr0ObHM5cE1CPSHE9PKIrox3qZVgxwRyxLzNXp0
|
|
|
29
29
|
datamaestro/download/single.py,sha256=fCIfZdR14YN09MQTgcxL21PWu5CjELfIClgWjFpR5mg,4148
|
|
30
30
|
datamaestro/download/sync.py,sha256=Z_LsXj4kbZWIYKTVJZEhfdpYiv6wXOOIyw8LahmEcqs,836
|
|
31
31
|
datamaestro/download/todo.py,sha256=y3YnmWC_i-u23ce-vreIwIXZcoO-uA0HXErgJPThnco,256
|
|
32
|
-
datamaestro/download/wayback.py,sha256=
|
|
32
|
+
datamaestro/download/wayback.py,sha256=7XuWoLkmHR65wVDv3YnL3fiMtSrjKelk3UDI9ua_t8c,5504
|
|
33
33
|
datamaestro/stream/__init__.py,sha256=Angu_Yg9rNKXb8s4at-DXYcnE-OTgSMLfUEfrL6APD8,896
|
|
34
34
|
datamaestro/stream/compress.py,sha256=0ViFGpJc6pdvZGUNERE-3XV8jAOTSvhJurb2t0NW2eU,260
|
|
35
35
|
datamaestro/stream/lines.py,sha256=UNGcyZlZxN0Q7kw717jbhZFdDVmtfJfkJZCgK7xzF9A,1996
|
|
@@ -40,9 +40,9 @@ datamaestro/test/conftest.py,sha256=it4S5Qq1CA_U8qM0pr4m7v-1dhLj5Y49WjVg5Ee3mpM,
|
|
|
40
40
|
datamaestro/test/test_annotations.py,sha256=XUjDWb3FJimSD91wcItJ0lLwTBmvN4wVu_EgTKSvV2c,278
|
|
41
41
|
datamaestro/test/test_download_handlers.py,sha256=-Gofr89zqIyeI8C4rZqfYR3JfiZVImdcSz9s6q361zQ,641
|
|
42
42
|
datamaestro/test/test_record.py,sha256=hNZ3uo2i5FZ0VsOHRwvLO1Z6Zce92PdipAF65UptPB8,1156
|
|
43
|
-
datamaestro-1.4.
|
|
44
|
-
datamaestro-1.4.
|
|
45
|
-
datamaestro-1.4.
|
|
46
|
-
datamaestro-1.4.
|
|
47
|
-
datamaestro-1.4.
|
|
48
|
-
datamaestro-1.4.
|
|
43
|
+
datamaestro-1.4.4.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
|
|
44
|
+
datamaestro-1.4.4.dist-info/METADATA,sha256=uKnq6WmuKjAVVe2nO3I3sozPV3ef4WJAp0zjNcn19jE,8191
|
|
45
|
+
datamaestro-1.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
46
|
+
datamaestro-1.4.4.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
|
|
47
|
+
datamaestro-1.4.4.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
|
|
48
|
+
datamaestro-1.4.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|