audbcards 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- audbcards/__init__.py +16 -0
- audbcards/core/__init__.py +0 -0
- audbcards/core/datacard.py +401 -0
- audbcards/core/dataset.py +624 -0
- audbcards/core/templates/datacard.j2 +5 -0
- audbcards/core/templates/datacard_description.j2 +9 -0
- audbcards/core/templates/datacard_example.j2 +10 -0
- audbcards/core/templates/datacard_header.j2 +31 -0
- audbcards/core/templates/datacard_schemes.j2 +12 -0
- audbcards/core/templates/datacard_tables.j2 +13 -0
- audbcards/core/templates/datasets.j2 +32 -0
- audbcards/core/utils.py +125 -0
- audbcards/sphinx/__init__.py +135 -0
- audbcards-0.1.0.dist-info/LICENSE +25 -0
- audbcards-0.1.0.dist-info/METADATA +63 -0
- audbcards-0.1.0.dist-info/RECORD +36 -0
- audbcards-0.1.0.dist-info/WHEEL +5 -0
- audbcards-0.1.0.dist-info/top_level.txt +3 -0
- docs/api-src/audbcards.rst +11 -0
- docs/changelog.rst +1 -0
- docs/conf.py +68 -0
- docs/contributing.rst +3 -0
- docs/images/file-duration-distribution.png +0 -0
- docs/index.rst +33 -0
- docs/install.rst +42 -0
- docs/requirements.txt +6 -0
- docs/sphinx-extension.rst +145 -0
- tests/conftest.py +237 -0
- tests/requirements.txt +2 -0
- tests/test_data/rendered_templates/bare_db.rst +20 -0
- tests/test_data/rendered_templates/medium_db.rst +62 -0
- tests/test_data/rendered_templates/minimal_db.rst +37 -0
- tests/test_datacard.py +234 -0
- tests/test_dataset.py +330 -0
- tests/test_fixtures.py +22 -0
- tests/test_utils.py +71 -0
audbcards/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from audbcards.core.datacard import Datacard
|
|
2
|
+
from audbcards.core.dataset import Dataset
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
__all__ = []
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Look up the version of the installed distribution at import time.
# The helper name is bound only for the duration of the lookup
# and removed again afterwards,
# so that it does not leak into the package namespace.
# If the lookup fails, ``__version__`` is simply left undefined.
try:
    from importlib.metadata import version as _version

    __version__ = _version(__name__)
except Exception:  # pragma: no cover
    # Make sure the name exists, so the ``del`` below cannot fail
    _version = None  # pragma: no cover
finally:
    del _version
|
|
File without changes
|
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import os
|
|
3
|
+
import shutil
|
|
4
|
+
import typing
|
|
5
|
+
|
|
6
|
+
import jinja2
|
|
7
|
+
import matplotlib.pyplot as plt
|
|
8
|
+
import numpy as np
|
|
9
|
+
import seaborn as sns
|
|
10
|
+
|
|
11
|
+
import audb
|
|
12
|
+
import audeer
|
|
13
|
+
import audiofile
|
|
14
|
+
import audplot
|
|
15
|
+
|
|
16
|
+
from audbcards.core.dataset import Dataset
|
|
17
|
+
from audbcards.core.utils import set_plot_margins
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Datacard(object):
    r"""Datacard of a dataset.

    The datacard object
    writes a RST file
    for a given dataset,
    which can then be used
    to generate an HTML datacard page
    using ``sphinx``.

    Args:
        dataset: dataset object
        path: path to folder
            that stores datacard files
        example: if ``True``,
            include an audio example in the data card
            showing the waveform of the audio
            and an interactive player
        sphinx_build_dir: build dir of sphinx.
            If not ``None``
            and ``example`` is ``True``,
            a call to :meth:`audbcards.Datacard.player`
            will store an example audio file
            under
            ``<sphinx_build_dir>/<path>/<db-name>/<media-file-in-db>``
        sphinx_src_dir: source dir of sphinx.
            If not ``None``
            and ``example`` is ``True``,
            a call to :meth:`audbcards.Datacard.player`
            will store a wavplot of the example audio file
            under
            ``<sphinx_src_dir>/<path>/<db-name>/<db-name>.png``

    """

    def __init__(
        self,
        dataset: Dataset,
        *,
        path: str = "datasets",
        example: bool = True,
        sphinx_build_dir: typing.Optional[str] = None,
        sphinx_src_dir: typing.Optional[str] = None,
    ):
        self.dataset = dataset
        """Dataset object."""

        self.path = path
        """Folder to store datacard."""

        self.example = example
        """If an audio example should be included."""

        self.sphinx_build_dir = sphinx_build_dir
        """Sphinx build dir."""

        self.sphinx_src_dir = sphinx_src_dir
        """Sphinx source dir."""

        self.rst_preamble = ""
        """RST code added at top of data card."""

    @functools.cached_property
    def content(self):
        """Property Accessor for rendered jinja2 content.

        The template is rendered on first access
        and the result is cached on the instance.

        """
        return self._render_template()

    @property
    def example_media(self) -> typing.Optional[str]:
        r"""Select example media file.

        This selects a media file
        based on the median duration
        of all files
        between 0.5 s and 300 s
        and downloads it to the cache.

        Returns:
            name of the selected media file,
            or ``None``
            if no file has a duration in the given range
            or the media file could not be loaded

        """
        # Pick a meaningful duration for the example audio file
        min_dur = 0.5
        max_dur = 300  # 5 min
        durations = self.dataset.file_durations
        selected_durations = [d for d in durations if min_dur <= d <= max_dur]
        if not selected_durations:
            return None
        selected_duration = np.median(selected_durations)

        # Get index for duration closest to selected duration
        # see https://stackoverflow.com/a/9706105
        # durations.index(selected_duration)
        # is an alternative but fails due to rounding errors
        index = min(
            range(len(durations)),
            key=lambda n: abs(durations[n] - selected_duration),
        )
        # Download of example data might fail.
        # This is a deliberate best-effort,
        # but we must not swallow KeyboardInterrupt or SystemExit,
        # hence ``Exception`` instead of a bare ``except``
        try:
            media = self.dataset.deps.media[index]
            audb.load_media(
                self.dataset.name,
                media,
                version=self.dataset.version,
                verbose=False,
            )
        except Exception:
            media = None
        return media

    @property
    def file_duration_distribution(self) -> str:
        r"""Minimum and maximum of files durations, and plotted distribution.

        This generates a single line
        containing the minimum and maximum values
        of files durations.

        If :attr:`audbcards.Datacard.sphinx_src_dir` is set
        (e.g. when used in the sphinx extension),
        an inline image is stored
        in the sphinx source folder
        under ``<dataset-name>/<dataset-name>-file-durations.png``
        and displayed
        between the minimum and maximum values.

        Returns:
            RST string describing the range of file durations

        """
        min_ = 0
        max_ = 0
        unit = "s"
        durations = self.dataset.file_durations
        # ``len()`` instead of truthiness,
        # as ``durations`` might be an array-like object
        if len(durations) > 0:
            min_ = np.min(durations)
            max_ = np.max(durations)
        distribution_str = f"{min_:.1f} {unit} .. {max_:.1f} {unit}"

        # Save distribution plot
        if self.sphinx_src_dir is not None:
            name = "file-durations"
            image_file = audeer.path(
                self.sphinx_src_dir,
                self.path,
                self.dataset.name,
                f"{self.dataset.name}-{name}.png",
            )
            audeer.mkdir(os.path.dirname(image_file))
            self._plot_distribution(durations)
            # Always release the figure, even if saving fails
            try:
                plt.savefig(image_file, transparent=True)
            finally:
                plt.close()
            distribution_str = self._inline_image(
                f"{min_:.1f} {unit}",
                f"./{self.dataset.name}/{self.dataset.name}-{name}.png",
                f"{max_:.1f} {unit}",
            )

        return distribution_str

    def player(
        self,
        file: typing.Optional[str] = None,
    ) -> str:
        r"""Create an audio player showing the waveform.

        If :attr:`audbcards.Datacard.sphinx_build_dir` is set,
        the media file is copied from the ``audb`` cache
        to the build folder.
        If :attr:`audbcards.Datacard.sphinx_src_dir` is set,
        a plot of the waveform of the first channel
        is stored in the source folder.

        Args:
            file: input audio file to be used in the player.
                If ``None``,
                :attr:`audbcards.Datacard.example_media`
                is used

        Returns:
            RST code of the player,
            or an empty string
            if ``file`` is ``None``
            and no example media file is available

        """
        if file is None:
            file = self.example_media
        if file is None:
            # No example available:
            # there is nothing to copy, plot, or play
            return ""

        # The cache location is only needed
        # if we copy or read the media file
        if self.sphinx_build_dir is not None or self.sphinx_src_dir is not None:
            # use audb cache instead of dataset.cache_root
            media_src_dir = (
                f"{audb.default_cache_root()}/"
                f"{audb.flavor_path(self.dataset.name, self.dataset.version)}"
            )

        # Move file to build folder
        if self.sphinx_build_dir is not None:
            media_dst_dir = audeer.path(
                self.sphinx_build_dir,
                self.path,
                self.dataset.name,
            )
            audeer.mkdir(os.path.join(media_dst_dir, os.path.dirname(file)))
            shutil.copy(
                os.path.join(media_src_dir, file),
                os.path.join(media_dst_dir, file),
            )

        # Add plot of waveform
        if self.sphinx_src_dir is not None:
            signal, sampling_rate = audiofile.read(
                os.path.join(media_src_dir, file),
                always_2d=True,
            )
            image_file = audeer.path(
                self.sphinx_src_dir,
                self.path,
                self.dataset.name,
                f"{self.dataset.name}.png",
            )
            audeer.mkdir(os.path.dirname(image_file))
            plt.figure(figsize=[3, 0.5])
            # Always release the figure, even if plotting or saving fails
            try:
                ax = plt.subplot(111)
                # Only the first channel is shown
                audplot.waveform(signal[0, :], ax=ax)
                set_plot_margins()
                plt.savefig(image_file)
            finally:
                plt.close()

        player_src = f"./{self.dataset.name}/{file}"
        player_str = (
            f".. image:: ./{self.dataset.name}/{self.dataset.name}.png\n"
            "\n"
            ".. raw:: html\n"
            "\n"
            f' <p><audio controls src="{player_src}"></audio></p>'
        )
        return player_str

    def save(self, file: typing.Optional[str] = None):
        """Save content of rendered template to rst.

        The content is rendered
        before the output file is opened,
        so a failure during rendering
        does not leave an empty file behind.
        A missing parent folder of the output file is created.

        Args:
            file: name of output RST file.
                If ``None``
                and :attr:`audbcards.Datacard.sphinx_src_dir`
                is not ``None``,
                the RST file will be stored
                as ``<sphinx_src_dir>/<path>/<dataset>.rst``.
                If both are ``None``,
                nothing is written

        """
        if file is None and self.sphinx_src_dir is not None:
            file = audeer.path(
                self.sphinx_src_dir,
                self.path,
                f"{self.dataset.name}.rst",
            )
        if file is None:
            return

        content = self.content
        folder = os.path.dirname(file)
        # An empty folder means the current working directory
        if folder:
            os.makedirs(folder, exist_ok=True)
        with open(file, mode="w", encoding="utf-8") as fp:
            fp.write(content)

    def _inline_image(
        self,
        text1: str,
        file: str,
        text2: str,
    ) -> str:
        r"""RST string for rendering inline image between text.

        As a side effect
        the substitution definition of the image
        is added to :attr:`audbcards.Datacard.rst_preamble`,
        unless the identical definition is already part of it.

        Args:
            text1: text to the left of the image
            file: image file
            text2: text to the right of the image

        Returns:
            RST code to generate the desired inline image

        """
        # In RST there is no easy way to insert inline images.
        # We use the following workaround:
        #
        # .. |ref| image:: file
        #
        # text1 |ref| text2
        #
        ref = audeer.basename_wo_ext(file)
        definition = f".. |{ref}| image:: {file}\n"
        # Repeated calls for the same image
        # (e.g. accessing file_duration_distribution more than once)
        # must not add the same substitution definition twice
        if definition not in self.rst_preamble:
            self.rst_preamble += definition
        return f"{text1} |{ref}| {text2}"

    def _plot_distribution(
        self,
        values: typing.Sequence,
    ):
        r"""Plot inline distribution.

        A new figure is created
        and left open as current figure,
        so the caller is responsible
        for saving and closing it.

        Args:
            values: sequence of values

        """
        # ``len()`` instead of truthiness,
        # as ``values`` might be an array-like object
        if len(values) == 0:
            min_ = 0
            max_ = 0
        else:
            min_ = np.min(values)
            max_ = np.max(values)
        plt.figure(figsize=[0.5, 0.15])
        # Remove all margins besides bottom
        plt.subplot(111)
        plt.subplots_adjust(
            left=0,
            bottom=0.25,
            right=1,
            top=1,
            wspace=0,
            hspace=0,
        )
        # Plot duration distribution
        sns.kdeplot(
            values,
            fill=True,
            cut=0,
            clip=(min_, max_),
            linewidth=0,
            alpha=1,
            color="#d54239",
        )
        # Remove all ticks, labels
        sns.despine(left=True, bottom=True)
        plt.tick_params(
            axis="both",
            which="both",
            bottom=False,
            left=False,
            labelbottom=False,
            labelleft=False,
        )
        plt.xlabel("")
        plt.ylabel("")

    def _expand_dataset(
        self,
        dataset: typing.Dict,
    ) -> typing.Dict:
        r"""Expand dataset dict by additional entries.

        Additional properties are added
        that are only part of the data card,
        but not the dataset object,
        e.g. :meth:`audbcards.Datacard.player`

        The entries ``"path"``,
        ``"example"``
        and ``"file_duration_distribution"``
        are always set.
        ``"player"`` is only set
        if an example media file is available.
        The dictionary is modified in place.

        Args:
            dataset: dataset object as dictionary representation

        Returns:
            extended datasets dictionary

        """
        # Add path of datacard folder
        dataset["path"] = self.path
        # Add audio player for example file
        dataset["example"] = None
        if self.example:
            example = self.example_media
            if example is not None:
                player = self.player(example)
                dataset["player"] = player
                dataset["example"] = example
        dataset["file_duration_distribution"] = self.file_duration_distribution
        return dataset

    def _render_template(self) -> str:
        r"""Render content of data card with Jinja2.

        It uses the dictionary returned
        by the ``properties()`` method of the dataset,
        extended by :meth:`audbcards.Datacard._expand_dataset`,
        as basis for rendering the ``datacard.j2`` template.
        If :attr:`audbcards.Datacard.rst_preamble` is not empty,
        it is added in front of the rendered content.

        Returns:
            rendered RST content of the data card

        """
        template_dir = os.path.join(os.path.dirname(__file__), "templates")
        environment = jinja2.Environment(
            loader=jinja2.FileSystemLoader(template_dir),
            trim_blocks=True,
        )
        template = environment.get_template("datacard.j2")

        # Convert dataset object to dictionary
        dataset = self.dataset.properties()

        # Add additional datacard only properties
        dataset = self._expand_dataset(dataset)

        content = template.render(dataset)

        # Add RST preamble
        if len(self.rst_preamble) > 0:
            content = self.rst_preamble + "\n" + content

        return content
|