audbcards 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
audbcards/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ from audbcards.core.datacard import Datacard
2
+ from audbcards.core.dataset import Dataset
3
+
4
+
5
# Nothing is exported by ``from audbcards import *``
__all__ = []


def _installed_version():
    """Return the version stored in the metadata of the installed package."""
    import importlib.metadata

    return importlib.metadata.version(__name__)


# ``__version__`` stays undefined
# if the metadata of the package cannot be read.
# The helper is removed afterwards
# to not leak any extra name into the package namespace.
try:
    __version__ = _installed_version()
except Exception:  # pragma: no cover
    pass  # pragma: no cover
del _installed_version
File without changes
@@ -0,0 +1,401 @@
1
+ import functools
2
+ import os
3
+ import shutil
4
+ import typing
5
+
6
+ import jinja2
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ import seaborn as sns
10
+
11
+ import audb
12
+ import audeer
13
+ import audiofile
14
+ import audplot
15
+
16
+ from audbcards.core.dataset import Dataset
17
+ from audbcards.core.utils import set_plot_margins
18
+
19
+
20
class Datacard(object):
    r"""Datacard of a dataset.

    The datacard object
    writes a RST file
    for a given dataset,
    which can then be used
    to generate an HTML datacard page
    using ``sphinx``.

    Args:
        dataset: dataset object
        path: path to folder
            that stores datacard files
        example: if ``True``,
            include an audio example in the data card
            showing the waveform of the audio
            and an interactive player
        sphinx_build_dir: build dir of sphinx.
            If not ``None``
            and ``example`` is ``True``,
            a call to :meth:`audbcards.Datacard.player`
            will store an example audio file
            under
            ``<sphinx_build_dir>/<path>/<db-name>/<media-file-in-db>``
        sphinx_src_dir: source dir of sphinx.
            If not ``None``
            and ``example`` is ``True``,
            a call to :meth:`audbcards.Datacard.player`
            will store a wavplot of the example audio file
            under
            ``<sphinx_src_dir>/<path>/<db-name>/<db-name>.png``

    """

    def __init__(
        self,
        dataset: "Dataset",
        *,
        path: str = "datasets",
        example: bool = True,
        sphinx_build_dir: typing.Optional[str] = None,
        sphinx_src_dir: typing.Optional[str] = None,
    ):
        self.dataset = dataset
        """Dataset object."""

        self.path = path
        """Folder to store datacard."""

        self.example = example
        """If an audio example should be included."""

        self.sphinx_build_dir = sphinx_build_dir
        """Sphinx build dir."""

        self.sphinx_src_dir = sphinx_src_dir
        """Sphinx source dir."""

        self.rst_preamble = ""
        """RST code added at top of data card."""

    @functools.cached_property
    def content(self):
        """Property Accessor for rendered jinja2 content.

        The template is rendered on first access
        and the result is cached on the instance.

        """
        return self._render_template()

    @property
    def example_media(self) -> typing.Optional[str]:
        r"""Select example media file.

        This selects a media file
        based on the median duration
        of all files
        between 0.5 s and 300 s
        and downloads it to the cache.

        Returns:
            name of the selected media file,
            or ``None``
            if no file has a duration in the given range
            or if loading the media file failed

        """
        # Pick a meaningful duration for the example audio file
        min_dur = 0.5
        max_dur = 300  # 5 min
        durations = self.dataset.file_durations
        selected_durations = [d for d in durations if min_dur <= d <= max_dur]
        if len(selected_durations) == 0:
            return None
        selected_duration = np.median(selected_durations)

        # Get index for duration closest to selected duration
        # see https://stackoverflow.com/a/9706105
        # durations.index(selected_duration)
        # is an alternative but fails due to rounding errors
        index = min(
            range(len(durations)),
            key=lambda n: abs(durations[n] - selected_duration),
        )
        # Download of example data might fail.
        # Only catch regular errors,
        # so that KeyboardInterrupt and SystemExit
        # are still propagated to the caller
        try:
            media = self.dataset.deps.media[index]
            audb.load_media(
                self.dataset.name,
                media,
                version=self.dataset.version,
                verbose=False,
            )
        except Exception:
            media = None
        return media

    @property
    def file_duration_distribution(self) -> str:
        r"""Minimum and maximum of files durations, and plotted distribution.

        This generates a single line
        containing the minimum and maximum values
        of files durations.

        If :attr:`audbcards.Datacard.sphinx_src_dir` is set
        (e.g. when used in the sphinx extension),
        an inline image is stored
        in the sphinx source folder
        under ``<path>/<dataset-name>/<dataset-name>-file-durations.png``
        and displayed
        between the minimum and maximum values.

        Returns:
            RST string describing the file duration distribution

        """
        min_ = 0
        max_ = 0
        unit = "s"
        durations = self.dataset.file_durations
        if len(durations) > 0:
            min_ = np.min(durations)
            max_ = np.max(durations)
        distribution_str = f"{min_:.1f} {unit} .. {max_:.1f} {unit}"

        # Save distribution plot
        if self.sphinx_src_dir is not None:
            self._plot_distribution(durations)
            name = "file-durations"
            image_file = audeer.path(
                self.sphinx_src_dir,
                self.path,
                self.dataset.name,
                f"{self.dataset.name}-{name}.png",
            )
            # Always close the figure,
            # even if storing the image fails
            try:
                audeer.mkdir(os.path.dirname(image_file))
                plt.savefig(image_file, transparent=True)
            finally:
                plt.close()
            distribution_str = self._inline_image(
                f"{min_:.1f} {unit}",
                f"./{self.dataset.name}/{self.dataset.name}-{name}.png",
                f"{max_:.1f} {unit}",
            )

        return distribution_str

    def player(
        self,
        file: typing.Optional[str] = None,
    ) -> str:
        r"""Create an audio player showing the waveform.

        If :attr:`audbcards.Datacard.sphinx_build_dir` is set,
        the media file is copied from the ``audb`` cache
        to the build folder.
        If :attr:`audbcards.Datacard.sphinx_src_dir` is set,
        a plot of the waveform of the first channel
        is stored in the source folder.

        Args:
            file: input audio file to be used in the player.
                If ``None``,
                :attr:`audbcards.Datacard.example_media`
                is used

        Returns:
            RST code including the waveform image
            and an HTML audio player.
            An empty string is returned
            if ``file`` is ``None``
            and no example media file is available

        """
        if file is None:
            file = self.example_media
        if file is None:
            # Without a media file
            # there is nothing to copy, plot, or play
            return ""

        # use audb cache instead of dataset.cache_root
        media_src_dir = os.path.join(
            audb.default_cache_root(),
            audb.flavor_path(self.dataset.name, self.dataset.version),
        )
        media_src_file = os.path.join(media_src_dir, file)

        # Copy file to build folder
        if self.sphinx_build_dir is not None:
            media_dst_dir = audeer.path(
                self.sphinx_build_dir,
                self.path,
                self.dataset.name,
            )
            # file might contain sub-folders
            audeer.mkdir(os.path.join(media_dst_dir, os.path.dirname(file)))
            shutil.copy(
                media_src_file,
                os.path.join(media_dst_dir, file),
            )

        # Add plot of waveform
        if self.sphinx_src_dir is not None:
            signal, sampling_rate = audiofile.read(
                media_src_file,
                always_2d=True,
            )
            image_file = audeer.path(
                self.sphinx_src_dir,
                self.path,
                self.dataset.name,
                f"{self.dataset.name}.png",
            )
            audeer.mkdir(os.path.dirname(image_file))
            plt.figure(figsize=[3, 0.5])
            # Always close the figure,
            # even if plotting or storing the image fails
            try:
                ax = plt.subplot(111)
                # Only the first channel is shown
                audplot.waveform(signal[0, :], ax=ax)
                set_plot_margins()
                plt.savefig(image_file)
            finally:
                plt.close()

        player_src = f"./{self.dataset.name}/{file}"
        player_str = (
            f".. image:: ./{self.dataset.name}/{self.dataset.name}.png\n"
            "\n"
            ".. raw:: html\n"
            "\n"
            f'    <p><audio controls src="{player_src}"></audio></p>'
        )
        return player_str

    def save(self, file: typing.Optional[str] = None):
        """Save content of rendered template to rst.

        Missing parent folders of the output file
        are created.
        Nothing is written
        if ``file`` is ``None``
        and :attr:`audbcards.Datacard.sphinx_src_dir`
        is ``None``.

        Args:
            file: name of output RST file.
                If ``None``
                and :attr:`audbcards.Datacard.sphinx_src_dir`
                is not ``None``,
                the RST file will be stored
                as ``<sphinx_src_dir>/<path>/<dataset>.rst``

        """
        if file is None and self.sphinx_src_dir is not None:
            file = audeer.path(
                self.sphinx_src_dir,
                self.path,
                f"{self.dataset.name}.rst",
            )
        if file is None:
            return

        # Render before opening the file,
        # to not leave an empty file behind
        # if rendering fails
        content = self.content
        os.makedirs(os.path.dirname(os.path.abspath(file)), exist_ok=True)
        with open(file, mode="w", encoding="utf-8") as fp:
            fp.write(content)

    def _inline_image(
        self,
        text1: str,
        file: str,
        text2: str,
    ) -> str:
        r"""RST string for rendering inline image between text.

        The substitution definition of the image
        is appended to :attr:`audbcards.Datacard.rst_preamble`,
        if it is not already part of it.

        Args:
            text1: text to the left of the image
            file: image file
            text2: text to the right of the image

        Returns:
            RST code to generate the desired inline image

        """
        # In RST there is no easy way to insert inline images.
        # We use the following workaround:
        #
        # .. |ref| image:: file
        #
        # text1 |ref| text2
        #
        ref = audeer.basename_wo_ext(file)
        definition = f".. |{ref}| image:: {file}\n"
        # Defining the same substitution twice is an error in RST,
        # so add each definition only once
        if definition not in self.rst_preamble:
            self.rst_preamble += definition
        return f"{text1} |{ref}| {text2}"

    def _plot_distribution(
        self,
        values: typing.Sequence,
    ):
        r"""Plot inline distribution.

        A new figure is created
        and left open,
        the caller is responsible
        for saving and closing it.

        Args:
            values: sequence of values

        """
        if len(values) == 0:
            min_ = 0
            max_ = 0
        else:
            min_ = np.min(values)
            max_ = np.max(values)
        plt.figure(figsize=[0.5, 0.15])
        # Remove all margins besides bottom
        plt.subplot(111)
        plt.subplots_adjust(
            left=0,
            bottom=0.25,
            right=1,
            top=1,
            wspace=0,
            hspace=0,
        )
        # Plot duration distribution
        sns.kdeplot(
            values,
            fill=True,
            cut=0,
            clip=(min_, max_),
            linewidth=0,
            alpha=1,
            color="#d54239",
        )
        # Remove all ticks, labels
        sns.despine(left=True, bottom=True)
        plt.tick_params(
            axis="both",
            which="both",
            bottom=False,
            left=False,
            labelbottom=False,
            labelleft=False,
        )
        plt.xlabel("")
        plt.ylabel("")

    def _expand_dataset(
        self,
        dataset: typing.Dict,
    ) -> typing.Dict:
        r"""Expand dataset dict by additional entries.

        Additional properties are added
        that are only part of the data card,
        but not the dataset object,
        e.g. :meth:`audbcards.Datacard.player`.
        The dictionary is modified in place.

        Args:
            dataset: dataset object as dictionary representation

        Returns:
            extended datasets dictionary

        """
        # Add path of datacard folder
        dataset["path"] = self.path
        # Add audio player for example file
        dataset["example"] = None
        if self.example:
            example = self.example_media
            if example is not None:
                player = self.player(example)
                dataset["player"] = player
                dataset["example"] = example
        dataset["file_duration_distribution"] = self.file_duration_distribution
        return dataset

    def _render_template(self) -> str:
        r"""Render content of data card with Jinja2.

        It uses the dictionary
        returned by ``self.dataset.properties()``,
        extended by :meth:`audbcards.Datacard._expand_dataset`,
        as basis for rendering
        the ``datacard.j2`` template.
        If :attr:`audbcards.Datacard.rst_preamble` is not empty,
        it is added at the top of the rendered content.

        Returns:
            rendered RST content of the data card

        """
        template_dir = os.path.join(os.path.dirname(__file__), "templates")
        environment = jinja2.Environment(
            loader=jinja2.FileSystemLoader(template_dir),
            trim_blocks=True,
        )
        template = environment.get_template("datacard.j2")

        # Convert dataset object to dictionary
        dataset = self.dataset.properties()

        # Add additional datacard only properties
        dataset = self._expand_dataset(dataset)

        content = template.render(dataset)

        # Add RST preamble
        if len(self.rst_preamble) > 0:
            content = self.rst_preamble + "\n" + content

        return content