crieur 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
crieur/models.py ADDED
@@ -0,0 +1,408 @@
1
+ import re
2
+ from dataclasses import dataclass
3
+ from datetime import datetime
4
+ from os import closerange
5
+ from pathlib import Path
6
+ from textwrap import dedent
7
+ from typing import Optional
8
+
9
+ import mistune
10
+ from citeproc import (
11
+ Citation,
12
+ CitationItem,
13
+ CitationStylesBibliography,
14
+ CitationStylesStyle,
15
+ formatter,
16
+ )
17
+ from citeproc.source.bibtex import BibTeX
18
+ from dataclass_wizard import DatePattern, DumpMeta, YAMLWizard
19
+ from dataclass_wizard import errors as dw_errors
20
+ from PIL import Image, UnidentifiedImageError
21
+ from slugify import slugify
22
+ from yaml.composer import ComposerError
23
+
24
+ from .generator import mistune_plugins
25
+ from .typography import typographie
26
+
27
# Matches a citation marker such as ``[@key]`` or ``[@key, p. 12]`` and captures
# everything after the ``@`` as ``ref``.  The capture stops at the first closing
# bracket (and never crosses a line break) so that two markers on the same line,
# e.g. ``[@a] and [@b]``, are found as two separate citations instead of one
# greedy match spanning both.
RE_CITATIONS_REFS = re.compile(r"\[@(?P<ref>[^\]\n]*)\]")
28
+
29
+
30
@dataclass
class Settings:
    """Bundle of configuration values handed to the models of this module."""

    title: str
    # NOTE(review): appears to be the prefix of the absolute image URLs built by
    # ImgsWithSizesRenderer.image (via `article.settings.base_url`) — confirm.
    base_url: str
    extra_vars: str
    target_path: Path
    source_path: Path
    statics_path: Path
    templates_path: Path
    # NOTE(review): appears to be the CSL style handed to CitationStylesStyle in
    # Article.content_html (via `self.settings.csl_path`) — confirm.
    csl_path: Path
    without_statics: bool
    feed_limit: int
42
+
43
+
44
class FrenchTypographyRenderer(mistune.HTMLRenderer):
    """Apply French typographic rules to text."""

    def text(self, text):
        # Replace backslash-escaped spaces before the typographic pass.
        # NOTE(review): the replacement literal may be a non-breaking space;
        # keep it exactly as it is in the source file.
        text = text.replace("\\ ", " ")
        # Let mistune render the text first, then post-process the HTML.
        return typographie(super().text(text), html=True)

    def block_html(self, html):
        # Same treatment as `text`, applied to raw HTML blocks.
        html = html.replace("\\ ", " ")
        return typographie(super().block_html(html), html=True)
54
+
55
+
56
class ImgsWithSizesRenderer(FrenchTypographyRenderer):
    """Render Markdown images as ``<figure>`` elements with explicit sizes.

    The renderer is bound to one article: image files are looked up next to
    the article's ``images_path`` and URLs are built from the article's
    settings and URL.
    """

    def __init__(self, escape=True, allow_harmful_protocols=None, article=None):
        super().__init__(escape, allow_harmful_protocols)
        self._article = article

    def paragraph(self, text):
        # In case of a figure, we do not want the (non-standard) paragraph.
        if text.strip().startswith("<figure>"):
            return text
        return super().paragraph(text)

    def image(self, text, url, title=None):
        """Return the ``<figure>`` markup for an image, or ``""`` if unusable.

        An image is discarded (with a message on stdout) when no article or no
        images folder is available, or when the file cannot be opened as an
        image.
        """
        article = self._article
        # `article` defaults to None in __init__; treat that like a missing
        # images folder instead of failing with an AttributeError.
        if article is None or article.images_path is None:
            print(f"Image with URL `{url}` is discarded.")
            return ""
        full_path = article.images_path.resolve().parent / url
        try:
            # Only the dimensions are needed: close the file handle right away
            # rather than leaving one open per rendered image.
            with Image.open(full_path) as image:
                width, height = image.size
        except (IsADirectoryError, FileNotFoundError, UnidentifiedImageError):
            print(f"`{full_path}` is not a valid image.")
            return ""
        caption = f"<figcaption>{text}</figcaption>" if text else ""
        full_url = f"{article.settings.base_url}{article.url}{url}"
        # NOTE(review): `text` is interpolated unescaped into the `alt`
        # attribute; a double quote in the caption would break the markup.
        return dedent(
            f"""\
            <figure>
                <a href="{full_url}"
                    title="Cliquer pour une version haute résolution">
                    <img
                        src="{full_url}"
                        width="{width}" height="{height}"
                        loading="lazy"
                        decoding="async"
                        alt="{text}">
                </a>
                {caption}
            </figure>
            """
        )
98
+
99
+
100
@dataclass
class Numero(YAMLWizard):
    """An issue of the publication, loaded from YAML, with its articles.

    After loading, ``articles`` holds the raw entries of the YAML file; calling
    ``configure_articles`` replaces them with ``Article`` instances.
    """

    _id: str
    name: str
    description: str
    metadata: str
    articles: list
    title: str = ""
    title_f: str = ""

    @property
    def date(self):
        """Most recent date among the articles of this issue."""
        return max(article.date for article in self.articles)

    def __lt__(self, other: "Numero"):
        """Order issues chronologically, based on ``date``."""
        if not isinstance(other, Numero):
            return NotImplemented
        return self.date < other.date

    def __post_init__(self):
        self.slug = slugify(self.name)

    def configure_articles(self, yaml_path, settings):
        """Load every article listed in this issue and attach its resources.

        Each article lives in ``<yaml_path.parent>/<slug>-<_id>/`` where the
        slug is derived from the article title. Articles without a date or
        dated in the future are skipped. The resulting list replaces
        ``self.articles``, sorted in reverse order of ``Article.__lt__``
        (most recent first).

        Exits the process with status 1 when an article's metadata cannot be
        parsed.
        """
        # Preserves abstract_fr key (vs. abstract-fr) when converting to_yaml()
        DumpMeta(key_transform="SNAKE").bind_to(Article)

        loaded_articles = []
        for article in self.articles:
            article_slug = slugify(article["article"]["title"])
            article_folder = (
                yaml_path.parent / f"{article_slug}-{article['article']['_id']}"
            )
            article_yaml_path = article_folder / f"{article_slug}.yaml"
            try:
                try:
                    loaded_article = Article.from_yaml_file(article_yaml_path)
                except ComposerError:
                    # Presumably the file holds several `---`-separated
                    # documents: retry with the second chunk only.
                    loaded_article = Article.from_yaml(
                        article_yaml_path.read_text().split("---")[1]
                    )
            except dw_errors.ParseError as e:
                print(f"Metadata error in `{article['article']['title']}`:")
                print(e)
                # `exit()` is only injected by the `site` module and is not
                # guaranteed to exist; raising SystemExit is the equivalent
                # that always works.
                raise SystemExit(1)
            if not loaded_article.date:
                print(f"Article `{loaded_article.title}` skipped (no date).")
                continue
            if loaded_article.date > datetime.today().date():
                print(
                    f"Article `{loaded_article.title}` skipped "
                    f"(future date: {loaded_article.date})."
                )
                continue
            if not loaded_article.id:
                loaded_article.id = article_slug
            # The bibliography is read as UTF-8 in Article.content_html; read
            # the Markdown the same way instead of using the locale encoding.
            loaded_article.content_md = (
                article_folder / f"{article_slug}.md"
            ).read_text(encoding="utf-8")
            loaded_article.content_bib_path = article_folder / f"{article_slug}.bib"
            images_folder = article_folder / "images"
            loaded_article.images_path = (
                images_folder if images_folder.exists() else None
            )
            # `dossier` defaults to None on Article: only take the issue titles
            # from it when it is actually present.
            if loaded_article.dossier:
                self.title = loaded_article.dossier[0]["title"]
                self.title_f = loaded_article.dossier[0]["title_f"]
            loaded_article.numero = self
            loaded_article.settings = settings
            loaded_articles.append(loaded_article)
        self.articles = sorted(loaded_articles, reverse=True)
170
+
171
+
172
def _unwrap_paragraph(html):
    """Remove the single ``<p>…</p>`` wrapper around rendered inline text.

    The markup is returned untouched (apart from surrounding whitespace) when
    it is not wrapped in a paragraph, instead of being blindly truncated.
    """
    html = html.strip()
    if html.startswith("<p>") and html.endswith("</p>"):
        return html[len("<p>") : -len("</p>")]
    return html


@dataclass
class Article(YAMLWizard):
    """An article loaded from YAML metadata.

    ``Numero.configure_articles`` completes the instance after loading with
    ``content_md``, ``content_bib_path``, ``images_path``, ``numero`` and
    ``settings``; ``url`` and ``content_html`` rely on those attributes.
    """

    title: str
    title_f: str
    id: str = ""
    subtitle: str = ""
    subtitle_f: str = ""
    content_md: str = ""
    content_bib_path: Path = ""
    # Receives the settings object; accessed through attributes (`.csl_path`).
    settings: dict | None = None
    dossier: list | None = None
    date: Optional[DatePattern["%Y/%m/%d"]] = None  # noqa: F722
    authors: list = None
    abstract: list = None
    keywords: list = None

    def __post_init__(self):
        self.slug = slugify(self.title)
        # The formatted titles are Markdown: render them and drop the
        # paragraph wrapper mistune adds around inline content.
        md = mistune.create_markdown(escape=False)
        self.title_f = _unwrap_paragraph(md(self.title_f))
        self.subtitle_f = _unwrap_paragraph(md(self.subtitle_f))

    def __eq__(self, other):
        """Two articles are equal when they share the same ``id``."""
        # Comparing with a foreign object must yield "not equal", not raise.
        if not isinstance(other, Article):
            return NotImplemented
        return self.id == other.id

    def __lt__(self, other: "Article"):
        """Order articles chronologically."""
        if not isinstance(other, Article):
            return NotImplemented
        return self.date < other.date

    @property
    def abstract_fr(self):
        """Formatted French abstract (entries without ``lang`` count as French).

        Returns ``None`` when there is no such abstract.
        """
        # `abstract` defaults to None when the metadata has no abstract.
        for abstract in self.abstract or []:
            if abstract.get("text_f") and (
                abstract.get("lang") == "fr" or abstract.get("lang") is None
            ):
                return abstract["text_f"]
        return None

    @property
    def abstract_en(self):
        """Formatted English abstract, or ``None`` when there is none."""
        for abstract in self.abstract or []:
            if abstract.get("text_f") and abstract.get("lang") == "en":
                return abstract["text_f"]
        return None

    @property
    def url(self):
        """Relative URL of the article within its issue."""
        return f"numero/{self.numero.slug}/article/{self.id}/"

    @property
    def content_html(self):
        """Render the Markdown content to HTML with citations resolved.

        ``[@key]`` markers are replaced by formatted citations linking to the
        bibliography, which is inserted after the ``<h2>Bibliographie</h2>``
        heading when the rendered content contains one.
        """
        md = mistune.create_markdown(
            renderer=ImgsWithSizesRenderer(escape=False, article=self),
            plugins=mistune_plugins,
            escape=False,
        )
        html_content = md(self.content_md)

        bib_source = BibTeX(self.content_bib_path, encoding="utf-8")
        bib_style = CitationStylesStyle(self.settings.csl_path)
        bibliography = CitationStylesBibliography(bib_style, bib_source, formatter.html)

        # Processing citations in a document needs to be done in two passes as for some
        # CSL styles, a citation can depend on the order of citations in the
        # bibliography and thus on citations following the current one.
        # For this reason, we first need to register all citations with the
        # CitationStylesBibliography.

        citations = {}

        def clean_ref(citation_ref):
            # TODO: deal with page references (for instance `[@goody_raison_1979, pp.115]`).
            return citation_ref.split(",")[0]

        for citation_ref in RE_CITATIONS_REFS.findall(html_content):
            citation_ref = clean_ref(citation_ref)
            citation = Citation([CitationItem(citation_ref)])
            bibliography.register(citation)
            citations[citation_ref] = citation

        def warn(citation_item):
            print(
                "WARNING: Reference with key '{}' not found in the bibliography.".format(
                    citation_item.key
                )
            )

        # In the second pass, CitationStylesBibliography can generate citations.
        # CitationStylesBibliography.cite() requires a callback function to be passed
        # along to be called in case a CitationItem's key is not present in the
        # bibliography.

        def replace_reference(match_object):
            citation_ref = clean_ref(match_object.group("ref"))
            # Joining the lines collapses the snippet onto a single line.
            return "".join(
                f"""
                <a href="#ref_{citation_ref}" id="anchor_{citation_ref}">
                {bibliography.cite(citations[citation_ref], warn)}
                </a>
                """.split("\n")
            )

        html_content = RE_CITATIONS_REFS.sub(replace_reference, html_content)

        def clean_item(item):
            # As of 2025, citeproc-py does not support repeated punctuation.
            return str(item).replace("..", ".").replace(".</i>.", ".</i>")

        bibliography_entries = []
        for citation, item in zip(bibliography.items, bibliography.bibliography()):
            citation_ref = citation.reference.get("key")
            cleaned_item = clean_item(item)
            bibliography_entries.append(
                f"""
                <li>
                <span id="ref_{citation_ref}">
                {cleaned_item}
                <a href="#anchor_{citation_ref}">↩</a>
                </span>
                </li>
                """
            )
        html_bibliography = "".join(bibliography_entries)

        html_content = html_content.replace(
            "<h2>Bibliographie</h2>",
            f"<h2>Bibliographie</h2>\n\n<ul>{html_bibliography}</ul>",
        )

        return html_content
299
+
300
+
301
def configure_numero(yaml_path, settings):
    """Load the issue described by ``yaml_path`` together with its articles.

    When the file cannot be composed as a single YAML document, the second
    ``---``-delimited chunk of its text is parsed instead.
    """
    # Keep snake_case keys when the issue is dumped back with to_yaml().
    DumpMeta(key_transform="SNAKE").bind_to(Numero)

    try:
        numero = Numero.from_yaml_file(yaml_path)
    except ComposerError:
        chunks = yaml_path.read_text().split("---")
        numero = Numero.from_yaml(chunks[1])

    numero.configure_articles(yaml_path, settings)
    return numero
312
+
313
+
314
@dataclass
class Keyword:
    """A keyword and the articles tagged with it.

    Keywords are identified by their slug and ordered by popularity: the one
    with fewer articles is "smaller".
    """

    slug: str
    name: str
    articles: list

    def __eq__(self, other):
        """Two keywords are equal when they share the same slug."""
        # Comparing with a foreign object must yield "not equal", not raise.
        if not isinstance(other, Keyword):
            return NotImplemented
        return self.slug == other.slug

    def __lt__(self, other: "Keyword"):
        if not isinstance(other, Keyword):
            return NotImplemented
        len_self = len(self.articles)
        len_other = len(other.articles)
        if len_self == len_other:
            # Inverted on purpose: with a descending sort, ties come out in
            # ascending alphabetical order of slug.
            return self.slug > other.slug
        return len_self < len_other
331
+
332
+
333
@dataclass
class Author:
    """An author and the articles they wrote.

    Authors are identified by their slug and ordered by number of articles:
    the one with fewer articles is "smaller".
    """

    slug: str
    forname: str
    surname: str
    articles: list
    biography: str = ""

    def __str__(self):
        return f"{self.forname} {self.surname}"

    def __eq__(self, other):
        """Two authors are equal when they share the same slug."""
        # Comparing with a foreign object must yield "not equal", not raise.
        if not isinstance(other, Author):
            return NotImplemented
        return self.slug == other.slug

    def __lt__(self, other: "Author"):
        if not isinstance(other, Author):
            return NotImplemented
        len_self = len(self.articles)
        len_other = len(other.articles)
        if len_self == len_other:
            # Inverted on purpose: with a descending sort, ties come out in
            # ascending alphabetical order of slug.
            return self.slug > other.slug
        return len_self < len_other
355
+
356
+
357
def collect_keywords(numeros):
    """Index the French keywords of every article of the given issues.

    Each article's raw ``keywords`` metadata is replaced by the list of
    ``Keyword`` objects it is tagged with (an empty list when it has none).

    Returns a dict mapping keyword slugs to ``Keyword`` objects, sorted from
    the most used keyword to the least used one.
    """
    keywords = {}
    for numero in numeros:
        for article in numero.articles:
            article_keywords = []
            seen_slugs = set()
            # `keywords` defaults to None on Article when the metadata has none.
            for kwds in article.keywords or []:
                if not (kwds.get("list") and kwds.get("lang") == "fr"):  # TODO: en?
                    continue
                for keyword in kwds["list"].split(", "):
                    keyword_slug = slugify(keyword)
                    # Skip blank entries (e.g. a trailing separator) and
                    # keywords repeated within the same article, which would
                    # otherwise count the article twice.
                    if not keyword_slug or keyword_slug in seen_slugs:
                        continue
                    seen_slugs.add(keyword_slug)
                    kw = keywords.get(keyword_slug)
                    if kw is None:
                        kw = Keyword(slug=keyword_slug, name=keyword, articles=[])
                        keywords[keyword_slug] = kw
                    kw.articles.append(article)
                    article_keywords.append(kw)
            article.keywords = article_keywords
    return dict(sorted(keywords.items(), key=lambda item: item[1], reverse=True))
377
+
378
+
379
def collect_authors(numeros):
    """Index the authors of every article of the given issues.

    For articles that declare authors, the raw ``authors`` metadata is
    replaced by the list of matching ``Author`` objects; entries with neither
    forename nor surname are ignored.

    Returns a dict mapping author slugs to ``Author`` objects, sorted in
    descending ``Author`` order (most articles first).
    """
    authors = {}
    for numero in numeros:
        for article in numero.articles:
            if not article.authors:
                continue
            collected = []
            for raw_author in article.authors:
                forname = raw_author.get("forname", "")
                surname = raw_author.get("surname", "")
                biography = raw_author.get("biography", "")
                full_name = f"{forname} {surname}".strip()
                if not full_name:
                    continue
                slug = slugify(full_name)
                author = authors.get(slug)
                if author is None:
                    # First time this author is met: the biography of this
                    # occurrence is the one that is kept.
                    author = Author(
                        slug=slug,
                        forname=forname,
                        surname=surname,
                        articles=[article],
                        biography=biography,
                    )
                    authors[slug] = author
                else:
                    author.articles.append(article)
                collected.append(author)
            article.authors = collected
    ranked = sorted(authors.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ranked)
crieur/plugins.py ADDED
@@ -0,0 +1,110 @@
1
+ from typing import Any, Dict, List, Match, Union
2
+
3
+ from mistune.core import BlockState
4
+
5
+ __all__ = ["inline_footnotes"]
6
+
7
# https://michelf.ca/projects/php-markdown/extra/#footnotes
# Pattern for an inline footnote written as ``^[note text]``: the text between
# the brackets (which cannot contain ``]``) is captured as ``footnote_inlined``.
INLINE_FOOTNOTE = r"\^\[(?P<footnote_inlined>[^\]]*)\]"
9
+
10
+
11
def parse_inline_footnote(
    inline: "InlineParser", m: Match[str], state: "InlineState"
) -> int:
    """Record an inline footnote and emit the token of its reference.

    The footnote text is stored once in ``state.env["inline_footnotes"]``; the
    emitted ``footnote_ref`` token carries the 1-based position of the text in
    that list. Returns the end offset of the match.
    """
    note_text = m.group("footnote_inlined")
    known_notes = state.env.get("inline_footnotes") or []
    if note_text not in known_notes:
        known_notes.append(note_text)
        state.env["inline_footnotes"] = known_notes
    position = known_notes.index(note_text) + 1
    token = {
        "type": "footnote_ref",
        "raw": note_text,
        "attrs": {"index": position},
    }
    state.append_token(token)
    return m.end()
29
+
30
+
31
def parse_footnote_item(
    block: "BlockParser", key: str, index: int, state: BlockState
) -> Dict[str, Any]:
    """Build the ``footnote_item`` token of one inline footnote.

    The footnote text (``key``) becomes the text of the single paragraph child
    of the item.
    """
    paragraph = {"type": "paragraph", "text": key}
    attrs = {"key": key, "index": index}
    return {"type": "footnote_item", "children": [paragraph], "attrs": attrs}
39
+
40
+
41
def md_footnotes_hook(
    md: "Markdown", result: Union[str, List[Dict[str, Any]]], state: BlockState
) -> Union[str, List[Dict[str, Any]]]:
    """Append the rendered footnotes section to the rendering result.

    ``result`` is returned unchanged when no inline footnote was recorded in
    ``state.env``.
    """
    notes = state.env.get("inline_footnotes")
    if not notes:
        return result

    items = []
    for position, note in enumerate(notes, start=1):
        items.append(parse_footnote_item(md.block, note, position, state))

    # Render the footnotes through a fresh state holding only that section.
    footnotes_state = BlockState()
    footnotes_state.tokens = [{"type": "footnotes", "children": items}]
    rendered = md.render_state(footnotes_state)
    return result + rendered  # type: ignore[operator]
55
+
56
+
57
def render_inline_footnote_ref(renderer: "BaseRenderer", key: str, index: int) -> str:
    """Render the superscript link pointing to footnote number ``index``."""
    return (
        f'<sup class="footnote-ref" id="fnref-{index}">'
        f'<a href="#fn-{index}">{index}</a></sup>'
    )
61
+
62
+
63
def render_inline_footnotes(renderer: "BaseRenderer", text: str) -> str:
    """Wrap the rendered footnote items in the footnotes section markup."""
    return f'<hr><section class="footnotes">\n<ol>\n{text}</ol>\n</section>\n'
65
+
66
+
67
def render_inline_footnote_item(
    renderer: "BaseRenderer", text: str, key: str, index: int
) -> str:
    """Render one footnote as a list item ending with a back-reference link.

    The last four characters of the right-stripped ``text`` (expected to be
    the closing ``</p>``) are dropped so the link can be placed inside the
    paragraph, which is then closed again.
    """
    number = str(index)
    backlink = f'<a href="#fnref-{number}" class="footnote">&#8617;</a>'
    opened_paragraph = text.rstrip()[:-4]
    return f'<li id="fn-{number}">{opened_paragraph}{backlink}</p></li>\n'
74
+
75
+
76
def inline_footnotes(md: "Markdown") -> None:
    """Mistune plugin adding support for inline footnotes (``^[text]``).

    See https://michelf.ca/projects/php-markdown/extra/#footnotes

    Example input:

    .. code-block:: text

        That's some text with a footnote.^[And that's the footnote.]

    HTML output:

    .. code-block:: html

        <p>That's some text with a footnote.<sup class="footnote-ref" id="fnref-1"><a href="#fn-1">1</a></sup></p>
        <hr><section class="footnotes">
        <ol>
        <li id="fn-1"><p>And that's the footnote.<a href="#fnref-1" class="footnote">&#8617;</a></p></li>
        </ol>
        </section>

    :param md: Markdown instance
    """
    # The rule is registered ahead of the regular "link" rule.
    md.inline.register(
        "inline_footnote",
        INLINE_FOOTNOTE,
        parse_inline_footnote,
        before="link",
    )
    md.after_render_hooks.append(md_footnotes_hook)

    renderer = md.renderer
    if renderer and renderer.NAME == "html":
        html_renderers = {
            "footnote_ref": render_inline_footnote_ref,
            "footnote_item": render_inline_footnote_item,
            "footnotes": render_inline_footnotes,
        }
        for token_type, render in html_renderers.items():
            renderer.register(token_type, render)