crieur 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crieur might be problematic. Click here for more details.

crieur/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from pathlib import Path
2
+
# Version of the crieur package (printed by the `version` CLI command).
VERSION = "1.0.0"
# Directory that contains this package's `__init__.py`.
ROOT_DIR = Path(__file__).parent
crieur/__main__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .cli import main
2
+
3
+ main()
crieur/cli.py ADDED
@@ -0,0 +1,124 @@
1
+ import contextlib
2
+ import shutil
3
+ import socket
4
+ import zipfile
5
+ from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer, test
6
+ from pathlib import Path
7
+
8
+ import httpx
9
+ from minicli import cli, run
10
+
11
+ from . import VERSION
12
+ from .generator import generate_html
13
+ from .models import collect_keywords, configure_numero
14
+ from .utils import each_file_from, each_folder_from
15
+
16
+
@cli
def version():
    """Return the current version."""
    message = f"Crieur version: {VERSION}"
    print(message)
21
+
22
+
@cli
def generate(
    title="Crieur",
    target_path: Path = Path() / "public",
    source_path: Path = Path() / "sources",
):
    """Generate a new revue website.

    :title: Title of the website (default: Crieur).
    :target_path: Path where site is built (default: /public/).
    :source_path: Path where stylo source were downloaded (default: /sources/).
    """
    # NOTE(review): docstring kept verbatim; presumably minicli reads it to
    # build the command help, so confirm before rewording it.

    # One configured numero per YAML file yielded for each source folder.
    numeros = [
        configure_numero(corpus_yaml)
        for numero_folder in each_folder_from(source_path)
        for corpus_yaml in each_file_from(numero_folder, pattern="*.yaml")
    ]

    keywords = collect_keywords(numeros)
    generate_html(title, numeros, keywords, target_path)

    # Ship the static assets bundled with the package next to the pages.
    bundled_statics = Path(__file__).parent / "statics"
    shutil.copytree(bundled_statics, target_path / "statics", dirs_exist_ok=True)
46
+
47
+
@cli
def stylo(
    *stylo_ids: str,
    stylo_instance: str = "stylo.huma-num.fr",
    stylo_export: str = "https://export.stylo.huma-num.fr",
    force_download: bool = False,
):
    """Initialize a new revue to current directory from Stylo.

    :stylo_ids: Corpus ids from Stylo, separated by commas.
    :stylo_instance: Instance of Stylo (default: stylo.huma-num.fr).
    :stylo_export: Stylo export URL (default: https://export.stylo.huma-num.fr).
    :force_download: Force download of sources from Stylo (default: False).
    """
    print(
        f"Initializing a new revue: `{stylo_ids}` from `{stylo_instance}` "
        f"through export service `{stylo_export}`."
    )

    sources_path = Path() / "sources"
    # The folder may already exist from an earlier run.
    sources_path.mkdir(exist_ok=True)

    for position, stylo_id in enumerate(stylo_ids, start=1):
        # The archive is kept in the current directory and reused on later
        # runs unless `force_download` is set.
        zip_path = Path() / f"export-{position}-{stylo_id}.zip"
        if force_download or not zip_path.exists():
            url = (
                f"{stylo_export}/generique/corpus/export/"
                f"{stylo_instance}/{stylo_id}/Extract-corpus/"
                "?with_toc=0&with_ascii=0&with_link_citations=0&with_nocite=0"
                "&version=&bibliography_style=chicagomodified&formats=originals"
            )
            print(f"Downloading data from {url} to {zip_path}")
            with zip_path.open("wb") as fd:
                with httpx.stream("GET", url, timeout=None) as response:
                    for data in response.iter_bytes():
                        fd.write(data)

        target_path = sources_path / f"{position}-{stylo_id}"
        try:
            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                zip_ref.extractall(target_path)
        except zipfile.BadZipFile:
            # Whatever was saved is not a zip archive. Remove it, otherwise
            # the next run (without force_download) would reuse the broken
            # file instead of downloading again.
            zip_path.unlink(missing_ok=True)
            print(f"Unable to find corpus with id {stylo_id}!")
            # Stops at the first failing corpus: remaining ids are skipped.
            return
        else:
            print(f"Data downloaded and extracted to {target_path}")
94
+
95
+
@cli
def serve(repository_path: Path = Path(), port: int = 8000):
    """Serve an HTML book from `repository_path`/public or current directory/public.

    :repository_path: Absolute or relative path to book’s sources (default: current).
    :port: Port to serve the book from (default=8000)
    """
    print(
        f"Serving HTML book from `{repository_path}/public` to http://127.0.0.1:{port}"
    )
    # Folder handed to every request handler as its `directory`.
    public_dir = str(repository_path / "public")

    # From https://github.com/python/cpython/blob/main/Lib/http/server.py#L1307-L1326
    class DirectoryServer(ThreadingHTTPServer):
        """Threading HTTP server whose handlers are created with `public_dir`."""

        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

        def finish_request(self, request, client_address):
            handler_class = self.RequestHandlerClass
            handler_class(request, client_address, self, directory=public_dir)

    test(HandlerClass=SimpleHTTPRequestHandler, ServerClass=DirectoryServer, port=port)
121
+
122
+
def main():
    """Entry point of the command line interface: delegate to minicli's `run`."""
    run()
crieur/generator.py ADDED
@@ -0,0 +1,73 @@
1
+ import locale
2
+ import shutil
3
+ from pathlib import Path
4
+
5
+ import mistune
6
+ from jinja2 import Environment as Env
7
+ from jinja2 import FileSystemLoader
8
+ from slugify import slugify
9
+
10
+ from .utils import neighborhood
11
+
# Request the French locale for the whole process (LC_ALL).
# `fr_FR.UTF-8` is not installed on every system and `setlocale` raises
# `locale.Error` in that case; importing this module must not crash there,
# so keep the current locale and tell the user.
try:
    locale.setlocale(locale.LC_ALL, "fr_FR.UTF-8")
except locale.Error:
    print("Locale `fr_FR.UTF-8` is not available, keeping the current locale.")
# Jinja2 environment reading the templates stored next to this module.
_TEMPLATES_DIR = Path(__file__).parent / "templates"
environment = Env(loader=FileSystemLoader(str(_TEMPLATES_DIR)))


def slugify_(value):
    """Template filter: return `slugify(value)`."""
    slug = slugify(value)
    return slug


environment.filters.update(slugify=slugify_)
21
+
# Shared mistune parser with the footnotes and superscript plugins enabled.
md = mistune.create_markdown(plugins=["footnotes", "superscript"])


def markdown(value):
    """Template filter: run `value` through the `md` parser.

    Falsy values (None, empty string) give an empty string without calling
    the parser.
    """
    if not value:
        return ""
    return md(value)


environment.filters.update(markdown=markdown)
30
+
31
+
def _write_index(folder, content):
    """Create `folder` if needed and write `content` to its `index.html` as UTF-8."""
    folder.mkdir(parents=True, exist_ok=True)
    # Explicit encoding: the output must not depend on the platform locale.
    (folder / "index.html").write_text(content, encoding="utf-8")


def generate_html(title, numeros, keywords, target_path):
    """Render every page of the website under `target_path`.

    Pages written (each as an `index.html`):
    - `target_path/` from `homepage.html`;
    - `target_path/numero/<numero.name>/` from `numero.html`;
    - `target_path/numero/<numero.name>/article/<article.id>/` from
      `article.html`, plus a copy of `article.images_path` into `images/`
      when that attribute is set;
    - `target_path/mot-clef/<keyword.slug>/` from `keyword.html`.

    :title: Title passed to every template.
    :numeros: Iterable of numeros; each exposes `name` and `articles`.
    :keywords: Mapping whose values expose `slug`; passed to every template.
    :target_path: `pathlib.Path` of the output folder (created if missing).
    """
    common_params = {
        "numeros": numeros,
        "keywords": keywords,
        "title": title,
    }

    template_homepage = environment.get_template("homepage.html")
    _write_index(target_path, template_homepage.render(**common_params))

    # Templates do not change between iterations: fetch them once.
    template_numero = environment.get_template("numero.html")
    template_article = environment.get_template("article.html")
    for numero in numeros:
        numero_folder = target_path / "numero" / numero.name
        _write_index(
            numero_folder, template_numero.render(numero=numero, **common_params)
        )

        # `neighborhood` yields each article with its previous/next siblings;
        # the index it also yields is not needed here.
        for _index, previous, article, next_ in neighborhood(numero.articles):
            content = template_article.render(
                article=article,
                numero=numero,
                previous_situation=previous,
                next_situation=next_,
                **common_params,
            )
            article_folder = numero_folder / "article" / article.id
            _write_index(article_folder, content)
            if article.images_path:
                shutil.copytree(
                    article.images_path, article_folder / "images", dirs_exist_ok=True
                )

    template_keyword = environment.get_template("keyword.html")
    for keyword in keywords.values():
        content = template_keyword.render(keyword=keyword, **common_params)
        _write_index(target_path / "mot-clef" / keyword.slug, content)
crieur/models.py ADDED
@@ -0,0 +1,112 @@
1
+ from dataclasses import dataclass
2
+ from typing import Optional
3
+
4
+ from dataclass_wizard import DatePattern, DumpMeta, YAMLWizard
5
+ from slugify import slugify
6
+ from yaml.composer import ComposerError
7
+
8
+
@dataclass
class Numero(YAMLWizard):
    """A numero loaded from a corpus YAML file."""

    _id: str
    name: str
    description: str
    metadata: str
    # Raw entries from YAML at load time; `configure_articles` replaces them
    # with `Article` instances.
    articles: list

    def configure_articles(self, yaml_path):
        """Replace the raw article entries with loaded `Article` objects.

        Each entry is read as `entry["article"]` with `title` and `_id` keys.
        The article is looked up in the folder `<title>-<_id>` located next
        to `yaml_path`, which must contain `<title>.yaml` and `<title>.md`.
        Each loaded article also receives `content_md`, `images_path` (the
        `images` subfolder, or None when absent) and `numero` (this object).

        :yaml_path: `pathlib.Path` of the numero's YAML file.
        """
        # Preserves abstract_fr key (vs. abstract-fr) when converting to_yaml()
        DumpMeta(key_transform="SNAKE").bind_to(Article)

        loaded_articles = []
        for entry in self.articles:
            meta = entry["article"]
            title = meta["title"]
            article_folder = yaml_path.parent / f"{title}-{meta['_id']}"
            article_yaml_path = article_folder / f"{title}.yaml"
            try:
                loaded_article = Article.from_yaml_file(article_yaml_path)
            except ComposerError:
                # Fallback: only parse the text between the first and the
                # second `---` separators of the file.
                raw_yaml = article_yaml_path.read_text(encoding="utf-8")
                loaded_article = Article.from_yaml(raw_yaml.split("---")[1])
            # Explicit encoding: decoding must not depend on the platform locale.
            loaded_article.content_md = (article_folder / f"{title}.md").read_text(
                encoding="utf-8"
            )
            images_folder = article_folder / "images"
            loaded_article.images_path = (
                images_folder if images_folder.exists() else None
            )
            loaded_article.numero = self
            loaded_articles.append(loaded_article)
        self.articles = loaded_articles
45
+
46
+
@dataclass
class Article(YAMLWizard):
    """Metadata and content of one article, loadable from YAML.

    `Numero.configure_articles` additionally sets the `images_path` and
    `numero` attributes on instances; they are not declared as fields here.
    """

    id: str
    title: str
    title_f: str
    # Optional date using the "%Y/%m/%d" pattern.
    date: Optional[DatePattern["%Y/%m/%d"]]  # noqa: F722
    subtitle: str = ""
    subtitle_f: str = ""
    # Filled by `Numero.configure_articles` with the text of the `.md` file.
    content_md: str = ""
    authors: list = None
    abstract: list = None
    # Entries are read with `.get("list")` / `.get("lang")` by
    # `collect_keywords`, which then replaces them with `Keyword` objects.
    keywords: list = None
59
+
60
+
def configure_numero(yaml_path):
    """Load the `Numero` described by `yaml_path`, articles included.

    :yaml_path: `pathlib.Path` of the numero's YAML file.

    Returns the `Numero` after `configure_articles` has been called on it.
    """
    # Preserves abstract_fr key (vs. abstract-fr) when converting to_yaml()
    DumpMeta(key_transform="SNAKE").bind_to(Numero)

    try:
        numero = Numero.from_yaml_file(yaml_path)
    except ComposerError:
        # Fallback: only parse the text between the first and the second
        # `---` separators. Explicit encoding so that decoding does not
        # depend on the platform locale.
        raw_yaml = yaml_path.read_text(encoding="utf-8")
        numero = Numero.from_yaml(raw_yaml.split("---")[1])

    numero.configure_articles(yaml_path)
    return numero
72
+
73
+
@dataclass
class Keyword:
    """A keyword and the articles that use it.

    Two keywords are equal when their slugs are equal. `<` compares the
    number of articles first; with the same number, the keyword whose slug
    sorts later is the smaller one. Sorting with `reverse=True` therefore
    puts the most used keywords first and orders ties by ascending slug.
    """

    slug: str
    name: str
    articles: list

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing `slug` attribute.
        if not isinstance(other, Keyword):
            return NotImplemented
        return self.slug == other.slug

    def __lt__(self, other: "Keyword"):
        if not isinstance(other, Keyword):
            return NotImplemented
        self_count = len(self.articles)
        other_count = len(other.articles)
        if self_count == other_count:
            # Inverted on purpose, see the class docstring.
            return self.slug > other.slug
        return self_count < other_count
91
+
92
+
def collect_keywords(numeros):
    """Index the French keywords of all articles by slug.

    For every article of every numero, each entry of `article.keywords`
    whose `lang` is "fr" and whose `list` is non-empty is split on ", ".
    Every resulting keyword is registered once (by slug) as a `Keyword`
    gathering the articles that use it.

    Side effect: `article.keywords` is replaced by the list of `Keyword`
    objects found for that article (empty when it has none).

    :numeros: Iterable of objects exposing an `articles` list.

    Returns a dict mapping slug to `Keyword`, sorted in descending `Keyword`
    order (see `Keyword.__lt__`).
    """
    keywords = {}
    for numero in numeros:
        for article in numero.articles:
            article_keywords = []
            # `keywords` may still be None when the article defines none.
            for kwds in article.keywords or []:
                keyword_list = kwds.get("list")
                if keyword_list and kwds.get("lang") == "fr":  # TODO: en?
                    for keyword in keyword_list.split(", "):
                        keyword_slug = slugify(keyword)
                        if not keyword_slug:
                            # Empty item (e.g. trailing separator): an empty
                            # slug cannot name a keyword page.
                            continue
                        kw = keywords.get(keyword_slug)
                        if kw is None:
                            kw = Keyword(
                                slug=keyword_slug, name=keyword, articles=[article]
                            )
                            keywords[keyword_slug] = kw
                        else:
                            kw.articles.append(article)
                        article_keywords.append(kw)
            article.keywords = article_keywords
    return dict(sorted(keywords.items(), key=lambda item: item[1], reverse=True))