crieur 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
crieur/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from pathlib import Path
2
+
# Release identifier of the package.
VERSION = "2.0.2"
# Directory that contains this package's own files.
ROOT_DIR = Path(__file__).parents[0]
crieur/__main__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .cli import main
2
+
# Executed when the package is run as a module: delegate to the CLI entry point.
main()
crieur/cli.py ADDED
@@ -0,0 +1,169 @@
1
+ import contextlib
2
+ import shutil
3
+ import socket
4
+ import zipfile
5
+ from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer, test
6
+ from pathlib import Path
7
+
8
+ import httpx
9
+ from minicli import cli, run
10
+
11
+ from . import VERSION
12
+ from .generator import generate_feed, generate_html
13
+ from .models import Settings, collect_authors, collect_keywords, configure_numero
14
+ from .utils import each_file_from, each_folder_from
15
+
16
+
@cli
def version():
    """Return the current version."""
    # NOTE(review): minicli appears to surface this docstring as the command
    # help, so its wording is left untouched.
    banner = f"Crieur version: {VERSION}"
    print(banner)
21
+
22
+
@cli
def generate(
    title: str = "Crieur",
    base_url: str = "/",
    extra_vars: str = "",
    target_path: Path = Path() / "public",
    source_path: Path = Path() / "sources",
    statics_path: Path = Path(__file__).parent / "statics",
    templates_path: Path = Path(__file__).parent / "templates",
    csl_path: Path = Path(__file__).parent / "styles" / "apa.csl",
    without_statics: bool = False,
    feed_limit: int = 10,
):
    """Generate a new revue website.

    :title: Title of the website (default: Crieur).
    :base_url: Base URL of the website, ending with / (default: /).
    :extra_vars: stringified JSON extra vars passed to the templates.
    :target_path: Path where site is built (default: /public/).
    :source_path: Path where stylo source were downloaded (default: /sources/).
    :statics_path: Path where statics are located (default: @crieur/statics/).
    :templates_path: Path where templates are located (default: @crieur/templates/).
    :csl_path: Path to the CSL applied for bibliography (default: @crieur/styles/apa.csl).
    :without_statics: Do not copy statics if True (default: False).
    :feed_limit: Number of max items in the feed (default: 10).
    """
    # The help entry above now uses the real parameter name (`templates_path`);
    # it previously documented a non-existent `template_path`.
    settings = Settings(
        title,
        base_url,
        extra_vars,
        target_path,
        source_path,
        statics_path,
        templates_path,
        csl_path,
        without_statics,
        feed_limit,
    )

    # One numero is configured per `*.yaml` file found in each source folder.
    # Distinct names are used for the folder and the configured numero so the
    # outer loop variable is never rebound while it is being iterated.
    numeros = [
        configure_numero(corpus_yaml, settings)
        for numero_folder in each_folder_from(source_path)
        for corpus_yaml in each_file_from(numero_folder, pattern="*.yaml")
    ]

    keywords = collect_keywords(numeros)
    authors = collect_authors(numeros)
    generate_html(numeros, keywords, authors, settings)
    generate_feed(numeros, settings)

    if not settings.without_statics:
        # copytree creates the destination (and missing parents) itself and,
        # with dirs_exist_ok=True, merges into an existing folder, so no
        # separate exists()/mkdir() step is needed.
        shutil.copytree(
            settings.statics_path,
            settings.target_path / "statics",
            dirs_exist_ok=True,
        )
78
+
79
+
@cli
def stylo(
    *stylo_ids: str,
    stylo_instance: str = "stylo.huma-num.fr",
    stylo_export: str = "https://export.stylo.huma-num.fr",
    force_download: bool = False,
):
    """Initialize a new revue to current directory from Stylo.

    :stylo_ids: Corpus ids from Stylo, separated by commas.
    :stylo_instance: Instance of Stylo (default: stylo.huma-num.fr).
    :stylo_export: Stylo export URL (default: https://export.stylo.huma-num.fr).
    :force_download: Force download of sources from Stylo (default: False).
    """
    print(
        f"Initializing a new revue: `{stylo_ids}` from `{stylo_instance}` "
        f"through export service `{stylo_export}`."
    )

    sources_path = Path() / "sources"
    # exist_ok replaces the former exists()-then-mkdir sequence, which could
    # race with another process creating the folder in between.
    sources_path.mkdir(exist_ok=True)

    # Archives and extraction folders are numbered from 1 in argument order.
    for position, stylo_id in enumerate(stylo_ids, start=1):
        zip_path = Path() / f"export-{position}-{stylo_id}.zip"
        if force_download or not zip_path.exists():
            url = (
                f"{stylo_export}/generique/corpus/export/"
                f"{stylo_instance}/{stylo_id}/Extract-corpus/"
                "?with_toc=0&with_ascii=0&with_link_citations=0&with_nocite=0"
                "&version=&bibliography_style=chicagomodified&formats=originals"
            )
            print(f"Downloading data from {url} to {zip_path}")
            # Stream the response straight to disk, chunk by chunk.
            with zip_path.open("wb") as fd, httpx.stream(
                "GET", url, timeout=None
            ) as r:
                for data in r.iter_bytes():
                    fd.write(data)
        else:
            print(
                f"Source already exists: `{zip_path}` (no download). "
                "Use the `--force` option to download it again"
            )

        target_path = sources_path / f"{position}-{stylo_id}"
        try:
            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                zip_ref.extractall(target_path)
        except zipfile.BadZipFile:
            # Whatever was downloaded is not a zip archive: keep it under a
            # distinct name for inspection and stop processing further ids.
            zip_problematic_path = (
                Path() / f"problematic-export-{position}-{stylo_id}.zip"
            )
            zip_path.rename(zip_problematic_path)
            print(f"Unable to find corpus with id {stylo_id}!")
            print(
                f"Check out the content of {zip_problematic_path} to try to understand."
            )
            print(
                "Either you use a wrong corpus id or there is an issue with the export."
            )
            return
        else:
            print(f"Data extracted to {target_path}")
139
+
140
+
@cli
def serve(repository_path: Path = Path(), port: int = 8000):
    """Serve an HTML book from `repository_path`/public or current directory/public.

    :repository_path: Absolute or relative path to book’s sources (default: current).
    :port: Port to serve the book from (default=8000)
    """
    announcement = (
        f"Serving HTML book from `{repository_path}/public` to http://127.0.0.1:{port}"
    )
    print(announcement)

    # Folder handed to every request handler as its document root.
    public_directory = str(repository_path / "public")

    # From https://github.com/python/cpython/blob/main/Lib/http/server.py#L1307-L1326
    class DirectoryServer(ThreadingHTTPServer):
        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

        def finish_request(self, request, client_address):
            # Instantiating the handler is what processes the request.
            handler_class = self.RequestHandlerClass
            handler_class(request, client_address, self, directory=public_directory)

    test(
        HandlerClass=SimpleHTTPRequestHandler,
        ServerClass=DirectoryServer,
        port=port,
    )
166
+
167
+
def main():
    """Console entry point: delegate to minicli's `run`."""
    run()
crieur/generator.py ADDED
@@ -0,0 +1,181 @@
1
+ import json
2
+ import locale
3
+ import shutil
4
+ from datetime import datetime, timedelta, timezone
5
+ from pathlib import Path
6
+
7
+ import mistune
8
+ from feedgen.feed import FeedGenerator
9
+ from jinja2 import Environment as Env
10
+ from jinja2 import FileSystemLoader
11
+ from slugify import slugify
12
+
13
+ from . import VERSION
14
+ from .typography import typographie
15
+ from .utils import neighborhood
16
+
# Activate the first available French locale. The process default locale is
# applied only when none of the candidates can be set: the `else` clause of the
# loop runs solely when the loop finishes without hitting `break`, so a
# successfully selected French locale is never overridden.
for locale_ in ["fr_FR", "fr_FR.UTF-8", "fr_CA", "fr_CA.UTF-8"]:
    try:
        locale.setlocale(locale.LC_ALL, locale_)
    except locale.Error:
        continue
    break
else:
    locale.setlocale(locale.LC_ALL, "")
24
+
25
+
# Plugins handed to mistune: three referenced by short name, plus one
# referenced by its dotted path inside this package.
mistune_plugins = ["footnotes", "superscript", "table"] + [
    "crieur.plugins.inline_footnotes"
]
# Shared renderer used by the `markdown` filter.
# NOTE(review): escape=False presumably lets raw HTML in the sources pass
# through unescaped — confirm against mistune's documentation.
md = mistune.create_markdown(escape=False, plugins=mistune_plugins)
33
+
34
+
def slugify_(value):
    """Template filter: return `slugify(value)`."""
    slug = slugify(value)
    return slug
37
+
38
+
def markdown(value):
    """Template filter: render *value* with the module-level `md` renderer.

    Falsy input (empty string, ``None``) yields an empty string.
    """
    if not value:
        return ""
    return md(value)
41
+
42
+
def typography(value):
    """Template filter: normalise *value* and pass it through `typographie`.

    Falsy input (empty string, ``None``) yields an empty string. The check is
    done first so that ``None`` no longer fails on ``.replace``.
    """
    if not value:
        return ""
    # Drop the backslash of escaped spaces and use typographic apostrophes
    # before handing the text to the typography helper.
    value = value.replace("\\ ", " ")
    value = value.replace("'", "’")
    return typographie(value)
47
+
48
+
def pluralize(number, singular="", plural="s"):
    """Template filter: return *singular* when *number* equals 1, else *plural*."""
    return singular if number == 1 else plural
54
+
55
+
def _write_page(folder, content):
    """Write *content* to ``folder/index.html``, creating *folder* if needed."""
    folder.mkdir(parents=True, exist_ok=True)
    # An explicit encoding keeps the output independent of the process locale,
    # which this module changes at import time.
    (folder / "index.html").write_text(content, encoding="utf-8")


def generate_html(numeros, keywords, authors, settings):
    """Render every HTML page of the site below ``settings.target_path``.

    Pages written (each as an ``index.html``):

    - the homepage at the target root,
    - ``numero/`` and ``numero/<numero.slug>/``,
    - ``numero/<numero.slug>/article/<article.id>/`` (plus an ``images``
      folder copied from ``article.images_path`` when it is set),
    - ``blog/``,
    - ``mot-clef/`` and ``mot-clef/<keyword.slug>/``,
    - ``auteur/`` and ``auteur/<author.slug>/``.

    :param numeros: iterable of numeros, each exposing ``slug`` and ``articles``.
    :param keywords: mapping whose values expose ``slug``.
    :param authors: mapping whose values expose ``slug``.
    :param settings: object providing ``templates_path``, ``target_path``,
        ``title``, ``base_url`` and ``extra_vars`` (a JSON string or empty).
    :raises json.JSONDecodeError: when ``settings.extra_vars`` is not valid JSON.
    """
    # Templates are looked up in the configured folder first, then in the
    # templates folder next to this module.
    environment = Env(
        loader=FileSystemLoader(
            [str(settings.templates_path), str(Path(__file__).parent / "templates")]
        )
    )
    environment.filters["slugify"] = slugify_
    environment.filters["markdown"] = markdown
    environment.filters["typography"] = typography
    environment.filters["pluralize"] = pluralize

    extra_vars = json.loads(settings.extra_vars) if settings.extra_vars else {}

    # Context shared by every page; extra_vars come last so they may override
    # the built-in entries.
    common_params = {
        "title": settings.title,
        "base_url": settings.base_url,
        "numeros": numeros,
        # Sorted with the articles' own ordering, reversed.
        "articles": sorted(
            [article for numero in numeros for article in numero.articles], reverse=True
        ),
        "keywords": keywords,
        "authors": authors,
        "crieur_version": VERSION,
        **extra_vars,
    }

    target_path = settings.target_path

    template_homepage = environment.get_template("homepage.html")
    _write_page(
        target_path, template_homepage.render(is_homepage=True, **common_params)
    )

    template_numeros = environment.get_template("numeros.html")
    _write_page(
        target_path / "numero",
        template_numeros.render(is_numeros=True, **common_params),
    )

    template_blog = environment.get_template("blog.html")
    _write_page(
        target_path / "blog", template_blog.render(is_blog=True, **common_params)
    )

    # Templates are loop-invariant, so they are fetched once per listing.
    template_numero = environment.get_template("numero.html")
    template_article = environment.get_template("article.html")
    for numero in numeros:
        numero_folder = target_path / "numero" / numero.slug
        _write_page(
            numero_folder, template_numero.render(numero=numero, **common_params)
        )

        for _, previous, article, next_ in neighborhood(numero.articles):
            content = template_article.render(
                article=article,
                previous_situation=previous,
                next_situation=next_,
                **common_params,
            )
            article_folder = numero_folder / "article" / article.id
            _write_page(article_folder, content)
            if article.images_path:
                shutil.copytree(
                    article.images_path, article_folder / "images", dirs_exist_ok=True
                )

    template_keywords = environment.get_template("keywords.html")
    _write_page(
        target_path / "mot-clef",
        template_keywords.render(is_keywords=True, **common_params),
    )

    template_keyword = environment.get_template("keyword.html")
    for keyword in keywords.values():
        _write_page(
            target_path / "mot-clef" / keyword.slug,
            template_keyword.render(keyword=keyword, **common_params),
        )

    template_authors = environment.get_template("authors.html")
    _write_page(
        target_path / "auteur",
        template_authors.render(is_authors=True, **common_params),
    )

    template_author = environment.get_template("author.html")
    for author in authors.values():
        _write_page(
            target_path / "auteur" / author.slug,
            template_author.render(author=author, **common_params),
        )
147
+
148
+
def generate_feed(numeros, settings, lang="fr"):
    """Write an Atom feed of the latest articles to ``target_path/feed.xml``.

    :param numeros: iterable of numeros, each exposing ``articles``.
    :param settings: object providing ``base_url``, ``title``, ``feed_limit``
        and ``target_path``.
    :param lang: language code declared on the feed (default: ``"fr"``).
    """
    feed = FeedGenerator()
    feed.id(settings.base_url)
    feed.title(settings.title)
    feed.link(href=settings.base_url, rel="alternate")
    feed.link(href=f"{settings.base_url}feed.xml", rel="self")
    feed.language(lang)

    # Sorted with the articles' own ordering, reversed, then capped.
    articles = sorted(
        [article for numero in numeros for article in numero.articles], reverse=True
    )
    feed_articles = articles[: settings.feed_limit]

    # Fixed UTC-4 offset labelled "ET"; built once rather than per entry.
    feed_timezone = timezone(timedelta(hours=-4), "ET")

    for article in feed_articles:
        article_url = f"{settings.base_url}{article.url}"
        feed_entry = feed.add_entry(order="append")
        feed_entry.id(article_url)
        feed_entry.title(article.title_f)
        feed_entry.link(href=article_url)
        # Entries are stamped at midnight of the article's date.
        feed_entry.updated(
            datetime.combine(
                article.date,
                datetime.min.time(),
                tzinfo=feed_timezone,
            )
        )
        for author in article.authors:
            feed_entry.author(name=str(author))
        feed_entry.summary(summary=article.content_html, type="html")
        if article.keywords:
            for keyword in article.keywords:
                feed_entry.category(term=keyword.name)

    feed.atom_file(settings.target_path / "feed.xml", pretty=True)
    # Report the number of entries actually written, which is lower than the
    # configured limit when fewer articles exist.
    print(f"Generated meta-feed with {len(feed_articles)} items.")