lino 25.6.1__py3-none-any.whl → 25.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lino/__init__.py +1 -1
- lino/api/doctest.py +21 -0
- lino/core/actions.py +59 -25
- lino/core/actors.py +38 -16
- lino/core/boundaction.py +16 -0
- lino/core/choicelists.py +7 -7
- lino/core/constants.py +3 -0
- lino/core/dashboard.py +1 -0
- lino/core/dbtables.py +1 -1
- lino/core/elems.py +38 -13
- lino/core/fields.py +20 -11
- lino/core/kernel.py +8 -0
- lino/core/layouts.py +6 -2
- lino/core/menus.py +3 -6
- lino/core/model.py +5 -4
- lino/core/renderer.py +14 -5
- lino/core/requests.py +8 -7
- lino/core/signals.py +1 -0
- lino/core/site.py +48 -28
- lino/core/store.py +4 -2
- lino/core/tables.py +23 -10
- lino/core/utils.py +4 -1
- lino/core/workflows.py +2 -1
- lino/help_texts.py +1 -2
- lino/management/commands/prep.py +2 -2
- lino/management/commands/show.py +8 -10
- lino/mixins/__init__.py +14 -13
- lino/mixins/periods.py +2 -0
- lino/mixins/sequenced.py +1 -1
- lino/modlib/about/models.py +4 -3
- lino/modlib/checkdata/__init__.py +42 -36
- lino/modlib/checkdata/choicelists.py +9 -1
- lino/modlib/checkdata/fixtures/checkdata.py +4 -2
- lino/modlib/checkdata/models.py +9 -2
- lino/modlib/comments/models.py +4 -3
- lino/modlib/extjs/ext_renderer.py +4 -4
- lino/modlib/extjs/views.py +8 -2
- lino/modlib/gfks/fields.py +1 -1
- lino/modlib/help/__init__.py +3 -3
- lino/modlib/help/config/makehelp/conf.tpl.py +2 -2
- lino/modlib/help/fixtures/demo2.py +6 -1
- lino/modlib/help/management/commands/makehelp.py +4 -1
- lino/modlib/help/models.py +2 -1
- lino/modlib/help/utils.py +12 -6
- lino/modlib/linod/choicelists.py +57 -4
- lino/modlib/linod/fixtures/{linod.py → checkdata.py} +3 -13
- lino/modlib/linod/management/commands/linod.py +0 -13
- lino/modlib/linod/mixins.py +8 -0
- lino/modlib/linod/models.py +29 -30
- lino/modlib/memo/__init__.py +7 -7
- lino/modlib/memo/management/__init__,py +0 -0
- lino/modlib/memo/management/commands/__init__.py +0 -0
- lino/modlib/memo/management/commands/removeurls.py +67 -0
- lino/modlib/memo/mixins.py +1 -9
- lino/modlib/memo/parser.py +1 -1
- lino/modlib/notify/config/notify/summary.eml +5 -2
- lino/modlib/notify/fixtures/demo2.py +5 -6
- lino/modlib/notify/models.py +9 -10
- lino/modlib/periods/__init__.py +11 -8
- lino/modlib/periods/choicelists.py +16 -10
- lino/modlib/periods/models.py +45 -45
- lino/modlib/summaries/fixtures/checksummaries.py +4 -2
- lino/modlib/system/models.py +17 -18
- lino/modlib/uploads/fixtures/demo.py +9 -3
- lino/modlib/uploads/mixins.py +5 -2
- lino/modlib/uploads/models.py +15 -9
- lino/modlib/uploads/utils.py +4 -1
- lino/modlib/users/__init__.py +59 -18
- lino/modlib/users/actions.py +24 -20
- lino/modlib/users/fixtures/demo_users.py +2 -35
- lino/modlib/users/mixins.py +3 -4
- lino/modlib/users/models.py +53 -13
- lino/modlib/users/ui.py +30 -16
- lino/modlib/users/utils.py +5 -6
- lino/projects/std/settings.py +1 -1
- lino/sphinxcontrib/logo/templates/footer.html +1 -0
- lino/utils/ajax.py +1 -1
- lino/utils/cycler.py +5 -0
- lino/utils/dbhash.py +4 -9
- lino/utils/dpy.py +2 -2
- lino/utils/format_date.py +4 -3
- lino/utils/html.py +13 -5
- lino/utils/jsgen.py +1 -1
- lino/utils/quantities.py +8 -0
- lino/utils/soup.py +75 -106
- {lino-25.6.1.dist-info → lino-25.7.0.dist-info}/METADATA +1 -1
- {lino-25.6.1.dist-info → lino-25.7.0.dist-info}/RECORD +90 -87
- {lino-25.6.1.dist-info → lino-25.7.0.dist-info}/WHEEL +0 -0
- {lino-25.6.1.dist-info → lino-25.7.0.dist-info}/licenses/AUTHORS.rst +0 -0
- {lino-25.6.1.dist-info → lino-25.7.0.dist-info}/licenses/COPYING +0 -0
lino/utils/soup.py
CHANGED
@@ -2,14 +2,45 @@
|
|
2
2
|
# Copyright 2016-2025 Rumma & Ko Ltd
|
3
3
|
# License: GNU Affero General Public License v3 (see file COPYING for details)
|
4
4
|
|
5
|
-
# See https://dev.lino-framework.org/
|
5
|
+
# See https://dev.lino-framework.org/src/lino/utils/soup.html
|
6
|
+
|
7
|
+
# Inspired by
|
8
|
+
# https://chase-seibert.github.io/blog/2011/01/28/sanitize-html-with-beautiful-soup.html
|
9
|
+
# https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url
|
10
|
+
# https://www.geeksforgeeks.org/python-check-url-string/
|
11
|
+
|
12
|
+
# TODO: Explain why we don't use Django's Truncator instead of this.
|
13
|
+
# from django.utils.text import Truncator
|
14
|
+
# def truncate_comment(html_str, max_length=300):
|
15
|
+
# return Truncator(html_str).chars(max_length, html=True)
|
16
|
+
|
6
17
|
|
7
18
|
import re
|
19
|
+
from html import escape
|
20
|
+
from urllib.parse import urlparse
|
8
21
|
from bs4 import BeautifulSoup, NavigableString, Comment, Doctype
|
9
|
-
from
|
10
|
-
|
11
|
-
|
12
|
-
|
22
|
+
from django.conf import settings
|
23
|
+
|
24
|
+
MORE_INDICATOR = "..."
|
25
|
+
|
26
|
+
URL_REGEX = re.compile(
|
27
|
+
r'([^"]|^)(https?:\/\/)((www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*))'
|
28
|
+
)
|
29
|
+
|
30
|
+
# URL_REGEX = re.compile(r'([^"])(https?://\S+|www\.\S+)')
|
31
|
+
|
32
|
+
|
33
|
+
def urlrepl(match):
|
34
|
+
url = match[2] + match[3]
|
35
|
+
# raise Exception(repr(url))
|
36
|
+
parsed = urlparse(url)
|
37
|
+
if parsed.scheme and parsed.netloc:
|
38
|
+
return match[1] + f'<a href="{url}" target="_blank">{match[3]}</a>'
|
39
|
+
return match[0]
|
40
|
+
|
41
|
+
|
42
|
+
def url2a(s):
|
43
|
+
return URL_REGEX.sub(urlrepl, s)
|
13
44
|
|
14
45
|
|
15
46
|
PARAGRAPH_TAGS = {
|
@@ -138,6 +169,8 @@ class TextCollector:
|
|
138
169
|
# assert ch.name != "IMG"
|
139
170
|
we_want_more = True
|
140
171
|
|
172
|
+
# print(f"20250207c add_chunk {ch.__class__} {ch}")
|
173
|
+
|
141
174
|
# Ignore all images except the first one. And for the first one we
|
142
175
|
# enforce our style.
|
143
176
|
if ch.name == "img":
|
@@ -147,7 +180,7 @@ class TextCollector:
|
|
147
180
|
return True
|
148
181
|
self.found_image = True
|
149
182
|
style = Style(ch.get("style", None))
|
150
|
-
if
|
183
|
+
if "float" not in style:
|
151
184
|
style["float"] = "right"
|
152
185
|
style.adjust_size()
|
153
186
|
if style.is_dirty:
|
@@ -156,45 +189,21 @@ class TextCollector:
|
|
156
189
|
|
157
190
|
elif ch.string is not None:
|
158
191
|
text = ch.string
|
192
|
+
if self.sep == "" and self.text == "":
|
193
|
+
text = text.lstrip()
|
159
194
|
strlen = len(text)
|
160
|
-
# print(f"20250208b add_chunk {repr(ch)} len={strlen} remaining={self.remaining}")
|
161
|
-
# chop = self.remaining
|
162
195
|
if strlen > self.remaining:
|
163
196
|
we_want_more = False
|
164
|
-
|
165
|
-
end_text = text[:self.remaining] + "..."
|
197
|
+
text = text[:self.remaining] + MORE_INDICATOR
|
166
198
|
# raise Exception(f"20250208 {strlen} > {self.remaining} {end_text}")
|
167
|
-
if isinstance(ch, NavigableString):
|
168
|
-
# ch = NavigableString(end_text)
|
169
|
-
ch = end_text
|
170
|
-
else:
|
171
|
-
ch.string.replace_with(end_text)
|
172
|
-
# # ch = NavigableString(ch.string[:chop] + "...")
|
173
|
-
# # self.text += self.sep + ch.string
|
174
|
-
# self.text += self.sep + end_text
|
175
|
-
# return False
|
176
|
-
# p = ch.string.parent
|
177
|
-
# previous_sibling = ch.previous_sibling
|
178
|
-
# ch = NavigableString(end_text)
|
179
|
-
# ch = previous_sibling.next_sibling
|
180
|
-
# raise Exception(f"20250208 Old {p} and new parent {ch.parent}")
|
181
|
-
# if isinstance(ch, NavigableString):
|
182
|
-
# ch.replace_with(end_text)
|
183
|
-
# else:
|
184
|
-
# ch.string.replace_with(end_text)
|
185
|
-
# self.text += self.sep + str(ch)
|
186
|
-
# for c in ch.children:
|
187
|
-
# self.add_chunk(c)
|
188
|
-
# return False
|
189
|
-
# raise Exception(f"20250208 {end_text} -- {ch}")
|
190
|
-
# print(f"20250208c {repr(end_text)} in {ch}")
|
191
|
-
# print("20230927", ch.string, ch)
|
192
|
-
# self.text += str(ch.string) + "..."
|
193
|
-
# self.remaining = 0
|
194
|
-
# return True
|
195
|
-
# return we_want_more
|
196
199
|
self.remaining -= strlen
|
197
|
-
# print(f"
|
200
|
+
# print(f"20250606 {text} becomes {escape(text, quote=False)}")
|
201
|
+
text = escape(text, quote=False)
|
202
|
+
if isinstance(ch, NavigableString):
|
203
|
+
# ch = NavigableString(end_text)
|
204
|
+
ch = text
|
205
|
+
else:
|
206
|
+
ch.string.replace_with(text)
|
198
207
|
|
199
208
|
# if isinstance(ch, NavigableString):
|
200
209
|
# self.text += self.sep + ch.string
|
@@ -207,33 +216,6 @@ class TextCollector:
|
|
207
216
|
return we_want_more
|
208
217
|
|
209
218
|
|
210
|
-
def truncate_comment(html_str, max_length=300):
|
211
|
-
# Returns a single paragraph with a maximum number of visible chars.
|
212
|
-
# new implementation since 20230713
|
213
|
-
html_str = html_str.strip() # remove leading or trailing newlines
|
214
|
-
|
215
|
-
if False: # no longer need to test for specil case
|
216
|
-
if not html_str.startswith("<"):
|
217
|
-
# print("20231023 c", html_str)
|
218
|
-
if len(html_str) > max_length:
|
219
|
-
return html_str[:max_length] + "..."
|
220
|
-
return html_str
|
221
|
-
|
222
|
-
# if "choose one or the other" in html_str:
|
223
|
-
# print(html_str)
|
224
|
-
# raise Exception("20230928 {} {}".format(len(html_str), max_length))
|
225
|
-
|
226
|
-
# soup = BeautifulSoup(html_str, features="html.parser")
|
227
|
-
soup = BeautifulSoup(html_str, features="lxml")
|
228
|
-
# soup = sanitized_soup(html_str)
|
229
|
-
# truncate_soup(soup, max_length)
|
230
|
-
# return str(soup)
|
231
|
-
# return "".join([str(s) for s in walk(soup, max_length)])
|
232
|
-
tc = TextCollector(max_length)
|
233
|
-
tc.add_chunk(soup)
|
234
|
-
return tc.text
|
235
|
-
|
236
|
-
|
237
219
|
# remove these tags including their content.
|
238
220
|
blacklist = frozenset(["script", "style", "head"])
|
239
221
|
|
@@ -271,7 +253,7 @@ GENERALLY_ALLOWED_ATTRS = {"title", "style", "class"}
|
|
271
253
|
|
272
254
|
# Map of allowed attributes by tag. Originally copied from bleach.sanitizer.
|
273
255
|
ALLOWED_ATTRIBUTES = {
|
274
|
-
"a": {"href"} | GENERALLY_ALLOWED_ATTRS,
|
256
|
+
"a": {"href", "target"} | GENERALLY_ALLOWED_ATTRS,
|
275
257
|
"img": {"src", "alt"} | GENERALLY_ALLOWED_ATTRS,
|
276
258
|
}
|
277
259
|
|
@@ -292,16 +274,18 @@ ALLOWED_ATTRIBUTES["p"] = GENERALLY_ALLOWED_ATTRS | {"align"}
|
|
292
274
|
# return css
|
293
275
|
|
294
276
|
|
295
|
-
|
277
|
+
SANITIZERS = []
|
296
278
|
|
297
|
-
# Inspired by https://chase-seibert.github.io/blog/2011/01/28/sanitize-html-with-beautiful-soup.html
|
298
279
|
|
299
|
-
|
300
|
-
|
301
|
-
except HTMLParseError as e:
|
302
|
-
logger.warning("Could not sanitize %r : %s", old, e)
|
303
|
-
return f"Could not sanitize content ({e})"
|
280
|
+
def register_sanitizer(func):
|
281
|
+
SANITIZERS.append(func)
|
304
282
|
|
283
|
+
|
284
|
+
def sanitized_soup(htmlstr):
|
285
|
+
if not htmlstr.startswith("<"):
|
286
|
+
htmlstr = f"<p>{htmlstr}</p>"
|
287
|
+
htmlstr = url2a(htmlstr)
|
288
|
+
soup = BeautifulSoup(htmlstr, features="lxml")
|
305
289
|
for tag in soup.find_all():
|
306
290
|
# print(tag)
|
307
291
|
tag_name = tag.name.lower()
|
@@ -324,21 +308,8 @@ def sanitized_soup(old):
|
|
324
308
|
tag.name = "span"
|
325
309
|
tag.attrs = dict()
|
326
310
|
|
327
|
-
links = soup.find_all('p', string=(expr := re.compile(
|
328
|
-
r"(?P<url>https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*))"
|
329
|
-
)))
|
330
|
-
if links:
|
331
|
-
for link in links:
|
332
|
-
m = re.search(expr, link.text)
|
333
|
-
url = m['url']
|
334
|
-
old_link = str(link).replace(url, "<a href=\"{0}\">{0}</a>".format(url))
|
335
|
-
link_soup = BeautifulSoup(old_link, features="lxml")
|
336
|
-
link.replaceWith(link_soup.p)
|
337
|
-
|
338
|
-
|
339
311
|
# remove all comments because they might contain scripts
|
340
|
-
comments = soup.find_all(
|
341
|
-
text=lambda text: isinstance(text, (Comment, Doctype)))
|
312
|
+
comments = soup.find_all(text=lambda t: isinstance(t, (Comment, Doctype)))
|
342
313
|
for comment in comments:
|
343
314
|
comment.extract()
|
344
315
|
|
@@ -351,24 +322,22 @@ def sanitized_soup(old):
|
|
351
322
|
return soup
|
352
323
|
|
353
324
|
|
354
|
-
def sanitize(
|
355
|
-
|
356
|
-
if
|
357
|
-
return
|
358
|
-
|
359
|
-
soup = sanitized_soup(s)
|
360
|
-
|
325
|
+
def sanitize(htmlstr, save=False, ar=None):
|
326
|
+
htmlstr = htmlstr.strip()
|
327
|
+
if htmlstr == "":
|
328
|
+
return htmlstr
|
329
|
+
soup = sanitized_soup(htmlstr)
|
361
330
|
for func in SANITIZERS:
|
362
|
-
func(soup,
|
363
|
-
|
364
|
-
# do we want to remove whitespace between tags?
|
365
|
-
# s = re.sub(">\s+<", "><", s)
|
366
|
-
# return sanitized_soup(s).decode(formatter="html").strip()
|
331
|
+
func(soup, save=save, ar=ar)
|
367
332
|
return str(soup).strip()
|
368
333
|
|
369
334
|
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
335
|
+
def truncate_comment(htmlstr, max_length=300):
|
336
|
+
# new implementation since 20230713
|
337
|
+
htmlstr = htmlstr.strip() # remove leading or trailing newlines
|
338
|
+
if htmlstr == '':
|
339
|
+
return htmlstr
|
340
|
+
soup = sanitized_soup(htmlstr)
|
341
|
+
tc = TextCollector(max_length)
|
342
|
+
tc.add_chunk(soup)
|
343
|
+
return tc.text.strip()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: lino
|
3
|
-
Version: 25.6.1
|
3
|
+
Version: 25.7.0
|
4
4
|
Summary: A framework for writing desktop-like web applications using Django and ExtJS or React
|
5
5
|
Project-URL: Homepage, https://www.lino-framework.org
|
6
6
|
Project-URL: Repository, https://gitlab.com/lino-framework/lino
|