pdoc 14.5.1__py3-none-any.whl → 14.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdoc/__init__.py +7 -1
- pdoc/__main__.py +2 -2
- pdoc/_compat.py +11 -0
- pdoc/doc.py +40 -16
- pdoc/doc_ast.py +2 -2
- pdoc/doc_pyi.py +22 -6
- pdoc/docstrings.py +1 -1
- pdoc/extract.py +1 -1
- pdoc/markdown2/__init__.py +1667 -765
- pdoc/render_helpers.py +76 -38
- pdoc/templates/content.css +10 -6
- pdoc/templates/default/module.html.jinja2 +1 -1
- pdoc-14.6.1.dist-info/LICENSE +5 -0
- {pdoc-14.5.1.dist-info → pdoc-14.6.1.dist-info}/METADATA +2 -2
- {pdoc-14.5.1.dist-info → pdoc-14.6.1.dist-info}/RECORD +18 -18
- {pdoc-14.5.1.dist-info → pdoc-14.6.1.dist-info}/WHEEL +1 -1
- pdoc-14.5.1.dist-info/LICENSE +0 -24
- {pdoc-14.5.1.dist-info → pdoc-14.6.1.dist-info}/entry_points.txt +0 -0
- {pdoc-14.5.1.dist-info → pdoc-14.6.1.dist-info}/top_level.txt +0 -0
pdoc/markdown2/__init__.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# fmt: off
|
2
2
|
# flake8: noqa
|
3
3
|
# type: ignore
|
4
|
-
# Taken from here: https://github.com/trentm/python-markdown2/blob/
|
4
|
+
# Taken from here: https://github.com/trentm/python-markdown2/blob/8d3a65bc7d4f8b64af89f668eb6c60841dc0578c/lib/markdown2.py
|
5
5
|
|
6
6
|
#!/usr/bin/env python
|
7
7
|
# Copyright (c) 2012 Trent Mick.
|
@@ -46,7 +46,11 @@ Supported extra syntax options (see -x|--extras option below and
|
|
46
46
|
see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
|
47
47
|
|
48
48
|
* admonitions: Enable parsing of RST admonitions.
|
49
|
-
*
|
49
|
+
* breaks: Control where hard breaks are inserted in the markdown.
|
50
|
+
Options include:
|
51
|
+
- on_newline: Replace single new line characters with <br> when True
|
52
|
+
- on_backslash: Replace backslashes at the end of a line with <br>
|
53
|
+
* break-on-newline: Alias for the on_newline option in the breaks extra.
|
50
54
|
* code-friendly: Disable _ and __ for em and strong.
|
51
55
|
* cuddled-lists: Allow lists to be cuddled to the preceding paragraph.
|
52
56
|
* fenced-code-blocks: Allows a code block to not have to be indented
|
@@ -71,6 +75,9 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
|
|
71
75
|
some limitations.
|
72
76
|
* metadata: Extract metadata from a leading '---'-fenced block.
|
73
77
|
See <https://github.com/trentm/python-markdown2/issues/77> for details.
|
78
|
+
* middle-word-em: Allows or disallows emphasis syntax in the middle of words,
|
79
|
+
defaulting to allow. Disabling this means that `this_text_here` will not be
|
80
|
+
converted to `this<em>text</em>here`.
|
74
81
|
* nofollow: Add `rel="nofollow"` to add `<a>` tags with an href. See
|
75
82
|
<http://en.wikipedia.org/wiki/Nofollow>.
|
76
83
|
* numbering: Support of generic counters. Non standard extension to
|
@@ -104,7 +111,7 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
|
|
104
111
|
# not yet sure if there implications with this. Compare 'pydoc sre'
|
105
112
|
# and 'perldoc perlre'.
|
106
113
|
|
107
|
-
__version_info__ = (2,
|
114
|
+
__version_info__ = (2, 5, 1)
|
108
115
|
__version__ = '.'.join(map(str, __version_info__))
|
109
116
|
__author__ = "Trent Mick"
|
110
117
|
|
@@ -113,9 +120,24 @@ import codecs
|
|
113
120
|
import logging
|
114
121
|
import re
|
115
122
|
import sys
|
116
|
-
from collections import defaultdict
|
123
|
+
from collections import defaultdict, OrderedDict
|
124
|
+
from abc import ABC, abstractmethod
|
125
|
+
import functools
|
117
126
|
from hashlib import sha256
|
118
127
|
from random import randint, random
|
128
|
+
from typing import Any, Callable, Collection, Dict, List, Literal, Optional, Tuple, Type, TypedDict, Union
|
129
|
+
from enum import IntEnum, auto
|
130
|
+
|
131
|
+
if sys.version_info[1] < 9:
|
132
|
+
from typing import Iterable
|
133
|
+
else:
|
134
|
+
from collections.abc import Iterable
|
135
|
+
|
136
|
+
# ---- type defs
|
137
|
+
_safe_mode = Literal['replace', 'escape']
|
138
|
+
_extras_dict = Dict[str, Any]
|
139
|
+
_extras_param = Union[List[str], _extras_dict]
|
140
|
+
_link_patterns = Iterable[Tuple[re.Pattern, Union[str, Callable[[re.Match], str]]]]
|
119
141
|
|
120
142
|
# ---- globals
|
121
143
|
|
@@ -128,7 +150,7 @@ DEFAULT_TAB_WIDTH = 4
|
|
128
150
|
SECRET_SALT = bytes(randint(0, 1000000))
|
129
151
|
# MD5 function was previously used for this; the "md5" prefix was kept for
|
130
152
|
# backwards compatibility.
|
131
|
-
def _hash_text(s):
|
153
|
+
def _hash_text(s: str) -> str:
|
132
154
|
return 'md5-' + sha256(SECRET_SALT + s.encode("utf-8")).hexdigest()[32:]
|
133
155
|
|
134
156
|
# Table of hash values for escaped characters:
|
@@ -147,11 +169,18 @@ class MarkdownError(Exception):
|
|
147
169
|
|
148
170
|
# ---- public api
|
149
171
|
|
150
|
-
def markdown_path(
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
172
|
+
def markdown_path(
|
173
|
+
path: str,
|
174
|
+
encoding: str = "utf-8",
|
175
|
+
html4tags: bool = False,
|
176
|
+
tab_width: int = DEFAULT_TAB_WIDTH,
|
177
|
+
safe_mode: Optional[_safe_mode] = None,
|
178
|
+
extras: Optional[_extras_param] = None,
|
179
|
+
link_patterns: Optional[_link_patterns] = None,
|
180
|
+
footnote_title: Optional[str] = None,
|
181
|
+
footnote_return_symbol: Optional[str] = None,
|
182
|
+
use_file_vars: bool = False
|
183
|
+
) -> 'UnicodeWithAttrs':
|
155
184
|
fp = codecs.open(path, 'r', encoding)
|
156
185
|
text = fp.read()
|
157
186
|
fp.close()
|
@@ -163,10 +192,18 @@ def markdown_path(path, encoding="utf-8",
|
|
163
192
|
use_file_vars=use_file_vars).convert(text)
|
164
193
|
|
165
194
|
|
166
|
-
def markdown(
|
167
|
-
|
168
|
-
|
169
|
-
|
195
|
+
def markdown(
|
196
|
+
text: str,
|
197
|
+
html4tags: bool = False,
|
198
|
+
tab_width: int = DEFAULT_TAB_WIDTH,
|
199
|
+
safe_mode: Optional[_safe_mode] = None,
|
200
|
+
extras: Optional[_extras_param] = None,
|
201
|
+
link_patterns: Optional[_link_patterns] = None,
|
202
|
+
footnote_title: Optional[str] = None,
|
203
|
+
footnote_return_symbol: Optional[str] =None,
|
204
|
+
use_file_vars: bool = False,
|
205
|
+
cli: bool = False
|
206
|
+
) -> 'UnicodeWithAttrs':
|
170
207
|
return Markdown(html4tags=html4tags, tab_width=tab_width,
|
171
208
|
safe_mode=safe_mode, extras=extras,
|
172
209
|
link_patterns=link_patterns,
|
@@ -175,6 +212,66 @@ def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
|
|
175
212
|
use_file_vars=use_file_vars, cli=cli).convert(text)
|
176
213
|
|
177
214
|
|
215
|
+
class Stage(IntEnum):
|
216
|
+
PREPROCESS = auto()
|
217
|
+
HASH_HTML = auto()
|
218
|
+
LINK_DEFS = auto()
|
219
|
+
|
220
|
+
BLOCK_GAMUT = auto()
|
221
|
+
HEADERS = auto()
|
222
|
+
LISTS = auto()
|
223
|
+
CODE_BLOCKS = auto()
|
224
|
+
BLOCK_QUOTES = auto()
|
225
|
+
PARAGRAPHS = auto()
|
226
|
+
|
227
|
+
SPAN_GAMUT = auto()
|
228
|
+
CODE_SPANS = auto()
|
229
|
+
ESCAPE_SPECIAL = auto()
|
230
|
+
LINKS = auto() # and auto links
|
231
|
+
ITALIC_AND_BOLD = auto()
|
232
|
+
|
233
|
+
POSTPROCESS = auto()
|
234
|
+
UNHASH_HTML = auto()
|
235
|
+
|
236
|
+
|
237
|
+
def mark_stage(stage: Stage):
|
238
|
+
'''
|
239
|
+
Decorator that handles executing relevant `Extra`s before and after this `Stage` executes.
|
240
|
+
'''
|
241
|
+
def wrapper(func):
|
242
|
+
@functools.wraps(func)
|
243
|
+
def inner(md: 'Markdown', text, *args, **kwargs):
|
244
|
+
md.stage = stage
|
245
|
+
# set "order" prop so extras can tell if they're being invoked before/after the stage
|
246
|
+
md.order = stage - 0.5
|
247
|
+
|
248
|
+
if stage in Extra._exec_order:
|
249
|
+
for klass in Extra._exec_order[stage][0]:
|
250
|
+
if klass.name not in md.extra_classes:
|
251
|
+
continue
|
252
|
+
extra = md.extra_classes[klass.name]
|
253
|
+
if extra.test(text):
|
254
|
+
text = extra.run(text)
|
255
|
+
|
256
|
+
md.order = stage
|
257
|
+
text = func(md, text, *args, **kwargs)
|
258
|
+
md.order = stage + 0.5
|
259
|
+
|
260
|
+
if stage in Extra._exec_order:
|
261
|
+
for klass in Extra._exec_order[stage][1]:
|
262
|
+
if klass.name not in md.extra_classes:
|
263
|
+
continue
|
264
|
+
extra = md.extra_classes[klass.name]
|
265
|
+
if extra.test(text):
|
266
|
+
text = extra.run(text)
|
267
|
+
|
268
|
+
return text
|
269
|
+
|
270
|
+
return inner
|
271
|
+
|
272
|
+
return wrapper
|
273
|
+
|
274
|
+
|
178
275
|
class Markdown(object):
|
179
276
|
# The dict of "extras" to enable in processing -- a mapping of
|
180
277
|
# extra name to argument for the extra. Most extras do not have an
|
@@ -182,27 +279,47 @@ class Markdown(object):
|
|
182
279
|
#
|
183
280
|
# This can be set via (a) subclassing and (b) the constructor
|
184
281
|
# "extras" argument.
|
185
|
-
extras
|
282
|
+
extras: _extras_dict
|
283
|
+
# dict of `Extra` names and associated class instances, populated during _setup_extras
|
284
|
+
extra_classes: Dict[str, 'Extra']
|
186
285
|
|
187
|
-
urls
|
188
|
-
titles
|
189
|
-
html_blocks
|
190
|
-
html_spans
|
191
|
-
html_removed_text = "{(#HTML#)}" # placeholder removed text that does not trigger bold
|
192
|
-
html_removed_text_compat = "[HTML_REMOVED]" # for compat with markdown.py
|
286
|
+
urls: Dict[str, str]
|
287
|
+
titles: Dict[str, str]
|
288
|
+
html_blocks: Dict[str, str]
|
289
|
+
html_spans: Dict[str, str]
|
290
|
+
html_removed_text: str = "{(#HTML#)}" # placeholder removed text that does not trigger bold
|
291
|
+
html_removed_text_compat: str = "[HTML_REMOVED]" # for compat with markdown.py
|
292
|
+
safe_mode: Optional[_safe_mode]
|
193
293
|
|
194
|
-
_toc
|
294
|
+
_toc: List[Tuple[int, str, str]]
|
195
295
|
|
196
296
|
# Used to track when we're inside an ordered or unordered list
|
197
297
|
# (see _ProcessListItems() for details):
|
198
298
|
list_level = 0
|
199
299
|
|
300
|
+
stage: Stage
|
301
|
+
'''Current "stage" of markdown conversion taking place'''
|
302
|
+
order: float
|
303
|
+
'''
|
304
|
+
Same as `Stage` but will be +/- 0.5 of the value of `Stage`.
|
305
|
+
This allows extras to check if they are running before or after a particular stage
|
306
|
+
with `if md.order < md.stage`.
|
307
|
+
'''
|
308
|
+
|
200
309
|
_ws_only_line_re = re.compile(r"^[ \t]+$", re.M)
|
201
310
|
|
202
|
-
def __init__(
|
203
|
-
|
204
|
-
|
205
|
-
|
311
|
+
def __init__(
|
312
|
+
self,
|
313
|
+
html4tags: bool = False,
|
314
|
+
tab_width: int = DEFAULT_TAB_WIDTH,
|
315
|
+
safe_mode: Optional[_safe_mode] = None,
|
316
|
+
extras: Optional[_extras_param] = None,
|
317
|
+
link_patterns: Optional[_link_patterns] = None,
|
318
|
+
footnote_title: Optional[str] = None,
|
319
|
+
footnote_return_symbol: Optional[str] = None,
|
320
|
+
use_file_vars: bool = False,
|
321
|
+
cli: bool = False
|
322
|
+
):
|
206
323
|
if html4tags:
|
207
324
|
self.empty_element_suffix = ">"
|
208
325
|
else:
|
@@ -219,10 +336,13 @@ class Markdown(object):
|
|
219
336
|
self.safe_mode = safe_mode
|
220
337
|
|
221
338
|
# Massaging and building the "extras" info.
|
222
|
-
if self
|
339
|
+
if getattr(self, 'extras', None) is None:
|
223
340
|
self.extras = {}
|
224
341
|
elif not isinstance(self.extras, dict):
|
225
|
-
|
342
|
+
# inheriting classes may set `self.extras` as List[str].
|
343
|
+
# we can't allow it through type hints but we can convert it
|
344
|
+
self.extras = dict([(e, None) for e in self.extras]) # type:ignore
|
345
|
+
|
226
346
|
if extras:
|
227
347
|
if not isinstance(extras, dict):
|
228
348
|
extras = dict([(e, None) for e in extras])
|
@@ -237,14 +357,30 @@ class Markdown(object):
|
|
237
357
|
self._toc_depth = 6
|
238
358
|
else:
|
239
359
|
self._toc_depth = self.extras["toc"].get("depth", 6)
|
240
|
-
|
360
|
+
|
361
|
+
if 'header-ids' in self.extras:
|
362
|
+
if not isinstance(self.extras['header-ids'], dict):
|
363
|
+
self.extras['header-ids'] = {
|
364
|
+
'mixed': False,
|
365
|
+
'prefix': self.extras['header-ids'],
|
366
|
+
'reset-count': True
|
367
|
+
}
|
368
|
+
|
369
|
+
if 'break-on-newline' in self.extras:
|
370
|
+
self.extras.setdefault('breaks', {})
|
371
|
+
self.extras['breaks']['on_newline'] = True
|
241
372
|
|
242
373
|
if 'link-patterns' in self.extras:
|
374
|
+
# allow link patterns via extras dict without kwarg explicitly set
|
375
|
+
link_patterns = link_patterns or self.extras['link-patterns']
|
243
376
|
if link_patterns is None:
|
244
377
|
# if you have specified that the link-patterns extra SHOULD
|
245
378
|
# be used (via self.extras) but you haven't provided anything
|
246
379
|
# via the link_patterns argument then an error is raised
|
247
380
|
raise MarkdownError("If the 'link-patterns' extra is used, an argument for 'link_patterns' is required")
|
381
|
+
self.extras['link-patterns'] = link_patterns
|
382
|
+
|
383
|
+
self._instance_extras = self.extras.copy()
|
248
384
|
self.link_patterns = link_patterns
|
249
385
|
self.footnote_title = footnote_title
|
250
386
|
self.footnote_return_symbol = footnote_return_symbol
|
@@ -266,16 +402,25 @@ class Markdown(object):
|
|
266
402
|
self.list_level = 0
|
267
403
|
self.extras = self._instance_extras.copy()
|
268
404
|
self._setup_extras()
|
269
|
-
self._toc =
|
405
|
+
self._toc = []
|
270
406
|
|
271
407
|
def _setup_extras(self):
|
272
408
|
if "footnotes" in self.extras:
|
273
|
-
|
409
|
+
# order of insertion matters for footnotes. Use ordered dict for Python < 3.7
|
410
|
+
# https://docs.python.org/3/whatsnew/3.7.html#summary-release-highlights
|
411
|
+
self.footnotes = OrderedDict()
|
274
412
|
self.footnote_ids = []
|
275
413
|
if "header-ids" in self.extras:
|
276
|
-
self
|
414
|
+
if not hasattr(self, '_count_from_header_id') or self.extras['header-ids'].get('reset-count', False):
|
415
|
+
self._count_from_header_id = defaultdict(int)
|
277
416
|
if "metadata" in self.extras:
|
278
|
-
self.metadata = {}
|
417
|
+
self.metadata: Dict[str, Any] = {}
|
418
|
+
|
419
|
+
self.extra_classes = {}
|
420
|
+
for name, klass in Extra._registry.items():
|
421
|
+
if name not in self.extras:
|
422
|
+
continue
|
423
|
+
self.extra_classes[name] = klass(self, (self.extras.get(name, {})))
|
279
424
|
|
280
425
|
# Per <https://developer.mozilla.org/en-US/docs/HTML/Element/a> "rel"
|
281
426
|
# should only be used in <a> tags with an "href" attribute.
|
@@ -295,7 +440,7 @@ class Markdown(object):
|
|
295
440
|
re.IGNORECASE | re.VERBOSE
|
296
441
|
)
|
297
442
|
|
298
|
-
def convert(self, text):
|
443
|
+
def convert(self, text: str) -> 'UnicodeWithAttrs':
|
299
444
|
"""Convert the given text."""
|
300
445
|
# Main function. The order in which other subs are called here is
|
301
446
|
# essential. Link and image substitutions need to happen before
|
@@ -353,29 +498,12 @@ class Markdown(object):
|
|
353
498
|
|
354
499
|
text = self.preprocess(text)
|
355
500
|
|
356
|
-
if 'wavedrom' in self.extras:
|
357
|
-
text = self._do_wavedrom_blocks(text)
|
358
|
-
|
359
|
-
if "fenced-code-blocks" in self.extras and not self.safe_mode:
|
360
|
-
text = self._do_fenced_code_blocks(text)
|
361
|
-
|
362
501
|
if self.safe_mode:
|
363
502
|
text = self._hash_html_spans(text)
|
364
503
|
|
365
504
|
# Turn block-level HTML blocks into hash entries
|
366
505
|
text = self._hash_html_blocks(text, raw=True)
|
367
506
|
|
368
|
-
if "fenced-code-blocks" in self.extras and self.safe_mode:
|
369
|
-
text = self._do_fenced_code_blocks(text)
|
370
|
-
|
371
|
-
if 'admonitions' in self.extras:
|
372
|
-
text = self._do_admonitions(text)
|
373
|
-
|
374
|
-
# Because numbering references aren't links (yet?) then we can do everything associated with counters
|
375
|
-
# before we get started
|
376
|
-
if "numbering" in self.extras:
|
377
|
-
text = self._do_numbering(text)
|
378
|
-
|
379
507
|
# Strip link definitions, store in hashes.
|
380
508
|
if "footnotes" in self.extras:
|
381
509
|
# Must do footnotes first because an unlucky footnote defn
|
@@ -409,10 +537,22 @@ class Markdown(object):
|
|
409
537
|
text = self._a_nofollow_or_blank_links.sub(r'<\1 rel="nofollow"\2', text)
|
410
538
|
|
411
539
|
if "toc" in self.extras and self._toc:
|
540
|
+
if self.extras['header-ids'].get('mixed'):
|
541
|
+
# TOC will only be out of order if mixed headers is enabled
|
542
|
+
def toc_sort(entry):
|
543
|
+
'''Sort the TOC by order of appearance in text'''
|
544
|
+
match = re.search(
|
545
|
+
# header tag, any attrs, the ID, any attrs, the text, close tag
|
546
|
+
r'^<(h%d).*?id=(["\'])%s\2.*>%s</\1>$' % (entry[0], entry[1], re.escape(entry[2])),
|
547
|
+
text, re.M
|
548
|
+
)
|
549
|
+
return match.start() if match else 0
|
550
|
+
|
551
|
+
self._toc.sort(key=toc_sort)
|
412
552
|
self._toc_html = calculate_toc_html(self._toc)
|
413
553
|
|
414
554
|
# Prepend toc html to output
|
415
|
-
if self.cli:
|
555
|
+
if self.cli or (self.extras['toc'] is not None and self.extras['toc'].get('prepend', False)):
|
416
556
|
text = '{}\n{}'.format(self._toc_html, text)
|
417
557
|
|
418
558
|
text += "\n"
|
@@ -427,14 +567,16 @@ class Markdown(object):
|
|
427
567
|
rv.metadata = self.metadata
|
428
568
|
return rv
|
429
569
|
|
430
|
-
|
570
|
+
@mark_stage(Stage.POSTPROCESS)
|
571
|
+
def postprocess(self, text: str) -> str:
|
431
572
|
"""A hook for subclasses to do some postprocessing of the html, if
|
432
573
|
desired. This is called before unescaping of special chars and
|
433
574
|
unhashing of raw HTML spans.
|
434
575
|
"""
|
435
576
|
return text
|
436
577
|
|
437
|
-
|
578
|
+
@mark_stage(Stage.PREPROCESS)
|
579
|
+
def preprocess(self, text: str) -> str:
|
438
580
|
"""A hook for subclasses to do some preprocessing of the Markdown, if
|
439
581
|
desired. This is called after basic formatting of the text, but prior
|
440
582
|
to any extras, safe mode, etc. processing.
|
@@ -477,29 +619,32 @@ class Markdown(object):
|
|
477
619
|
_meta_data_fence_pattern = re.compile(r'^---[\ \t]*\n', re.MULTILINE)
|
478
620
|
_meta_data_newline = re.compile("^\n", re.MULTILINE)
|
479
621
|
|
480
|
-
def _extract_metadata(self, text):
|
622
|
+
def _extract_metadata(self, text: str) -> str:
|
481
623
|
if text.startswith("---"):
|
482
624
|
fence_splits = re.split(self._meta_data_fence_pattern, text, maxsplit=2)
|
483
625
|
metadata_content = fence_splits[1]
|
484
|
-
match = re.findall(self._meta_data_pattern, metadata_content)
|
485
|
-
if not match:
|
486
|
-
return text
|
487
626
|
tail = fence_splits[2]
|
488
627
|
else:
|
489
628
|
metadata_split = re.split(self._meta_data_newline, text, maxsplit=1)
|
490
629
|
metadata_content = metadata_split[0]
|
491
|
-
match = re.findall(self._meta_data_pattern, metadata_content)
|
492
|
-
if not match:
|
493
|
-
return text
|
494
630
|
tail = metadata_split[1]
|
495
631
|
|
496
|
-
|
632
|
+
# _meta_data_pattern only has one capturing group, so we can assume
|
633
|
+
# the returned type to be list[str]
|
634
|
+
match: List[str] = re.findall(self._meta_data_pattern, metadata_content)
|
635
|
+
if not match:
|
636
|
+
return text
|
637
|
+
|
638
|
+
def parse_structured_value(value: str) -> Union[List[Any], Dict[str, Any]]:
|
497
639
|
vs = value.lstrip()
|
498
640
|
vs = value.replace(v[: len(value) - len(vs)], "\n")[1:]
|
499
641
|
|
500
642
|
# List
|
501
643
|
if vs.startswith("-"):
|
502
|
-
r = []
|
644
|
+
r: List[Any] = []
|
645
|
+
# the regex used has multiple capturing groups, so
|
646
|
+
# returned type from findall will be List[List[str]]
|
647
|
+
match: List[str]
|
503
648
|
for match in re.findall(self._key_val_list_pat, vs):
|
504
649
|
if match[0] and not match[1] and not match[2]:
|
505
650
|
r.append(match[0].strip())
|
@@ -564,7 +709,7 @@ class Markdown(object):
|
|
564
709
|
(?P<content>.*?\1End:)
|
565
710
|
""", re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE)
|
566
711
|
|
567
|
-
def _emacs_vars_oneliner_sub(self, match):
|
712
|
+
def _emacs_vars_oneliner_sub(self, match: re.Match) -> str:
|
568
713
|
if match.group(1).strip() == '-*-' and match.group(4).strip() == '-*-':
|
569
714
|
lead_ws = re.findall(r'^\s*', match.group(1))[0]
|
570
715
|
tail_ws = re.findall(r'\s*$', match.group(4))[0]
|
@@ -573,7 +718,7 @@ class Markdown(object):
|
|
573
718
|
start, end = match.span()
|
574
719
|
return match.string[start: end]
|
575
720
|
|
576
|
-
def _get_emacs_vars(self, text):
|
721
|
+
def _get_emacs_vars(self, text: str) -> Dict[str, str]:
|
577
722
|
"""Return a dictionary of emacs-style local variables.
|
578
723
|
|
579
724
|
Parsing is done loosely according to this spec (and according to
|
@@ -616,7 +761,7 @@ class Markdown(object):
|
|
616
761
|
if match:
|
617
762
|
prefix = match.group("prefix")
|
618
763
|
suffix = match.group("suffix")
|
619
|
-
lines = match.group("content").splitlines(
|
764
|
+
lines = match.group("content").splitlines(False)
|
620
765
|
# print "prefix=%r, suffix=%r, content=%r, lines: %s"\
|
621
766
|
# % (prefix, suffix, match.group("content"), lines)
|
622
767
|
|
@@ -639,8 +784,10 @@ class Markdown(object):
|
|
639
784
|
# Parse out one emacs var per line.
|
640
785
|
continued_for = None
|
641
786
|
for line in lines[:-1]: # no var on the last line ("PREFIX End:")
|
642
|
-
if prefix:
|
643
|
-
|
787
|
+
if prefix:
|
788
|
+
line = line[len(prefix):] # strip prefix
|
789
|
+
if suffix:
|
790
|
+
line = line[:-len(suffix)] # strip suffix
|
644
791
|
line = line.strip()
|
645
792
|
if continued_for:
|
646
793
|
variable = continued_for
|
@@ -674,7 +821,7 @@ class Markdown(object):
|
|
674
821
|
|
675
822
|
return emacs_vars
|
676
823
|
|
677
|
-
def _detab_line(self, line):
|
824
|
+
def _detab_line(self, line: str) -> str:
|
678
825
|
r"""Recusively convert tabs to spaces in a single line.
|
679
826
|
|
680
827
|
Called from _detab()."""
|
@@ -685,7 +832,7 @@ class Markdown(object):
|
|
685
832
|
output = chunk1 + chunk2
|
686
833
|
return self._detab_line(output)
|
687
834
|
|
688
|
-
def _detab(self, text):
|
835
|
+
def _detab(self, text: str) -> str:
|
689
836
|
r"""Iterate text line by line and convert tabs to spaces.
|
690
837
|
|
691
838
|
>>> m = Markdown()
|
@@ -711,7 +858,7 @@ class Markdown(object):
|
|
711
858
|
# _block_tags_b. This way html5 tags are easy to keep track of.
|
712
859
|
_html5tags = '|article|aside|header|hgroup|footer|nav|section|figure|figcaption'
|
713
860
|
|
714
|
-
_block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del'
|
861
|
+
_block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del|style|html|head|body'
|
715
862
|
_block_tags_a += _html5tags
|
716
863
|
|
717
864
|
_strict_tag_block_re = re.compile(r"""
|
@@ -730,6 +877,11 @@ class Markdown(object):
|
|
730
877
|
_block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math'
|
731
878
|
_block_tags_b += _html5tags
|
732
879
|
|
880
|
+
_span_tags = (
|
881
|
+
'a|abbr|acronym|b|bdo|big|br|button|cite|code|dfn|em|i|img|input|kbd|label|map|object|output|q'
|
882
|
+
'|samp|script|select|small|span|strong|sub|sup|textarea|time|tt|var'
|
883
|
+
)
|
884
|
+
|
733
885
|
_liberal_tag_block_re = re.compile(r"""
|
734
886
|
( # save in \1
|
735
887
|
^ # start of line (with re.M)
|
@@ -745,11 +897,26 @@ class Markdown(object):
|
|
745
897
|
|
746
898
|
_html_markdown_attr_re = re.compile(
|
747
899
|
r'''\s+markdown=("1"|'1')''')
|
748
|
-
def _hash_html_block_sub(
|
900
|
+
def _hash_html_block_sub(
|
901
|
+
self,
|
902
|
+
match: Union[re.Match, str],
|
903
|
+
raw: bool = False
|
904
|
+
) -> str:
|
749
905
|
if isinstance(match, str):
|
750
906
|
html = match
|
907
|
+
tag = None
|
751
908
|
else:
|
752
909
|
html = match.group(1)
|
910
|
+
try:
|
911
|
+
tag = match.group(2)
|
912
|
+
except IndexError:
|
913
|
+
tag = None
|
914
|
+
|
915
|
+
if not tag:
|
916
|
+
m = re.match(r'.*?<(\S).*?\s*>', html)
|
917
|
+
# tag shouldn't be none but make the assertion for type checker
|
918
|
+
assert m is not None
|
919
|
+
tag = m.group(1)
|
753
920
|
|
754
921
|
if raw and self.safe_mode:
|
755
922
|
html = self._sanitize_html(html)
|
@@ -758,9 +925,17 @@ class Markdown(object):
|
|
758
925
|
m = self._html_markdown_attr_re.search(first_line)
|
759
926
|
if m:
|
760
927
|
lines = html.split('\n')
|
928
|
+
# if MD is on same line as opening tag then split across two lines
|
929
|
+
lines = list(filter(None, (re.split(r'(.*?<%s.*markdown=.*?>)' % tag, lines[0])))) + lines[1:]
|
930
|
+
# if MD on same line as closing tag, split across two lines
|
931
|
+
lines = lines[:-1] + list(filter(None, re.split(r'(\s*?</%s>.*?$)' % tag, lines[-1])))
|
932
|
+
# extract key sections of the match
|
933
|
+
first_line = lines[0]
|
761
934
|
middle = '\n'.join(lines[1:-1])
|
762
935
|
last_line = lines[-1]
|
936
|
+
# remove `markdown="1"` attr from tag
|
763
937
|
first_line = first_line[:m.start()] + first_line[m.end():]
|
938
|
+
# hash the HTML segments to protect them
|
764
939
|
f_key = _hash_text(first_line)
|
765
940
|
self.html_blocks[f_key] = first_line
|
766
941
|
l_key = _hash_text(last_line)
|
@@ -768,11 +943,14 @@ class Markdown(object):
|
|
768
943
|
return ''.join(["\n\n", f_key,
|
769
944
|
"\n\n", middle, "\n\n",
|
770
945
|
l_key, "\n\n"])
|
946
|
+
elif self.extras.get('header-ids', {}).get('mixed') and self._h_tag_re.match(html):
|
947
|
+
html = self._h_tag_re.sub(self._h_tag_sub, html)
|
771
948
|
key = _hash_text(html)
|
772
949
|
self.html_blocks[key] = html
|
773
950
|
return "\n\n" + key + "\n\n"
|
774
951
|
|
775
|
-
|
952
|
+
@mark_stage(Stage.HASH_HTML)
|
953
|
+
def _hash_html_blocks(self, text: str, raw: bool = False) -> str:
|
776
954
|
"""Hashify HTML blocks
|
777
955
|
|
778
956
|
We only want to do this for block-level HTML tags, such as headers,
|
@@ -806,6 +984,14 @@ class Markdown(object):
|
|
806
984
|
# Now match more liberally, simply from `\n<tag>` to `</tag>\n`
|
807
985
|
text = self._liberal_tag_block_re.sub(hash_html_block_sub, text)
|
808
986
|
|
987
|
+
# now do the same for spans that are acting like blocks
|
988
|
+
# eg: an anchor split over multiple lines for readability
|
989
|
+
text = self._strict_tag_block_sub(
|
990
|
+
text, self._span_tags,
|
991
|
+
# inline elements can't contain block level elements, so only span gamut is required
|
992
|
+
lambda t: hash_html_block_sub(self._run_span_gamut(t))
|
993
|
+
)
|
994
|
+
|
809
995
|
# Special case just for <hr />. It was easier to make a special
|
810
996
|
# case than to make the other regex more complicated.
|
811
997
|
if "<hr" in text:
|
@@ -880,27 +1066,45 @@ class Markdown(object):
|
|
880
1066
|
|
881
1067
|
return text
|
882
1068
|
|
883
|
-
def _strict_tag_block_sub(
|
1069
|
+
def _strict_tag_block_sub(
|
1070
|
+
self,
|
1071
|
+
text: str,
|
1072
|
+
html_tags_re: str,
|
1073
|
+
callback: Callable[[str], str],
|
1074
|
+
allow_indent: bool = False
|
1075
|
+
) -> str:
|
1076
|
+
'''
|
1077
|
+
Finds and substitutes HTML blocks within blocks of text
|
1078
|
+
|
1079
|
+
Args:
|
1080
|
+
text: the text to search
|
1081
|
+
html_tags_re: a regex pattern of HTML block tags to match against.
|
1082
|
+
For example, `Markdown._block_tags_a`
|
1083
|
+
callback: callback function that receives the found HTML text block and returns a new str
|
1084
|
+
allow_indent: allow matching HTML blocks that are not completely outdented
|
1085
|
+
'''
|
884
1086
|
tag_count = 0
|
885
1087
|
current_tag = html_tags_re
|
886
1088
|
block = ''
|
887
1089
|
result = ''
|
888
1090
|
|
889
1091
|
for chunk in text.splitlines(True):
|
890
|
-
is_markup = re.match(
|
1092
|
+
is_markup = re.match(
|
1093
|
+
r'^(\s{0,%s})(?:</code>(?=</pre>))?(</?(%s)\b>?)' % ('' if allow_indent else '0', current_tag), chunk
|
1094
|
+
)
|
891
1095
|
block += chunk
|
892
1096
|
|
893
1097
|
if is_markup:
|
894
|
-
if chunk.startswith('</'):
|
1098
|
+
if chunk.startswith('%s</' % is_markup.group(1)):
|
895
1099
|
tag_count -= 1
|
896
1100
|
else:
|
897
1101
|
# if close tag is in same line
|
898
|
-
if
|
1102
|
+
if self._tag_is_closed(is_markup.group(3), chunk):
|
899
1103
|
# we must ignore these
|
900
1104
|
is_markup = None
|
901
1105
|
else:
|
902
1106
|
tag_count += 1
|
903
|
-
current_tag = is_markup.group(
|
1107
|
+
current_tag = is_markup.group(3)
|
904
1108
|
|
905
1109
|
if tag_count == 0:
|
906
1110
|
if is_markup:
|
@@ -913,7 +1117,12 @@ class Markdown(object):
|
|
913
1117
|
|
914
1118
|
return result
|
915
1119
|
|
916
|
-
def
|
1120
|
+
def _tag_is_closed(self, tag_name: str, text: str) -> bool:
|
1121
|
+
# super basic check if number of open tags == number of closing tags
|
1122
|
+
return len(re.findall('<%s(?:.*?)>' % tag_name, text)) == len(re.findall('</%s>' % tag_name, text))
|
1123
|
+
|
1124
|
+
@mark_stage(Stage.LINK_DEFS)
|
1125
|
+
def _strip_link_definitions(self, text: str) -> str:
|
917
1126
|
# Strips link definitions from text, stores the URLs and titles in
|
918
1127
|
# hash references.
|
919
1128
|
less_than_tab = self.tab_width - 1
|
@@ -940,7 +1149,7 @@ class Markdown(object):
|
|
940
1149
|
""" % less_than_tab, re.X | re.M | re.U)
|
941
1150
|
return _link_def_re.sub(self._extract_link_def_sub, text)
|
942
1151
|
|
943
|
-
def _extract_link_def_sub(self, match):
|
1152
|
+
def _extract_link_def_sub(self, match: re.Match) -> str:
|
944
1153
|
id, url, title = match.groups()
|
945
1154
|
key = id.lower() # Link IDs are case-insensitive
|
946
1155
|
self.urls[key] = self._encode_amps_and_angles(url)
|
@@ -948,65 +1157,7 @@ class Markdown(object):
|
|
948
1157
|
self.titles[key] = title
|
949
1158
|
return ""
|
950
1159
|
|
951
|
-
def
|
952
|
-
''' We handle the special extension for generic numbering for
|
953
|
-
tables, figures etc.
|
954
|
-
'''
|
955
|
-
# First pass to define all the references
|
956
|
-
self.regex_defns = re.compile(r'''
|
957
|
-
\[\#(\w+) # the counter. Open square plus hash plus a word \1
|
958
|
-
([^@]*) # Some optional characters, that aren't an @. \2
|
959
|
-
@(\w+) # the id. Should this be normed? \3
|
960
|
-
([^\]]*)\] # The rest of the text up to the terminating ] \4
|
961
|
-
''', re.VERBOSE)
|
962
|
-
self.regex_subs = re.compile(r"\[@(\w+)\s*\]") # [@ref_id]
|
963
|
-
counters = {}
|
964
|
-
references = {}
|
965
|
-
replacements = []
|
966
|
-
definition_html = '<figcaption class="{}" id="counter-ref-{}">{}{}{}</figcaption>'
|
967
|
-
reference_html = '<a class="{}" href="#counter-ref-{}">{}</a>'
|
968
|
-
for match in self.regex_defns.finditer(text):
|
969
|
-
# We must have four match groups otherwise this isn't a numbering reference
|
970
|
-
if len(match.groups()) != 4:
|
971
|
-
continue
|
972
|
-
counter = match.group(1)
|
973
|
-
text_before = match.group(2).strip()
|
974
|
-
ref_id = match.group(3)
|
975
|
-
text_after = match.group(4)
|
976
|
-
number = counters.get(counter, 1)
|
977
|
-
references[ref_id] = (number, counter)
|
978
|
-
replacements.append((match.start(0),
|
979
|
-
definition_html.format(counter,
|
980
|
-
ref_id,
|
981
|
-
text_before,
|
982
|
-
number,
|
983
|
-
text_after),
|
984
|
-
match.end(0)))
|
985
|
-
counters[counter] = number + 1
|
986
|
-
for repl in reversed(replacements):
|
987
|
-
text = text[:repl[0]] + repl[1] + text[repl[2]:]
|
988
|
-
|
989
|
-
# Second pass to replace the references with the right
|
990
|
-
# value of the counter
|
991
|
-
# Fwiw, it's vaguely annoying to have to turn the iterator into
|
992
|
-
# a list and then reverse it but I can't think of a better thing to do.
|
993
|
-
for match in reversed(list(self.regex_subs.finditer(text))):
|
994
|
-
number, counter = references.get(match.group(1), (None, None))
|
995
|
-
if number is not None:
|
996
|
-
repl = reference_html.format(counter,
|
997
|
-
match.group(1),
|
998
|
-
number)
|
999
|
-
else:
|
1000
|
-
repl = reference_html.format(match.group(1),
|
1001
|
-
'countererror',
|
1002
|
-
'?' + match.group(1) + '?')
|
1003
|
-
if "smarty-pants" in self.extras:
|
1004
|
-
repl = repl.replace('"', self._escape_table['"'])
|
1005
|
-
|
1006
|
-
text = text[:match.start()] + repl + text[match.end():]
|
1007
|
-
return text
|
1008
|
-
|
1009
|
-
def _extract_footnote_def_sub(self, match):
|
1160
|
+
def _extract_footnote_def_sub(self, match: re.Match) -> str:
|
1010
1161
|
id, text = match.groups()
|
1011
1162
|
text = _dedent(text, skip_first_line=not text.startswith('\n')).strip()
|
1012
1163
|
normed_id = re.sub(r'\W', '-', id)
|
@@ -1015,7 +1166,7 @@ class Markdown(object):
|
|
1015
1166
|
self.footnotes[normed_id] = text + "\n\n"
|
1016
1167
|
return ""
|
1017
1168
|
|
1018
|
-
def _strip_footnote_definitions(self, text):
|
1169
|
+
def _strip_footnote_definitions(self, text: str) -> str:
|
1019
1170
|
"""A footnote definition looks like this:
|
1020
1171
|
|
1021
1172
|
[^note-id]: Text of the note.
|
@@ -1050,19 +1201,11 @@ class Markdown(object):
|
|
1050
1201
|
|
1051
1202
|
_hr_re = re.compile(r'^[ ]{0,3}([-_*])[ ]{0,2}(\1[ ]{0,2}){2,}$', re.M)
|
1052
1203
|
|
1053
|
-
|
1204
|
+
@mark_stage(Stage.BLOCK_GAMUT)
|
1205
|
+
def _run_block_gamut(self, text: str) -> str:
|
1054
1206
|
# These are all the transformations that form block-level
|
1055
1207
|
# tags like paragraphs, headers, and list items.
|
1056
1208
|
|
1057
|
-
if 'admonitions' in self.extras:
|
1058
|
-
text = self._do_admonitions(text)
|
1059
|
-
|
1060
|
-
if 'wavedrom' in self.extras:
|
1061
|
-
text = self._do_wavedrom_blocks(text)
|
1062
|
-
|
1063
|
-
if "fenced-code-blocks" in self.extras:
|
1064
|
-
text = self._do_fenced_code_blocks(text)
|
1065
|
-
|
1066
1209
|
text = self._do_headers(text)
|
1067
1210
|
|
1068
1211
|
# Do Horizontal Rules:
|
@@ -1075,13 +1218,6 @@ class Markdown(object):
|
|
1075
1218
|
|
1076
1219
|
text = self._do_lists(text)
|
1077
1220
|
|
1078
|
-
if "pyshell" in self.extras:
|
1079
|
-
text = self._prepare_pyshell_blocks(text)
|
1080
|
-
if "wiki-tables" in self.extras:
|
1081
|
-
text = self._do_wiki_tables(text)
|
1082
|
-
if "tables" in self.extras:
|
1083
|
-
text = self._do_tables(text)
|
1084
|
-
|
1085
1221
|
text = self._do_code_blocks(text)
|
1086
1222
|
|
1087
1223
|
text = self._do_block_quotes(text)
|
@@ -1096,164 +1232,8 @@ class Markdown(object):
|
|
1096
1232
|
|
1097
1233
|
return text
|
1098
1234
|
|
1099
|
-
|
1100
|
-
|
1101
|
-
dedented = _dedent(match.group(0))
|
1102
|
-
return self._do_fenced_code_blocks("```pycon\n" + dedented + "```\n")
|
1103
|
-
lines = match.group(0).splitlines(0)
|
1104
|
-
_dedentlines(lines)
|
1105
|
-
indent = ' ' * self.tab_width
|
1106
|
-
s = ('\n' # separate from possible cuddled paragraph
|
1107
|
-
+ indent + ('\n'+indent).join(lines)
|
1108
|
-
+ '\n')
|
1109
|
-
return s
|
1110
|
-
|
1111
|
-
def _prepare_pyshell_blocks(self, text):
|
1112
|
-
"""Ensure that Python interactive shell sessions are put in
|
1113
|
-
code blocks -- even if not properly indented.
|
1114
|
-
"""
|
1115
|
-
if ">>>" not in text:
|
1116
|
-
return text
|
1117
|
-
|
1118
|
-
less_than_tab = self.tab_width - 1
|
1119
|
-
_pyshell_block_re = re.compile(r"""
|
1120
|
-
^([ ]{0,%d})>>>[ ].*\n # first line
|
1121
|
-
^(\1[^\S\n]*\S.*\n)* # any number of subsequent lines with at least one character
|
1122
|
-
(?=^\1?\n|\Z) # ends with a blank line or end of document
|
1123
|
-
""" % less_than_tab, re.M | re.X)
|
1124
|
-
|
1125
|
-
return _pyshell_block_re.sub(self._pyshell_block_sub, text)
|
1126
|
-
|
1127
|
-
def _table_sub(self, match):
|
1128
|
-
trim_space_re = '^[ \t\n]+|[ \t\n]+$'
|
1129
|
-
trim_bar_re = r'^\||\|$'
|
1130
|
-
split_bar_re = r'^\||(?<![\`\\])\|'
|
1131
|
-
escape_bar_re = r'\\\|'
|
1132
|
-
|
1133
|
-
head, underline, body = match.groups()
|
1134
|
-
|
1135
|
-
# Determine aligns for columns.
|
1136
|
-
cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", underline)))]
|
1137
|
-
align_from_col_idx = {}
|
1138
|
-
for col_idx, col in enumerate(cols):
|
1139
|
-
if col[0] == ':' and col[-1] == ':':
|
1140
|
-
align_from_col_idx[col_idx] = ' style="text-align:center;"'
|
1141
|
-
elif col[0] == ':':
|
1142
|
-
align_from_col_idx[col_idx] = ' style="text-align:left;"'
|
1143
|
-
elif col[-1] == ':':
|
1144
|
-
align_from_col_idx[col_idx] = ' style="text-align:right;"'
|
1145
|
-
|
1146
|
-
# thead
|
1147
|
-
hlines = ['<table%s>' % self._html_class_str_from_tag('table'), '<thead%s>' % self._html_class_str_from_tag('thead'), '<tr>']
|
1148
|
-
cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))]
|
1149
|
-
for col_idx, col in enumerate(cols):
|
1150
|
-
hlines.append(' <th%s>%s</th>' % (
|
1151
|
-
align_from_col_idx.get(col_idx, ''),
|
1152
|
-
self._run_span_gamut(col)
|
1153
|
-
))
|
1154
|
-
hlines.append('</tr>')
|
1155
|
-
hlines.append('</thead>')
|
1156
|
-
|
1157
|
-
# tbody
|
1158
|
-
hlines.append('<tbody>')
|
1159
|
-
for line in body.strip('\n').split('\n'):
|
1160
|
-
hlines.append('<tr>')
|
1161
|
-
cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)))]
|
1162
|
-
for col_idx, col in enumerate(cols):
|
1163
|
-
hlines.append(' <td%s>%s</td>' % (
|
1164
|
-
align_from_col_idx.get(col_idx, ''),
|
1165
|
-
self._run_span_gamut(col)
|
1166
|
-
))
|
1167
|
-
hlines.append('</tr>')
|
1168
|
-
hlines.append('</tbody>')
|
1169
|
-
hlines.append('</table>')
|
1170
|
-
|
1171
|
-
return '\n'.join(hlines) + '\n'
|
1172
|
-
|
1173
|
-
def _do_tables(self, text):
|
1174
|
-
"""Copying PHP-Markdown and GFM table syntax. Some regex borrowed from
|
1175
|
-
https://github.com/michelf/php-markdown/blob/lib/Michelf/Markdown.php#L2538
|
1176
|
-
"""
|
1177
|
-
less_than_tab = self.tab_width - 1
|
1178
|
-
table_re = re.compile(r'''
|
1179
|
-
(?:(?<=\n\n)|\A\n?) # leading blank line
|
1180
|
-
|
1181
|
-
^[ ]{0,%d} # allowed whitespace
|
1182
|
-
(.*[|].*) \n # $1: header row (at least one pipe)
|
1183
|
-
|
1184
|
-
^[ ]{0,%d} # allowed whitespace
|
1185
|
-
( # $2: underline row
|
1186
|
-
# underline row with leading bar
|
1187
|
-
(?: \|\ *:?-+:?\ * )+ \|? \s? \n
|
1188
|
-
|
|
1189
|
-
# or, underline row without leading bar
|
1190
|
-
(?: \ *:?-+:?\ *\| )+ (?: \ *:?-+:?\ * )? \s? \n
|
1191
|
-
)
|
1192
|
-
|
1193
|
-
( # $3: data rows
|
1194
|
-
(?:
|
1195
|
-
^[ ]{0,%d}(?!\ ) # ensure line begins with 0 to less_than_tab spaces
|
1196
|
-
.*\|.* \n
|
1197
|
-
)+
|
1198
|
-
)
|
1199
|
-
''' % (less_than_tab, less_than_tab, less_than_tab), re.M | re.X)
|
1200
|
-
return table_re.sub(self._table_sub, text)
|
1201
|
-
|
1202
|
-
def _wiki_table_sub(self, match):
|
1203
|
-
ttext = match.group(0).strip()
|
1204
|
-
# print('wiki table: %r' % match.group(0))
|
1205
|
-
rows = []
|
1206
|
-
for line in ttext.splitlines(0):
|
1207
|
-
line = line.strip()[2:-2].strip()
|
1208
|
-
row = [c.strip() for c in re.split(r'(?<!\\)\|\|', line)]
|
1209
|
-
rows.append(row)
|
1210
|
-
# from pprint import pprint
|
1211
|
-
# pprint(rows)
|
1212
|
-
hlines = []
|
1213
|
-
|
1214
|
-
def add_hline(line, indents=0):
|
1215
|
-
hlines.append((self.tab * indents) + line)
|
1216
|
-
|
1217
|
-
def format_cell(text):
|
1218
|
-
return self._run_span_gamut(re.sub(r"^\s*~", "", cell).strip(" "))
|
1219
|
-
|
1220
|
-
add_hline('<table%s>' % self._html_class_str_from_tag('table'))
|
1221
|
-
# Check if first cell of first row is a header cell. If so, assume the whole row is a header row.
|
1222
|
-
if rows and rows[0] and re.match(r"^\s*~", rows[0][0]):
|
1223
|
-
add_hline('<thead%s>' % self._html_class_str_from_tag('thead'), 1)
|
1224
|
-
add_hline('<tr>', 2)
|
1225
|
-
for cell in rows[0]:
|
1226
|
-
add_hline("<th>{}</th>".format(format_cell(cell)), 3)
|
1227
|
-
add_hline('</tr>', 2)
|
1228
|
-
add_hline('</thead>', 1)
|
1229
|
-
# Only one header row allowed.
|
1230
|
-
rows = rows[1:]
|
1231
|
-
# If no more rows, don't create a tbody.
|
1232
|
-
if rows:
|
1233
|
-
add_hline('<tbody>', 1)
|
1234
|
-
for row in rows:
|
1235
|
-
add_hline('<tr>', 2)
|
1236
|
-
for cell in row:
|
1237
|
-
add_hline('<td>{}</td>'.format(format_cell(cell)), 3)
|
1238
|
-
add_hline('</tr>', 2)
|
1239
|
-
add_hline('</tbody>', 1)
|
1240
|
-
add_hline('</table>')
|
1241
|
-
return '\n'.join(hlines) + '\n'
|
1242
|
-
|
1243
|
-
def _do_wiki_tables(self, text):
|
1244
|
-
# Optimization.
|
1245
|
-
if "||" not in text:
|
1246
|
-
return text
|
1247
|
-
|
1248
|
-
less_than_tab = self.tab_width - 1
|
1249
|
-
wiki_table_re = re.compile(r'''
|
1250
|
-
(?:(?<=\n\n)|\A\n?) # leading blank line
|
1251
|
-
^([ ]{0,%d})\|\|.+?\|\|[ ]*\n # first line
|
1252
|
-
(^\1\|\|.+?\|\|\n)* # any number of subsequent lines
|
1253
|
-
''' % less_than_tab, re.M | re.X)
|
1254
|
-
return wiki_table_re.sub(self._wiki_table_sub, text)
|
1255
|
-
|
1256
|
-
def _run_span_gamut(self, text):
|
1235
|
+
@mark_stage(Stage.SPAN_GAMUT)
|
1236
|
+
def _run_span_gamut(self, text: str) -> str:
|
1257
1237
|
# These are all the transformations that occur *within* block-level
|
1258
1238
|
# tags like paragraphs, headers, and list items.
|
1259
1239
|
|
@@ -1262,9 +1242,6 @@ class Markdown(object):
|
|
1262
1242
|
text = self._escape_special_chars(text)
|
1263
1243
|
|
1264
1244
|
# Process anchor and image tags.
|
1265
|
-
if "link-patterns" in self.extras:
|
1266
|
-
text = self._do_link_patterns(text)
|
1267
|
-
|
1268
1245
|
text = self._do_links(text)
|
1269
1246
|
|
1270
1247
|
# Make links out of things like `<http://example.com/>`
|
@@ -1274,25 +1251,10 @@ class Markdown(object):
|
|
1274
1251
|
|
1275
1252
|
text = self._encode_amps_and_angles(text)
|
1276
1253
|
|
1277
|
-
if "strike" in self.extras:
|
1278
|
-
text = self._do_strike(text)
|
1279
|
-
|
1280
|
-
if "underline" in self.extras:
|
1281
|
-
text = self._do_underline(text)
|
1282
|
-
|
1283
1254
|
text = self._do_italics_and_bold(text)
|
1284
1255
|
|
1285
|
-
|
1286
|
-
|
1287
|
-
|
1288
|
-
if "smarty-pants" in self.extras:
|
1289
|
-
text = self._do_smart_punctuation(text)
|
1290
|
-
|
1291
|
-
# Do hard breaks:
|
1292
|
-
if "break-on-newline" in self.extras:
|
1293
|
-
text = re.sub(r" *\n(?!\<(?:\/?(ul|ol|li))\>)", "<br%s\n" % self.empty_element_suffix, text)
|
1294
|
-
else:
|
1295
|
-
text = re.sub(r" {2,}\n", " <br%s\n" % self.empty_element_suffix, text)
|
1256
|
+
# Do hard breaks
|
1257
|
+
text = re.sub(r" {2,}\n(?!\<(?:\/?(ul|ol|li))\>)", "<br%s\n" % self.empty_element_suffix, text)
|
1296
1258
|
|
1297
1259
|
return text
|
1298
1260
|
|
@@ -1317,7 +1279,8 @@ class Markdown(object):
|
|
1317
1279
|
)
|
1318
1280
|
""", re.X)
|
1319
1281
|
|
1320
|
-
|
1282
|
+
@mark_stage(Stage.ESCAPE_SPECIAL)
|
1283
|
+
def _escape_special_chars(self, text: str) -> str:
|
1321
1284
|
# Python markdown note: the HTML tokenization here differs from
|
1322
1285
|
# that in Markdown.pl, hence the behaviour for subtle cases can
|
1323
1286
|
# differ (I believe the tokenizer here does a better job because
|
@@ -1348,7 +1311,8 @@ class Markdown(object):
|
|
1348
1311
|
is_html_markup = not is_html_markup
|
1349
1312
|
return ''.join(escaped)
|
1350
1313
|
|
1351
|
-
|
1314
|
+
@mark_stage(Stage.HASH_HTML)
|
1315
|
+
def _hash_html_spans(self, text: str) -> str:
|
1352
1316
|
# Used for safe_mode.
|
1353
1317
|
|
1354
1318
|
def _is_auto_link(s):
|
@@ -1371,26 +1335,41 @@ class Markdown(object):
|
|
1371
1335
|
|
1372
1336
|
return re.match(r'<code>md5-[A-Fa-f0-9]{32}</code>', ''.join(peek_tokens))
|
1373
1337
|
|
1338
|
+
def _is_comment(token):
|
1339
|
+
if self.safe_mode == 'replace':
|
1340
|
+
# don't bother processing each section of comment in replace mode. Just do the whole thing
|
1341
|
+
return
|
1342
|
+
return re.match(r'(<!--)(.*)(-->)', token)
|
1343
|
+
|
1344
|
+
def _hash(token):
|
1345
|
+
key = _hash_text(token)
|
1346
|
+
self.html_spans[key] = token
|
1347
|
+
return key
|
1348
|
+
|
1374
1349
|
tokens = []
|
1375
1350
|
split_tokens = self._sorta_html_tokenize_re.split(text)
|
1376
1351
|
is_html_markup = False
|
1377
1352
|
for index, token in enumerate(split_tokens):
|
1378
1353
|
if is_html_markup and not _is_auto_link(token) and not _is_code_span(index, token):
|
1379
|
-
|
1380
|
-
|
1381
|
-
|
1382
|
-
|
1354
|
+
is_comment = _is_comment(token)
|
1355
|
+
if is_comment:
|
1356
|
+
tokens.append(_hash(self._sanitize_html(is_comment.group(1))))
|
1357
|
+
# sanitise but leave comment body intact for further markdown processing
|
1358
|
+
tokens.append(self._sanitize_html(is_comment.group(2)))
|
1359
|
+
tokens.append(_hash(self._sanitize_html(is_comment.group(3))))
|
1360
|
+
else:
|
1361
|
+
tokens.append(_hash(self._sanitize_html(token)))
|
1383
1362
|
else:
|
1384
1363
|
tokens.append(self._encode_incomplete_tags(token))
|
1385
1364
|
is_html_markup = not is_html_markup
|
1386
1365
|
return ''.join(tokens)
|
1387
1366
|
|
1388
|
-
def _unhash_html_spans(self, text):
|
1367
|
+
def _unhash_html_spans(self, text: str) -> str:
|
1389
1368
|
for key, sanitized in list(self.html_spans.items()):
|
1390
1369
|
text = text.replace(key, sanitized)
|
1391
1370
|
return text
|
1392
1371
|
|
1393
|
-
def _sanitize_html(self, s):
|
1372
|
+
def _sanitize_html(self, s: str) -> str:
|
1394
1373
|
if self.safe_mode == "replace":
|
1395
1374
|
return self.html_removed_text
|
1396
1375
|
elif self.safe_mode == "escape":
|
@@ -1428,14 +1407,14 @@ class Markdown(object):
|
|
1428
1407
|
|
1429
1408
|
_strip_anglebrackets = re.compile(r'<(.*)>.*')
|
1430
1409
|
|
1431
|
-
def _find_non_whitespace(self, text, start):
|
1410
|
+
def _find_non_whitespace(self, text: str, start: int) -> int:
|
1432
1411
|
"""Returns the index of the first non-whitespace character in text
|
1433
1412
|
after (and including) start
|
1434
1413
|
"""
|
1435
1414
|
match = self._whitespace.match(text, start)
|
1436
|
-
return match.end()
|
1415
|
+
return match.end() if match else len(text)
|
1437
1416
|
|
1438
|
-
def _find_balanced(self, text, start, open_c, close_c):
|
1417
|
+
def _find_balanced(self, text: str, start: int, open_c: str, close_c: str) -> int:
|
1439
1418
|
"""Returns the index where the open_c and close_c characters balance
|
1440
1419
|
out - the same number of open_c and close_c are encountered - or the
|
1441
1420
|
end of string if it's reached before the balance point is found.
|
@@ -1451,7 +1430,7 @@ class Markdown(object):
|
|
1451
1430
|
i += 1
|
1452
1431
|
return i
|
1453
1432
|
|
1454
|
-
def _extract_url_and_title(self, text, start):
|
1433
|
+
def _extract_url_and_title(self, text: str, start: int) -> Union[Tuple[str, str, int], Tuple[None, None, None]]:
|
1455
1434
|
"""Extracts the url and (optional) title from the tail of a link"""
|
1456
1435
|
# text[start] equals the opening parenthesis
|
1457
1436
|
idx = self._find_non_whitespace(text, start+1)
|
@@ -1470,19 +1449,56 @@ class Markdown(object):
|
|
1470
1449
|
url = self._strip_anglebrackets.sub(r'\1', url)
|
1471
1450
|
return url, title, end_idx
|
1472
1451
|
|
1473
|
-
|
1452
|
+
# https://developer.mozilla.org/en-US/docs/web/http/basics_of_http/data_urls
|
1453
|
+
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types
|
1454
|
+
_data_url_re = re.compile(r'''
|
1455
|
+
data:
|
1456
|
+
# in format type/subtype;parameter=optional
|
1457
|
+
(?P<mime>\w+/[\w+\.-]+(?:;\w+=[\w+\.-]+)?)?
|
1458
|
+
# optional base64 token
|
1459
|
+
(?P<token>;base64)?
|
1460
|
+
,(?P<data>.*)
|
1461
|
+
''', re.X)
|
1462
|
+
|
1463
|
+
def _protect_url(self, url: str) -> str:
|
1474
1464
|
'''
|
1475
1465
|
Function that passes a URL through `_html_escape_url` to remove any nasty characters,
|
1476
1466
|
and then hashes the now "safe" URL to prevent other safety mechanisms from tampering
|
1477
1467
|
with it (eg: escaping "&" in URL parameters)
|
1478
1468
|
'''
|
1479
|
-
|
1469
|
+
data_url = self._data_url_re.match(url)
|
1470
|
+
charset = None
|
1471
|
+
if data_url is not None:
|
1472
|
+
mime = data_url.group('mime') or ''
|
1473
|
+
if mime.startswith('image/') and data_url.group('token') == ';base64':
|
1474
|
+
charset='base64'
|
1475
|
+
url = _html_escape_url(url, safe_mode=self.safe_mode, charset=charset)
|
1480
1476
|
key = _hash_text(url)
|
1481
1477
|
self._escape_table[url] = key
|
1482
1478
|
return key
|
1483
1479
|
|
1484
|
-
_safe_protocols =
|
1485
|
-
|
1480
|
+
_safe_protocols = r'(?:https?|ftp):\/\/|(?:mailto|tel):'
|
1481
|
+
|
1482
|
+
@property
|
1483
|
+
def _safe_href(self):
|
1484
|
+
'''
|
1485
|
+
_safe_href is adapted from pagedown's Markdown.Sanitizer.js
|
1486
|
+
From: https://github.com/StackExchange/pagedown/blob/master/LICENSE.txt
|
1487
|
+
Original Showdown code copyright (c) 2007 John Fraser
|
1488
|
+
Modifications and bugfixes (c) 2009 Dana Robinson
|
1489
|
+
Modifications and bugfixes (c) 2009-2014 Stack Exchange Inc.
|
1490
|
+
'''
|
1491
|
+
safe = r'-\w'
|
1492
|
+
# omitted ['"<>] for XSS reasons
|
1493
|
+
less_safe = r'#/\.!#$%&\(\)\+,/:;=\?@\[\]^`\{\}\|~'
|
1494
|
+
# dot seperated hostname, optional port number, not followed by protocol seperator
|
1495
|
+
domain = r'(?:[%s]+(?:\.[%s]+)*)(?:(?<!tel):\d+/?)?(?![^:/]*:/*)' % (safe, safe)
|
1496
|
+
fragment = r'[%s]*' % (safe + less_safe)
|
1497
|
+
|
1498
|
+
return re.compile(r'^(?:(%s)?(%s)(%s)|(#|\.{,2}/)(%s))$' % (self._safe_protocols, domain, fragment, fragment), re.I)
|
1499
|
+
|
1500
|
+
@mark_stage(Stage.LINKS)
|
1501
|
+
def _do_links(self, text: str) -> str:
|
1486
1502
|
"""Turn Markdown link shortcuts into XHTML <a> and <img> tags.
|
1487
1503
|
|
1488
1504
|
This is a combination of Markdown.pl's _DoAnchors() and
|
@@ -1599,7 +1615,7 @@ class Markdown(object):
|
|
1599
1615
|
anchor_allowed_pos = start_idx + len(result)
|
1600
1616
|
text = text[:start_idx] + result + text[url_end_idx:]
|
1601
1617
|
elif start_idx >= anchor_allowed_pos:
|
1602
|
-
safe_link = self.
|
1618
|
+
safe_link = self._safe_href.match(url)
|
1603
1619
|
if self.safe_mode and not safe_link:
|
1604
1620
|
result_head = '<a href="#"%s>' % (title_str)
|
1605
1621
|
else:
|
@@ -1655,7 +1671,7 @@ class Markdown(object):
|
|
1655
1671
|
curr_pos = start_idx + len(result)
|
1656
1672
|
text = text[:start_idx] + result + text[match.end():]
|
1657
1673
|
elif start_idx >= anchor_allowed_pos:
|
1658
|
-
if self.safe_mode and not self.
|
1674
|
+
if self.safe_mode and not self._safe_href.match(url):
|
1659
1675
|
result_head = '<a href="#"%s>' % (title_str)
|
1660
1676
|
else:
|
1661
1677
|
result_head = '<a href="%s"%s>' % (self._protect_url(url), title_str)
|
@@ -1672,7 +1688,8 @@ class Markdown(object):
|
|
1672
1688
|
curr_pos = start_idx + 1
|
1673
1689
|
else:
|
1674
1690
|
# This id isn't defined, leave the markup alone.
|
1675
|
-
|
1691
|
+
# set current pos to end of link title and continue from there
|
1692
|
+
curr_pos = p
|
1676
1693
|
continue
|
1677
1694
|
|
1678
1695
|
# Otherwise, it isn't markup.
|
@@ -1680,7 +1697,11 @@ class Markdown(object):
|
|
1680
1697
|
|
1681
1698
|
return text
|
1682
1699
|
|
1683
|
-
def header_id_from_text(self,
|
1700
|
+
def header_id_from_text(self,
|
1701
|
+
text: str,
|
1702
|
+
prefix: str,
|
1703
|
+
n: Optional[int] = None
|
1704
|
+
) -> str:
|
1684
1705
|
"""Generate a header id attribute value from the given header
|
1685
1706
|
HTML content.
|
1686
1707
|
|
@@ -1690,7 +1711,7 @@ class Markdown(object):
|
|
1690
1711
|
@param text {str} The text of the header tag
|
1691
1712
|
@param prefix {str} The requested prefix for header ids. This is the
|
1692
1713
|
value of the "header-ids" extra key, if any. Otherwise, None.
|
1693
|
-
@param n {int} The <hN> tag number, i.e. `1` for an <h1> tag.
|
1714
|
+
@param n {int} (unused) The <hN> tag number, i.e. `1` for an <h1> tag.
|
1694
1715
|
@returns {str} The value for the header tag's "id" attribute. Return
|
1695
1716
|
None to not have an id attribute and to exclude this header from
|
1696
1717
|
the TOC (if the "toc" extra is specified).
|
@@ -1705,7 +1726,14 @@ class Markdown(object):
|
|
1705
1726
|
|
1706
1727
|
return header_id
|
1707
1728
|
|
1708
|
-
def
|
1729
|
+
def _header_id_exists(self, text: str) -> bool:
|
1730
|
+
header_id = _slugify(text)
|
1731
|
+
prefix = self.extras['header-ids'].get('prefix')
|
1732
|
+
if prefix and isinstance(prefix, str):
|
1733
|
+
header_id = prefix + '-' + header_id
|
1734
|
+
return header_id in self._count_from_header_id or header_id in map(lambda x: x[1], self._toc)
|
1735
|
+
|
1736
|
+
def _toc_add_entry(self, level: int, id: str, name: str) -> None:
|
1709
1737
|
if level > self._toc_depth:
|
1710
1738
|
return
|
1711
1739
|
if self._toc is None:
|
@@ -1728,7 +1756,8 @@ class Markdown(object):
|
|
1728
1756
|
_h_re = re.compile(_h_re_base % '*', re.X | re.M)
|
1729
1757
|
_h_re_tag_friendly = re.compile(_h_re_base % '+', re.X | re.M)
|
1730
1758
|
|
1731
|
-
def _h_sub(self, match):
|
1759
|
+
def _h_sub(self, match: re.Match) -> str:
|
1760
|
+
'''Handles processing markdown headers'''
|
1732
1761
|
if match.group(1) is not None and match.group(3) == "-":
|
1733
1762
|
return match.group(1)
|
1734
1763
|
elif match.group(1) is not None:
|
@@ -1746,7 +1775,7 @@ class Markdown(object):
|
|
1746
1775
|
header_id_attr = ""
|
1747
1776
|
if "header-ids" in self.extras:
|
1748
1777
|
header_id = self.header_id_from_text(header_group,
|
1749
|
-
self.extras["header-ids"], n)
|
1778
|
+
self.extras["header-ids"].get('prefix'), n)
|
1750
1779
|
if header_id:
|
1751
1780
|
header_id_attr = ' id="%s"' % header_id
|
1752
1781
|
html = self._run_span_gamut(header_group)
|
@@ -1754,7 +1783,39 @@ class Markdown(object):
|
|
1754
1783
|
self._toc_add_entry(n, header_id, html)
|
1755
1784
|
return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n)
|
1756
1785
|
|
1757
|
-
|
1786
|
+
_h_tag_re = re.compile(r'''
|
1787
|
+
^<h([1-6])(.*)> # \1 tag num, \2 attrs
|
1788
|
+
(.*) # \3 text
|
1789
|
+
</h\1>
|
1790
|
+
''', re.X | re.M)
|
1791
|
+
|
1792
|
+
def _h_tag_sub(self, match: re.Match) -> str:
|
1793
|
+
'''Different to `_h_sub` in that this function handles existing HTML headers'''
|
1794
|
+
text = match.string[match.start(): match.end()]
|
1795
|
+
h_level = int(match.group(1))
|
1796
|
+
# extract id= attr from tag, trying to account for regex "misses"
|
1797
|
+
id_attr = (re.match(r'.*?id=(\S+)?.*', match.group(2) or '') or '')
|
1798
|
+
if id_attr:
|
1799
|
+
# if id attr exists, extract that
|
1800
|
+
id_attr = id_attr.group(1) or ''
|
1801
|
+
id_attr = id_attr.strip('\'" ')
|
1802
|
+
h_text = match.group(3)
|
1803
|
+
|
1804
|
+
# check if header was already processed (ie: was a markdown header rather than HTML)
|
1805
|
+
if id_attr and self._header_id_exists(id_attr):
|
1806
|
+
return text
|
1807
|
+
|
1808
|
+
# generate new header id if none existed
|
1809
|
+
header_id = id_attr or self.header_id_from_text(h_text, self.extras['header-ids'].get('prefix'), h_level)
|
1810
|
+
if "toc" in self.extras:
|
1811
|
+
self._toc_add_entry(h_level, header_id, h_text)
|
1812
|
+
if header_id and not id_attr:
|
1813
|
+
# '<h[digit]' + new ID + '...'
|
1814
|
+
return text[:3] + ' id="%s"' % header_id + text[3:]
|
1815
|
+
return text
|
1816
|
+
|
1817
|
+
@mark_stage(Stage.HEADERS)
|
1818
|
+
def _do_headers(self, text: str) -> str:
|
1758
1819
|
# Setext-style headers:
|
1759
1820
|
# Header 1
|
1760
1821
|
# ========
|
@@ -1778,7 +1839,7 @@ class Markdown(object):
|
|
1778
1839
|
_marker_ul = '(?:[%s])' % _marker_ul_chars
|
1779
1840
|
_marker_ol = r'(?:\d+\.)'
|
1780
1841
|
|
1781
|
-
def _list_sub(self, match):
|
1842
|
+
def _list_sub(self, match: re.Match) -> str:
|
1782
1843
|
lst = match.group(1)
|
1783
1844
|
lst_type = match.group(4) in self._marker_ul_chars and "ul" or "ol"
|
1784
1845
|
|
@@ -1796,7 +1857,8 @@ class Markdown(object):
|
|
1796
1857
|
else:
|
1797
1858
|
return "<%s%s>\n%s</%s>\n\n" % (lst_type, lst_opts, result, lst_type)
|
1798
1859
|
|
1799
|
-
|
1860
|
+
@mark_stage(Stage.LISTS)
|
1861
|
+
def _do_lists(self, text: str) -> str:
|
1800
1862
|
# Form HTML ordered (numbered) and unordered (bulleted) lists.
|
1801
1863
|
|
1802
1864
|
# Iterate over each *non-overlapping* list match.
|
@@ -1872,20 +1934,24 @@ class Markdown(object):
|
|
1872
1934
|
|
1873
1935
|
_task_list_warpper_str = r'<input type="checkbox" class="task-list-item-checkbox" %sdisabled> %s'
|
1874
1936
|
|
1875
|
-
def _task_list_item_sub(self, match):
|
1937
|
+
def _task_list_item_sub(self, match: re.Match) -> str:
|
1876
1938
|
marker = match.group(1)
|
1877
1939
|
item_text = match.group(2)
|
1878
1940
|
if marker in ['[x]','[X]']:
|
1879
|
-
|
1941
|
+
return self._task_list_warpper_str % ('checked ', item_text)
|
1880
1942
|
elif marker == '[ ]':
|
1881
|
-
|
1943
|
+
return self._task_list_warpper_str % ('', item_text)
|
1944
|
+
# returning None has same effect as returning empty str, but only
|
1945
|
+
# one makes the type checker happy
|
1946
|
+
return ''
|
1882
1947
|
|
1883
1948
|
_last_li_endswith_two_eols = False
|
1884
|
-
def _list_item_sub(self, match):
|
1949
|
+
def _list_item_sub(self, match: re.Match) -> str:
|
1885
1950
|
item = match.group(4)
|
1886
1951
|
leading_line = match.group(1)
|
1887
1952
|
if leading_line or "\n\n" in item or self._last_li_endswith_two_eols:
|
1888
|
-
item = self.
|
1953
|
+
item = self._uniform_outdent(item, min_outdent=' ', max_outdent=self.tab)[1]
|
1954
|
+
item = self._run_block_gamut(item)
|
1889
1955
|
else:
|
1890
1956
|
# Recursion for sub-lists:
|
1891
1957
|
item = self._do_lists(self._uniform_outdent(item, min_outdent=' ')[1])
|
@@ -1899,7 +1965,7 @@ class Markdown(object):
|
|
1899
1965
|
|
1900
1966
|
return "<li>%s</li>\n" % item
|
1901
1967
|
|
1902
|
-
def _process_list_items(self, list_str):
|
1968
|
+
def _process_list_items(self, list_str: str) -> str:
|
1903
1969
|
# Process the contents of a single ordered or unordered list,
|
1904
1970
|
# splitting it into individual list items.
|
1905
1971
|
|
@@ -1930,7 +1996,12 @@ class Markdown(object):
|
|
1930
1996
|
self.list_level -= 1
|
1931
1997
|
return list_str
|
1932
1998
|
|
1933
|
-
def _get_pygments_lexer(self, lexer_name):
|
1999
|
+
def _get_pygments_lexer(self, lexer_name: str):
|
2000
|
+
'''
|
2001
|
+
Returns:
|
2002
|
+
`pygments.Lexer` or None if a lexer matching `lexer_name` is
|
2003
|
+
not found
|
2004
|
+
'''
|
1934
2005
|
try:
|
1935
2006
|
from pygments import lexers, util
|
1936
2007
|
except ImportError:
|
@@ -1940,7 +2011,21 @@ class Markdown(object):
|
|
1940
2011
|
except util.ClassNotFound:
|
1941
2012
|
return None
|
1942
2013
|
|
1943
|
-
def _color_with_pygments(
|
2014
|
+
def _color_with_pygments(
|
2015
|
+
self,
|
2016
|
+
codeblock: str,
|
2017
|
+
lexer,
|
2018
|
+
**formatter_opts
|
2019
|
+
) -> str:
|
2020
|
+
'''
|
2021
|
+
TODO: this function is only referenced by the `FencedCodeBlocks`
|
2022
|
+
extra. May be worth moving over there
|
2023
|
+
|
2024
|
+
Args:
|
2025
|
+
codeblock: the codeblock to highlight
|
2026
|
+
lexer (pygments.Lexer): lexer to use
|
2027
|
+
formatter_opts: pygments HtmlFormatter options
|
2028
|
+
'''
|
1944
2029
|
import pygments
|
1945
2030
|
import pygments.formatters
|
1946
2031
|
|
@@ -1973,82 +2058,22 @@ class Markdown(object):
|
|
1973
2058
|
formatter = HtmlCodeFormatter(**formatter_opts)
|
1974
2059
|
return pygments.highlight(codeblock, lexer, formatter)
|
1975
2060
|
|
1976
|
-
def _code_block_sub(self, match
|
1977
|
-
|
1978
|
-
|
1979
|
-
|
1980
|
-
|
1981
|
-
|
1982
|
-
else:
|
1983
|
-
codeblock = match.group(1)
|
1984
|
-
codeblock = self._outdent(codeblock)
|
1985
|
-
codeblock = self._detab(codeblock)
|
1986
|
-
codeblock = codeblock.lstrip('\n') # trim leading newlines
|
1987
|
-
codeblock = codeblock.rstrip() # trim trailing whitespace
|
1988
|
-
|
1989
|
-
# Use pygments only if not using the highlightjs-lang extra
|
1990
|
-
if lexer_name and "highlightjs-lang" not in self.extras:
|
1991
|
-
lexer = self._get_pygments_lexer(lexer_name)
|
1992
|
-
if lexer:
|
1993
|
-
leading_indent = ' '*(len(match.group(1)) - len(match.group(1).lstrip()))
|
1994
|
-
return self._code_block_with_lexer_sub(codeblock, leading_indent, lexer, is_fenced_code_block)
|
2061
|
+
def _code_block_sub(self, match: re.Match) -> str:
|
2062
|
+
codeblock = match.group(1)
|
2063
|
+
codeblock = self._outdent(codeblock)
|
2064
|
+
codeblock = self._detab(codeblock)
|
2065
|
+
codeblock = codeblock.lstrip('\n') # trim leading newlines
|
2066
|
+
codeblock = codeblock.rstrip() # trim trailing whitespace
|
1995
2067
|
|
1996
2068
|
pre_class_str = self._html_class_str_from_tag("pre")
|
2069
|
+
code_class_str = self._html_class_str_from_tag("code")
|
1997
2070
|
|
1998
|
-
|
1999
|
-
code_class_str = ' class="%s language-%s"' % (lexer_name, lexer_name)
|
2000
|
-
else:
|
2001
|
-
code_class_str = self._html_class_str_from_tag("code")
|
2002
|
-
|
2003
|
-
if is_fenced_code_block:
|
2004
|
-
# Fenced code blocks need to be outdented before encoding, and then reapplied
|
2005
|
-
leading_indent = ' ' * (len(match.group(1)) - len(match.group(1).lstrip()))
|
2006
|
-
if codeblock:
|
2007
|
-
# only run the codeblock through the outdenter if not empty
|
2008
|
-
leading_indent, codeblock = self._uniform_outdent(codeblock, max_outdent=leading_indent)
|
2009
|
-
|
2010
|
-
codeblock = self._encode_code(codeblock)
|
2011
|
-
|
2012
|
-
if lexer_name == 'mermaid' and 'mermaid' in self.extras:
|
2013
|
-
return '\n%s<pre class="mermaid-pre"><div class="mermaid">%s\n</div></pre>\n' % (
|
2014
|
-
leading_indent, codeblock)
|
2015
|
-
|
2016
|
-
return "\n%s<pre%s><code%s>%s\n</code></pre>\n" % (
|
2017
|
-
leading_indent, pre_class_str, code_class_str, codeblock)
|
2018
|
-
else:
|
2019
|
-
codeblock = self._encode_code(codeblock)
|
2020
|
-
|
2021
|
-
return "\n<pre%s><code%s>%s\n</code></pre>\n" % (
|
2022
|
-
pre_class_str, code_class_str, codeblock)
|
2023
|
-
|
2024
|
-
def _code_block_with_lexer_sub(self, codeblock, leading_indent, lexer, is_fenced_code_block):
|
2025
|
-
if is_fenced_code_block:
|
2026
|
-
formatter_opts = self.extras['fenced-code-blocks'] or {}
|
2027
|
-
else:
|
2028
|
-
formatter_opts = {}
|
2029
|
-
|
2030
|
-
def unhash_code(codeblock):
|
2031
|
-
for key, sanitized in list(self.html_spans.items()):
|
2032
|
-
codeblock = codeblock.replace(key, sanitized)
|
2033
|
-
replacements = [
|
2034
|
-
("&", "&"),
|
2035
|
-
("<", "<"),
|
2036
|
-
(">", ">")
|
2037
|
-
]
|
2038
|
-
for old, new in replacements:
|
2039
|
-
codeblock = codeblock.replace(old, new)
|
2040
|
-
return codeblock
|
2041
|
-
# remove leading indent from code block
|
2042
|
-
_, codeblock = self._uniform_outdent(codeblock, max_outdent=leading_indent)
|
2043
|
-
|
2044
|
-
codeblock = unhash_code(codeblock)
|
2045
|
-
colored = self._color_with_pygments(codeblock, lexer,
|
2046
|
-
**formatter_opts)
|
2071
|
+
codeblock = self._encode_code(codeblock)
|
2047
2072
|
|
2048
|
-
|
2049
|
-
|
2073
|
+
return "\n<pre%s><code%s>%s\n</code></pre>\n" % (
|
2074
|
+
pre_class_str, code_class_str, codeblock)
|
2050
2075
|
|
2051
|
-
def _html_class_str_from_tag(self, tag):
|
2076
|
+
def _html_class_str_from_tag(self, tag: str) -> str:
|
2052
2077
|
"""Get the appropriate ' class="..."' string (note the leading
|
2053
2078
|
space), if any, for the given tag.
|
2054
2079
|
"""
|
@@ -2064,7 +2089,8 @@ class Markdown(object):
|
|
2064
2089
|
return ' class="%s"' % html_classes_from_tag[tag]
|
2065
2090
|
return ""
|
2066
2091
|
|
2067
|
-
|
2092
|
+
@mark_stage(Stage.CODE_BLOCKS)
|
2093
|
+
def _do_code_blocks(self, text: str) -> str:
|
2068
2094
|
"""Process Markdown `<pre><code>` blocks."""
|
2069
2095
|
code_block_re = re.compile(r'''
|
2070
2096
|
(?:\n\n|\A\n?)
|
@@ -2082,20 +2108,6 @@ class Markdown(object):
|
|
2082
2108
|
re.M | re.X)
|
2083
2109
|
return code_block_re.sub(self._code_block_sub, text)
|
2084
2110
|
|
2085
|
-
_fenced_code_block_re = re.compile(r'''
|
2086
|
-
(?:\n+|\A\n?|(?<=\n))
|
2087
|
-
(^[ \t]*`{3,})\s{0,99}?([\w+-]+)?\s{0,99}?\n # $1 = opening fence (captured for back-referencing), $2 = optional lang
|
2088
|
-
(.*?) # $3 = code block content
|
2089
|
-
\1[ \t]*\n # closing fence
|
2090
|
-
''', re.M | re.X | re.S)
|
2091
|
-
|
2092
|
-
def _fenced_code_block_sub(self, match):
|
2093
|
-
return self._code_block_sub(match, is_fenced_code_block=True)
|
2094
|
-
|
2095
|
-
def _do_fenced_code_blocks(self, text):
|
2096
|
-
"""Process ```-fenced unindented code blocks ('fenced-code-blocks' extra)."""
|
2097
|
-
return self._fenced_code_block_re.sub(self._fenced_code_block_sub, text)
|
2098
|
-
|
2099
2111
|
# Rules for a code span:
|
2100
2112
|
# - backslash escapes are not interpreted in a code span
|
2101
2113
|
# - to include one or a run of more backticks the delimiters must
|
@@ -2114,12 +2126,13 @@ class Markdown(object):
|
|
2114
2126
|
(?!`)
|
2115
2127
|
''', re.X | re.S)
|
2116
2128
|
|
2117
|
-
def _code_span_sub(self, match):
|
2129
|
+
def _code_span_sub(self, match: re.Match) -> str:
    """Build the inline ``<code>`` element for one code-span match.

    Group 2 of `match` is taken as the span's content. Spaces and tabs are
    trimmed from both ends, the result is passed through `_encode_code`, and
    it is wrapped in a `<code>` tag carrying whatever class string
    `_html_class_str_from_tag("code")` returns.
    """
    raw_code = match.group(2).strip(" \t")
    encoded = self._encode_code(raw_code)
    class_attr = self._html_class_str_from_tag("code")
    return "<code%s>%s</code>" % (class_attr, encoded)
|
2121
2133
|
|
2122
|
-
|
2134
|
+
@mark_stage(Stage.CODE_SPANS)
|
2135
|
+
def _do_code_spans(self, text: str) -> str:
|
2123
2136
|
# * Backtick quotes are used for <code></code> spans.
|
2124
2137
|
#
|
2125
2138
|
# * You can use multiple backticks as the delimiters if you want to
|
@@ -2144,7 +2157,7 @@ class Markdown(object):
|
|
2144
2157
|
# ... type <code>`bar`</code> ...
|
2145
2158
|
return self._code_span_re.sub(self._code_span_sub, text)
|
2146
2159
|
|
2147
|
-
def _encode_code(self, text):
|
2160
|
+
def _encode_code(self, text: str) -> str:
|
2148
2161
|
"""Encode/escape certain characters inside Markdown code runs.
|
2149
2162
|
The point is that in code, these characters are literals,
|
2150
2163
|
and lose their special Markdown meanings.
|
@@ -2163,160 +2176,14 @@ class Markdown(object):
|
|
2163
2176
|
self._code_table[text] = hashed
|
2164
2177
|
return hashed
|
2165
2178
|
|
2166
|
-
|
2167
|
-
|
2168
|
-
if match.group(2) != 'wavedrom':
|
2169
|
-
return match.string[match.start():match.end()]
|
2170
|
-
|
2171
|
-
# dedent the block for processing
|
2172
|
-
lead_indent, waves = self._uniform_outdent(match.group(3))
|
2173
|
-
# default tags to wrap the wavedrom block in
|
2174
|
-
open_tag, close_tag = '<script type="WaveDrom">\n', '</script>'
|
2175
|
-
|
2176
|
-
# check if the user would prefer to have the SVG embedded directly
|
2177
|
-
if not isinstance(self.extras['wavedrom'], dict):
|
2178
|
-
embed_svg = True
|
2179
|
-
else:
|
2180
|
-
# default behaviour is to embed SVGs
|
2181
|
-
embed_svg = self.extras['wavedrom'].get('prefer_embed_svg', True)
|
2182
|
-
|
2183
|
-
if embed_svg:
|
2184
|
-
try:
|
2185
|
-
import wavedrom
|
2186
|
-
waves = wavedrom.render(waves).tostring()
|
2187
|
-
open_tag, close_tag = '<div>', '\n</div>'
|
2188
|
-
except ImportError:
|
2189
|
-
pass
|
2190
|
-
|
2191
|
-
# hash SVG to prevent <> chars being messed with
|
2192
|
-
self._escape_table[waves] = _hash_text(waves)
|
2193
|
-
|
2194
|
-
return self._uniform_indent(
|
2195
|
-
'\n%s%s%s\n' % (open_tag, self._escape_table[waves], close_tag),
|
2196
|
-
lead_indent, include_empty_lines=True
|
2197
|
-
)
|
2198
|
-
|
2199
|
-
def _do_wavedrom_blocks(self, text):
|
2200
|
-
return self._fenced_code_block_re.sub(self._wavedrom_block_sub, text)
|
2201
|
-
|
2202
|
-
_admonitions = r'admonition|attention|caution|danger|error|hint|important|note|tip|warning'
|
2203
|
-
_admonitions_re = re.compile(r'''
|
2204
|
-
^(\ *)\.\.\ (%s)::\ * # $1 leading indent, $2 the admonition
|
2205
|
-
(.*)? # $3 admonition title
|
2206
|
-
((?:\s*\n\1\ {3,}.*)+?) # $4 admonition body (required)
|
2207
|
-
(?=\s*(?:\Z|\n{4,}|\n\1?\ {0,2}\S)) # until EOF, 3 blank lines or something less indented
|
2208
|
-
''' % _admonitions,
|
2209
|
-
re.IGNORECASE | re.MULTILINE | re.VERBOSE
|
2210
|
-
)
|
2211
|
-
|
2212
|
-
def _do_admonitions_sub(self, match):
|
2213
|
-
lead_indent, admonition_name, title, body = match.groups()
|
2214
|
-
|
2215
|
-
admonition_type = '<strong>%s</strong>' % admonition_name
|
2216
|
-
|
2217
|
-
# figure out the class names to assign the block
|
2218
|
-
if admonition_name.lower() == 'admonition':
|
2219
|
-
admonition_class = 'admonition'
|
2220
|
-
else:
|
2221
|
-
admonition_class = 'admonition %s' % admonition_name.lower()
|
2222
|
-
|
2223
|
-
# titles are generally optional
|
2224
|
-
if title:
|
2225
|
-
title = '<em>%s</em>' % title
|
2226
|
-
|
2227
|
-
# process the admonition body like regular markdown
|
2228
|
-
body = self._run_block_gamut("\n%s\n" % self._uniform_outdent(body)[1])
|
2229
|
-
|
2230
|
-
# indent the body before placing inside the aside block
|
2231
|
-
admonition = self._uniform_indent('%s\n%s\n\n%s\n' % (admonition_type, title, body), self.tab, False)
|
2232
|
-
# wrap it in an aside
|
2233
|
-
admonition = '<aside class="%s">\n%s</aside>' % (admonition_class, admonition)
|
2234
|
-
# now indent the whole admonition back to where it started
|
2235
|
-
return self._uniform_indent(admonition, lead_indent, False)
|
2236
|
-
|
2237
|
-
def _do_admonitions(self, text):
|
2238
|
-
return self._admonitions_re.sub(self._do_admonitions_sub, text)
|
2239
|
-
|
2240
|
-
_strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)
|
2241
|
-
def _do_strike(self, text):
|
2242
|
-
text = self._strike_re.sub(r"<s>\1</s>", text)
|
2243
|
-
return text
|
2244
|
-
|
2245
|
-
_underline_re = re.compile(r"(?<!<!)--(?!>)(?=\S)(.+?)(?<=\S)(?<!<!)--(?!>)", re.S)
|
2246
|
-
def _do_underline(self, text):
|
2247
|
-
text = self._underline_re.sub(r"<u>\1</u>", text)
|
2248
|
-
return text
|
2249
|
-
|
2250
|
-
_tg_spoiler_re = re.compile(r"\|\|\s?(.+?)\s?\|\|", re.S)
|
2251
|
-
def _do_tg_spoiler(self, text):
|
2252
|
-
text = self._tg_spoiler_re.sub(r"<tg-spoiler>\1</tg-spoiler>", text)
|
2253
|
-
return text
|
2179
|
+
_strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]?)(?<=\S)\1", re.S)
|
2180
|
+
_em_re = re.compile(r"(\*|_)(?=\S)(.*?\S)\1", re.S)
|
2254
2181
|
|
2255
|
-
|
2256
|
-
|
2257
|
-
_code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
|
2258
|
-
_code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
|
2259
|
-
def _do_italics_and_bold(self, text):
|
2182
|
+
@mark_stage(Stage.ITALIC_AND_BOLD)
|
2183
|
+
def _do_italics_and_bold(self, text: str) -> str:
|
2260
2184
|
# <strong> must go first:
|
2261
|
-
|
2262
|
-
|
2263
|
-
text = self._code_friendly_em_re.sub(r"<em>\1</em>", text)
|
2264
|
-
else:
|
2265
|
-
text = self._strong_re.sub(r"<strong>\2</strong>", text)
|
2266
|
-
text = self._em_re.sub(r"<em>\2</em>", text)
|
2267
|
-
return text
|
2268
|
-
|
2269
|
-
# "smarty-pants" extra: Very liberal in interpreting a single prime as an
|
2270
|
-
# apostrophe; e.g. ignores the fact that "round", "bout", "twer", and
|
2271
|
-
# "twixt" can be written without an initial apostrophe. This is fine because
|
2272
|
-
# using scare quotes (single quotation marks) is rare.
|
2273
|
-
_apostrophe_year_re = re.compile(r"'(\d\d)(?=(\s|,|;|\.|\?|!|$))")
|
2274
|
-
_contractions = ["tis", "twas", "twer", "neath", "o", "n",
|
2275
|
-
"round", "bout", "twixt", "nuff", "fraid", "sup"]
|
2276
|
-
def _do_smart_contractions(self, text):
|
2277
|
-
text = self._apostrophe_year_re.sub(r"’\1", text)
|
2278
|
-
for c in self._contractions:
|
2279
|
-
text = text.replace("'%s" % c, "’%s" % c)
|
2280
|
-
text = text.replace("'%s" % c.capitalize(),
|
2281
|
-
"’%s" % c.capitalize())
|
2282
|
-
return text
|
2283
|
-
|
2284
|
-
# Substitute double-quotes before single-quotes.
|
2285
|
-
_opening_single_quote_re = re.compile(r"(?<!\S)'(?=\S)")
|
2286
|
-
_opening_double_quote_re = re.compile(r'(?<!\S)"(?=\S)')
|
2287
|
-
_closing_single_quote_re = re.compile(r"(?<=\S)'")
|
2288
|
-
_closing_double_quote_re = re.compile(r'(?<=\S)"(?=(\s|,|;|\.|\?|!|$))')
|
2289
|
-
def _do_smart_punctuation(self, text):
|
2290
|
-
"""Fancifies 'single quotes', "double quotes", and apostrophes.
|
2291
|
-
Converts --, ---, and ... into en dashes, em dashes, and ellipses.
|
2292
|
-
|
2293
|
-
Inspiration is: <http://daringfireball.net/projects/smartypants/>
|
2294
|
-
See "test/tm-cases/smarty_pants.text" for a full discussion of the
|
2295
|
-
support here and
|
2296
|
-
<http://code.google.com/p/python-markdown2/issues/detail?id=42> for a
|
2297
|
-
discussion of some diversion from the original SmartyPants.
|
2298
|
-
"""
|
2299
|
-
if "'" in text: # guard for perf
|
2300
|
-
text = self._do_smart_contractions(text)
|
2301
|
-
text = self._opening_single_quote_re.sub("‘", text)
|
2302
|
-
text = self._closing_single_quote_re.sub("’", text)
|
2303
|
-
|
2304
|
-
if '"' in text: # guard for perf
|
2305
|
-
text = self._opening_double_quote_re.sub("“", text)
|
2306
|
-
text = self._closing_double_quote_re.sub("”", text)
|
2307
|
-
|
2308
|
-
text = text.replace("---", "—")
|
2309
|
-
text = text.replace("--", "–")
|
2310
|
-
text = text.replace("...", "…")
|
2311
|
-
text = text.replace(" . . . ", "…")
|
2312
|
-
text = text.replace(". . .", "…")
|
2313
|
-
|
2314
|
-
# TODO: Temporary hack to fix https://github.com/trentm/python-markdown2/issues/150
|
2315
|
-
if "footnotes" in self.extras and "footnote-ref" in text:
|
2316
|
-
# Quotes in the footnote back ref get converted to "smart" quotes
|
2317
|
-
# Change them back here to ensure they work.
|
2318
|
-
text = text.replace('class="footnote-ref”', 'class="footnote-ref"')
|
2319
|
-
|
2185
|
+
text = self._strong_re.sub(r"<strong>\2</strong>", text)
|
2186
|
+
text = self._em_re.sub(r"<em>\2</em>", text)
|
2320
2187
|
return text
|
2321
2188
|
|
2322
2189
|
_block_quote_base = r'''
|
@@ -2334,10 +2201,10 @@ class Markdown(object):
|
|
2334
2201
|
_bq_one_level_re_spoiler = re.compile('^[ \t]*>[ \t]*?![ \t]?', re.M)
|
2335
2202
|
_bq_all_lines_spoilers = re.compile(r'\A(?:^[ \t]*>[ \t]*?!.*[\n\r]*)+\Z', re.M)
|
2336
2203
|
_html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S)
|
2337
|
-
def _dedent_two_spaces_sub(self, match):
|
2204
|
+
def _dedent_two_spaces_sub(self, match: re.Match) -> str:
    """Remove a two-space indent from the start of every line in group 1.

    Intended as a `re.sub` callback: group 1 of `match` is the block to
    dedent. Lines that start with fewer than two spaces are left untouched.

    Returns:
        The text of group 1 with exactly two leading spaces stripped from
        each line that has them.
    """
    # `(?m)` makes `^` match at every line start. The width is written as an
    # explicit `{2}` quantifier so the "two spaces" promised by the name cannot
    # be lost to whitespace normalisation of the pattern literal.
    return re.sub(r'(?m)^ {2}', '', match.group(1))
|
2339
2206
|
|
2340
|
-
def _block_quote_sub(self, match):
|
2207
|
+
def _block_quote_sub(self, match: re.Match) -> str:
|
2341
2208
|
bq = match.group(1)
|
2342
2209
|
is_spoiler = 'spoiler' in self.extras and self._bq_all_lines_spoilers.match(bq)
|
2343
2210
|
# trim one level of quoting
|
@@ -2358,7 +2225,8 @@ class Markdown(object):
|
|
2358
2225
|
else:
|
2359
2226
|
return '<blockquote>\n%s\n</blockquote>\n\n' % bq
|
2360
2227
|
|
2361
|
-
|
2228
|
+
@mark_stage(Stage.BLOCK_QUOTES)
|
2229
|
+
def _do_block_quotes(self, text: str) -> str:
|
2362
2230
|
if '>' not in text:
|
2363
2231
|
return text
|
2364
2232
|
if 'spoiler' in self.extras:
|
@@ -2366,7 +2234,8 @@ class Markdown(object):
|
|
2366
2234
|
else:
|
2367
2235
|
return self._block_quote_re.sub(self._block_quote_sub, text)
|
2368
2236
|
|
2369
|
-
|
2237
|
+
@mark_stage(Stage.PARAGRAPHS)
|
2238
|
+
def _form_paragraphs(self, text: str) -> str:
|
2370
2239
|
# Strip leading and trailing lines:
|
2371
2240
|
text = text.strip('\n')
|
2372
2241
|
|
@@ -2396,8 +2265,13 @@ class Markdown(object):
|
|
2396
2265
|
):
|
2397
2266
|
start = li.start()
|
2398
2267
|
cuddled_list = self._do_lists(graf[start:]).rstrip("\n")
|
2399
|
-
|
2400
|
-
|
2268
|
+
if re.match(r'^<(?:ul|ol).*?>', cuddled_list):
|
2269
|
+
graf = graf[:start]
|
2270
|
+
else:
|
2271
|
+
# Not quite a cuddled list. (See not_quite_a_list_cuddled_lists test case)
|
2272
|
+
# Store as a simple paragraph.
|
2273
|
+
graf = cuddled_list
|
2274
|
+
cuddled_list = None
|
2401
2275
|
|
2402
2276
|
# Wrap <p> tags.
|
2403
2277
|
graf = self._run_span_gamut(graf)
|
@@ -2408,7 +2282,7 @@ class Markdown(object):
|
|
2408
2282
|
|
2409
2283
|
return "\n\n".join(grafs)
|
2410
2284
|
|
2411
|
-
def _add_footnotes(self, text):
|
2285
|
+
def _add_footnotes(self, text: str) -> str:
|
2412
2286
|
if self.footnotes:
|
2413
2287
|
footer = [
|
2414
2288
|
'<div class="footnotes">',
|
@@ -2421,6 +2295,10 @@ class Markdown(object):
|
|
2421
2295
|
if not self.footnote_return_symbol:
|
2422
2296
|
self.footnote_return_symbol = "↩"
|
2423
2297
|
|
2298
|
+
# self.footnotes is generated in _strip_footnote_definitions, which runs re.sub on the whole
|
2299
|
+
# text. This means that the dict keys are inserted in order of appearance. Use the dict to
|
2300
|
+
# sort footnote ids by that same order
|
2301
|
+
self.footnote_ids.sort(key=lambda a: list(self.footnotes.keys()).index(a))
|
2424
2302
|
for i, id in enumerate(self.footnote_ids):
|
2425
2303
|
if i != 0:
|
2426
2304
|
footer.append('')
|
@@ -2455,7 +2333,7 @@ class Markdown(object):
|
|
2455
2333
|
_naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I)
|
2456
2334
|
_naked_gt_re = re.compile(r'''(?<![a-z0-9?!/'"-])>''', re.I)
|
2457
2335
|
|
2458
|
-
def _encode_amps_and_angles(self, text):
|
2336
|
+
def _encode_amps_and_angles(self, text: str) -> str:
|
2459
2337
|
# Smart processing for ampersands and angle brackets that need
|
2460
2338
|
# to be encoded.
|
2461
2339
|
text = _AMPERSAND_RE.sub('&', text)
|
@@ -2469,9 +2347,9 @@ class Markdown(object):
|
|
2469
2347
|
text = self._naked_gt_re.sub('>', text)
|
2470
2348
|
return text
|
2471
2349
|
|
2472
|
-
_incomplete_tags_re = re.compile(r"<(
|
2350
|
+
_incomplete_tags_re = re.compile(r"<(!--|/?\w+?(?!\w)\s*?.+?(?:[\s/]+?|$))")
|
2473
2351
|
|
2474
|
-
def _encode_incomplete_tags(self, text):
|
2352
|
+
def _encode_incomplete_tags(self, text: str) -> str:
|
2475
2353
|
if self.safe_mode not in ("replace", "escape"):
|
2476
2354
|
return text
|
2477
2355
|
|
@@ -2483,13 +2361,13 @@ class Markdown(object):
|
|
2483
2361
|
|
2484
2362
|
return self._incomplete_tags_re.sub(incomplete_tags_sub, text)
|
2485
2363
|
|
2486
|
-
def _encode_backslash_escapes(self, text):
|
2364
|
+
def _encode_backslash_escapes(self, text: str) -> str:
    """Replace every backslash-escaped character with its placeholder.

    For each `(character, placeholder)` pair in `self._escape_table`, all
    occurrences of a backslash followed by that character are swapped for the
    placeholder. The table is snapshotted before the loop starts.
    """
    escape_pairs = tuple(self._escape_table.items())
    encoded = text
    for char, placeholder in escape_pairs:
        encoded = encoded.replace("\\" + char, placeholder)
    return encoded
|
2490
2368
|
|
2491
2369
|
_auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I)
|
2492
|
-
def _auto_link_sub(self, match):
|
2370
|
+
def _auto_link_sub(self, match: re.Match) -> str:
    """Turn one matched URL into an anchor element.

    Group 1 of `match` is used verbatim as the link text and, after being
    passed through `_protect_url`, as the `href` value.
    """
    url = match.group(1)
    href = self._protect_url(url)
    return '<a href="%s">%s</a>' % (href, url)
|
2495
2373
|
|
@@ -2503,16 +2381,16 @@ class Markdown(object):
|
|
2503
2381
|
)
|
2504
2382
|
>
|
2505
2383
|
""", re.I | re.X | re.U)
|
2506
|
-
def _auto_email_link_sub(self, match):
|
2384
|
+
def _auto_email_link_sub(self, match: re.Match) -> str:
    """Convert one matched e-mail address into its encoded link form.

    Group 1 of `match` is first passed through `_unescape_special_chars`, and
    the result is handed to `_encode_email_address`, whose return value is
    returned unchanged.
    """
    address = self._unescape_special_chars(match.group(1))
    return self._encode_email_address(address)
|
2509
2387
|
|
2510
|
-
def _do_auto_links(self, text):
|
2388
|
+
def _do_auto_links(self, text: str) -> str:
    """Expand automatic URL links, then automatic e-mail links, in `text`.

    Matches of `_auto_link_re` are rewritten by `_auto_link_sub` first; the
    resulting text is then scanned with `_auto_email_link_re`, whose matches
    are rewritten by `_auto_email_link_sub`.
    """
    with_url_links = self._auto_link_re.sub(self._auto_link_sub, text)
    return self._auto_email_link_re.sub(self._auto_email_link_sub, with_url_links)
|
2514
2392
|
|
2515
|
-
def _encode_email_address(self, addr):
|
2393
|
+
def _encode_email_address(self, addr: str) -> str:
|
2516
2394
|
# Input: an email address, e.g. "foo@example.com"
|
2517
2395
|
#
|
2518
2396
|
# Output: the email address as a mailto link, with each character
|
@@ -2532,88 +2410,40 @@ class Markdown(object):
|
|
2532
2410
|
% (''.join(chars), ''.join(chars[7:]))
|
2533
2411
|
return addr
|
2534
2412
|
|
2535
|
-
|
2536
|
-
def _do_link_patterns(self, text):
|
2537
|
-
link_from_hash = {}
|
2538
|
-
for regex, repl in self.link_patterns:
|
2539
|
-
replacements = []
|
2540
|
-
for match in regex.finditer(text):
|
2541
|
-
if any(self._match_overlaps_substr(text, match, h) for h in link_from_hash):
|
2542
|
-
continue
|
2543
|
-
|
2544
|
-
if hasattr(repl, "__call__"):
|
2545
|
-
href = repl(match)
|
2546
|
-
else:
|
2547
|
-
href = match.expand(repl)
|
2548
|
-
replacements.append((match.span(), href))
|
2549
|
-
for (start, end), href in reversed(replacements):
|
2550
|
-
|
2551
|
-
# Do not match against links inside brackets.
|
2552
|
-
if text[start - 1:start] == '[' and text[end:end + 1] == ']':
|
2553
|
-
continue
|
2554
|
-
|
2555
|
-
# Do not match against links in the standard markdown syntax.
|
2556
|
-
if text[start - 2:start] == '](' or text[end:end + 2] == '")':
|
2557
|
-
continue
|
2558
|
-
|
2559
|
-
# Do not match against links which are escaped.
|
2560
|
-
if text[start - 3:start] == '"""' and text[end:end + 3] == '"""':
|
2561
|
-
text = text[:start - 3] + text[start:end] + text[end + 3:]
|
2562
|
-
continue
|
2563
|
-
|
2564
|
-
# search the text for anything that looks like a link
|
2565
|
-
is_inside_link = False
|
2566
|
-
for link_re in (self._auto_link_re, self._basic_link_re):
|
2567
|
-
for match in link_re.finditer(text):
|
2568
|
-
if any((r[0] <= start and end <= r[1]) for r in match.regs):
|
2569
|
-
# if the link pattern start and end pos is within the bounds of
|
2570
|
-
# something that looks like a link, then don't process it
|
2571
|
-
is_inside_link = True
|
2572
|
-
break
|
2573
|
-
else:
|
2574
|
-
continue
|
2575
|
-
break
|
2576
|
-
|
2577
|
-
if is_inside_link:
|
2578
|
-
continue
|
2579
|
-
|
2580
|
-
escaped_href = (
|
2581
|
-
href.replace('"', '"') # b/c of attr quote
|
2582
|
-
# To avoid markdown <em> and <strong>:
|
2583
|
-
.replace('*', self._escape_table['*'])
|
2584
|
-
.replace('_', self._escape_table['_']))
|
2585
|
-
link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
|
2586
|
-
hash = _hash_text(link)
|
2587
|
-
link_from_hash[hash] = link
|
2588
|
-
text = text[:start] + hash + text[end:]
|
2589
|
-
for hash, link in list(link_from_hash.items()):
|
2590
|
-
text = text.replace(hash, link)
|
2591
|
-
return text
|
2592
|
-
|
2593
|
-
def _unescape_special_chars(self, text):
|
2413
|
+
def _unescape_special_chars(self, text: str) -> str:
    """Swap every placeholder in `text` back for the content it stands for.

    Placeholders come from three tables: `_escape_table` and `_code_table`
    (both stored as `{content: placeholder}`) and `html_blocks` (stored the
    other way round, `{placeholder: content}`). Restored content may itself
    contain placeholders, so full passes are repeated until a pass leaves the
    text unchanged.
    """
    # Normalise all three tables to (content, placeholder) pairs, keeping the
    # escape -> code -> html order in which replacements are attempted.
    pairs = [
        *self._escape_table.items(),
        *self._code_table.items(),
        *((content, placeholder) for placeholder, content in self.html_blocks.items()),
    ]
    changed = True
    while changed:
        before = text
        for content, placeholder in pairs:
            text = text.replace(placeholder, content)
        changed = text != before
    return text
|
2602
2425
|
|
2603
|
-
def _outdent(self, text):
|
2426
|
+
def _outdent(self, text: str) -> str:
    """Strip one level of line-leading tabs or spaces from `text`.

    Every match of `self._outdent_re` is deleted. What counts as "one level"
    is decided entirely by that pattern, which is defined outside this method.
    """
    strip_indent = self._outdent_re.sub
    return strip_indent('', text)
|
2606
2429
|
|
2607
|
-
|
2608
|
-
|
2609
|
-
|
2610
|
-
|
2611
|
-
|
2612
|
-
|
2613
|
-
|
2430
|
+
@staticmethod
|
2431
|
+
def _uniform_outdent(
|
2432
|
+
text: str,
|
2433
|
+
min_outdent: Optional[str] = None,
|
2434
|
+
max_outdent: Optional[str] = None
|
2435
|
+
) -> Tuple[str, str]:
|
2436
|
+
'''
|
2437
|
+
Removes the smallest common leading indentation from each (non empty)
|
2438
|
+
line of `text` and returns said indent along with the outdented text.
|
2439
|
+
|
2440
|
+
Args:
|
2441
|
+
min_outdent: make sure the smallest common whitespace is at least this size
|
2442
|
+
max_outdent: the maximum amount a line can be outdented by
|
2443
|
+
'''
|
2614
2444
|
|
2615
2445
|
# find the leading whitespace for every line
|
2616
|
-
whitespace = [
|
2446
|
+
whitespace: List[Union[str, None]] = [
|
2617
2447
|
re.findall(r'^[ \t]*', line)[0] if line else None
|
2618
2448
|
for line in text.splitlines()
|
2619
2449
|
]
|
@@ -2644,14 +2474,34 @@ class Markdown(object):
|
|
2644
2474
|
|
2645
2475
|
return outdent, ''.join(outdented)
|
2646
2476
|
|
2647
|
-
|
2648
|
-
|
2649
|
-
|
2650
|
-
|
2651
|
-
|
2477
|
+
@staticmethod
def _uniform_indent(
    text: str,
    indent: str,
    include_empty_lines: bool = False,
    indent_empty_lines: bool = False
) -> str:
    '''
    Prefix the lines of `text` with a fixed indent string.

    Args:
        text: the text to indent
        indent: the string placed in front of each indented line
        include_empty_lines: keep whitespace-only lines as they are instead of
            dropping them (only consulted when `indent_empty_lines` is false)
        indent_empty_lines: treat whitespace-only lines like any other line and
            indent them too

    Returns:
        The re-joined text. Line endings are preserved on every line that is kept.
    '''
    def render(line: str) -> str:
        # Lines with visible content are always indented; blank ones only on request.
        if indent_empty_lines or line.strip():
            return indent + line
        return line if include_empty_lines else ''

    return ''.join(render(line) for line in text.splitlines(True))
|
2652
2502
|
|
2653
2503
|
@staticmethod
|
2654
|
-
def _match_overlaps_substr(text, match, substr):
|
2504
|
+
def _match_overlaps_substr(text, match: re.Match, substr: str) -> bool:
|
2655
2505
|
'''
|
2656
2506
|
Checks if a regex match overlaps with a substring in the given text.
|
2657
2507
|
'''
|
@@ -2676,58 +2526,1093 @@ class MarkdownWithExtras(Markdown):
|
|
2676
2526
|
- link-patterns (because you need to specify some actual
|
2677
2527
|
link-patterns anyway)
|
2678
2528
|
"""
|
2679
|
-
extras = ["footnotes", "fenced-code-blocks"]
|
2529
|
+
extras = ["footnotes", "fenced-code-blocks"] # type: ignore
|
2680
2530
|
|
2681
2531
|
|
2682
|
-
#
|
2532
|
+
# ----------------------------------------------------------
|
2533
|
+
# Extras
|
2534
|
+
# ----------------------------------------------------------
|
2683
2535
|
|
2536
|
+
# Base classes
|
2537
|
+
# ----------------------------------------------------------
|
2684
2538
|
|
2685
|
-
|
2686
|
-
|
2539
|
+
class Extra(ABC):
    '''
    Abstract base class for markdown extras.

    Two class-level tables are shared by all subclasses: `_registry` maps an extra's
    `name` to its class, and `_exec_order` maps each `Stage` to a pair of lists holding
    the extras scheduled before and after that stage.
    '''
    _registry: Dict[str, Type['Extra']] = {}
    _exec_order: Dict[Stage, Tuple[List[Type['Extra']], List[Type['Extra']]]] = {}

    name: str
    '''
    An identifiable name that users can use to invoke the extra
    in the Markdown class
    '''
    order: Tuple[Collection[Union[Stage, Type['Extra']]], Collection[Union[Stage, Type['Extra']]]]
    '''
    Tuple of two iterables containing the stages/extras this extra will run before and
    after, respectively
    '''

    def __init__(self, md: Markdown, options: Optional[dict]):
        '''
        Args:
            md: An instance of `Markdown`
            options: a dict of settings to alter the extra's behaviour;
                `None` is stored as an empty dict
        '''
        self.md = md
        self.options = {} if options is None else options

    @classmethod
    def deregister(cls):
        '''
        Removes the class from the extras registry and unsets its execution order.
        '''
        cls._registry.pop(cls.name, None)

        for sections in Extra._exec_order.values():
            for section in sections:
                # drop every mention of this extra, mutating the shared list in place
                section[:] = [extra for extra in section if extra is not cls]

    @classmethod
    def register(cls):
        '''
        Registers the class for use with `Markdown` and calculates its execution order based on
        the `order` class attribute.
        '''
        cls._registry[cls.name] = cls

        for before, anchors in ((True, cls.order[0]), (False, cls.order[1])):
            for anchor in anchors:
                if isinstance(anchor, Stage) or not issubclass(anchor, Extra):
                    # eg: Stage.PREPROCESS
                    befores, afters = Extra._exec_order.setdefault(anchor, ([], []))
                    slot = befores if before else afters
                    if cls in slot:
                        # extra is already scheduled on this side of the stage.
                        # Don't duplicate that effort
                        continue
                    if before:
                        # newest registration goes to the front of the "before" list
                        slot.insert(0, cls)
                    else:
                        slot.append(cls)
                    continue

                # eg: FencedCodeBlocks
                for sections in Extra._exec_order.values():
                    # insert this extra everywhere the other one is mentioned
                    for section in sections:
                        if anchor in section:
                            position = section.index(anchor) + (0 if before else 1)
                            section.insert(position, cls)

    @abstractmethod
    def run(self, text: str) -> str:
        '''
        Run the extra against the given text.

        Returns:
            The new text after being modified by the extra
        '''
        ...

    def test(self, text: str) -> bool:
        '''
        Check a section of markdown to see if this extra should be run upon it.
        The default implementation always returns True; overriding it with a cheap
        check is recommended to improve performance.
        '''
        return True
|
2625
|
+
|
2626
|
+
|
2627
|
+
class ItalicAndBoldProcessor(Extra):
    '''
    An ABC that provides hooks for dealing with italics and bold syntax.
    This class is set to trigger both before AND after the italics and bold stage.
    This allows any child classes to intercept instances of bold or italic syntax and
    change the output or hash it to prevent it from being processed.

    After the I&B stage any hashes in the `hash_table` instance variable are replaced.
    '''
    name = 'italic-and-bold-processor'
    order = (Stage.ITALIC_AND_BOLD,), (Stage.ITALIC_AND_BOLD,)

    strong_re = Markdown._strong_re
    em_re = Markdown._em_re

    def __init__(self, md: Markdown, options: Optional[dict]):
        '''
        Args:
            md: An instance of `Markdown`
            options: a dict of settings to alter the extra's behaviour (may be None,
                as accepted by `Extra.__init__`)
        '''
        super().__init__(md, options)
        # maps hash key -> original text; filled by `sub_hash` (or subclasses) during
        # the first pass and substituted back during the second pass
        self.hash_table = {}

    def run(self, text):
        '''
        First pass (`md.order` below the I&B stage): feed every bold and italic match
        to `sub`. Second pass: substitute every key of `hash_table` back into the text.
        '''
        if self.md.order < Stage.ITALIC_AND_BOLD:
            text = self.strong_re.sub(self.sub, text)
            text = self.em_re.sub(self.sub, text)
        else:
            # push any hashed values back, using a while loop to deal with recursive hashes
            orig_text = ''
            while orig_text != text:
                orig_text = text
                for key, substr in self.hash_table.items():
                    text = text.replace(key, substr)
        return text

    @abstractmethod
    def sub(self, match: re.Match) -> str:
        '''
        Replacement callback for each bold/italic match found during the first pass.
        Subclasses must override this; the base body is reachable through `super().sub()`.
        '''
        # do nothing. Let `Markdown._do_italics_and_bold` do its thing later
        return match.group(0)

    def sub_hash(self, match: re.Match) -> str:
        '''
        Replace the whole match with a hash key, remembering the original text in
        `hash_table` so that the second pass of `run` can restore it.
        '''
        substr = match.group(0)
        key = _hash_text(substr)
        self.hash_table[key] = substr
        return key

    def test(self, text) -> bool:
        '''
        First pass: only worth running when the text contains `*` or `_`.
        Second pass: only worth running when something was hashed and the text still
        contains something that looks like a hash key.
        '''
        if self.md.order < Stage.ITALIC_AND_BOLD:
            return '*' in text or '_' in text
        # Return a real bool, as `Extra.test` declares, rather than leaking the dict
        # or the Match object to the caller.
        return bool(self.hash_table) and re.search(r'md5-[0-9a-z]{32}', text) is not None
|
2674
|
+
|
2675
|
+
# User facing extras
|
2676
|
+
# ----------------------------------------------------------
|
2677
|
+
|
2678
|
+
|
2679
|
+
class Admonitions(Extra):
    '''
    Enable parsing of RST admonitions
    '''

    name = 'admonitions'
    order = (Stage.BLOCK_GAMUT, Stage.LINK_DEFS), ()

    admonitions = r'admonition|attention|caution|danger|error|hint|important|note|tip|warning'

    admonitions_re = re.compile(r'''
        ^(\ *)\.\.\ (%s)::\ * # $1 leading indent, $2 the admonition
        (.*)? # $3 admonition title
        ((?:\s*\n\1\ {3,}.*)+?) # $4 admonition body (required)
        (?=\s*(?:\Z|\n{4,}|\n\1?\ {0,2}\S)) # until EOF, 3 blank lines or something less indented
        ''' % admonitions,
        re.IGNORECASE | re.MULTILINE | re.VERBOSE
    )

    def test(self, text):
        '''Only run when the text contains at least one admonition.'''
        return bool(self.admonitions_re.search(text))

    def sub(self, match: re.Match) -> str:
        '''
        Render one matched admonition as an `<aside>` block.

        The admonition name becomes a `<strong>` heading and part of the CSS class, the
        optional title becomes an `<em>` line, and the body is outdented and run through
        the block gamut before being indented inside the aside.
        '''
        indent, kind, title, body = match.groups()

        # class names to assign the block: the generic directive gets no extra class
        kind_lower = kind.lower()
        css_class = 'admonition' if kind_lower == 'admonition' else 'admonition %s' % kind_lower

        heading = '<strong>%s</strong>' % kind
        # titles are generally optional
        caption = '<em>%s</em>' % title if title else title

        # process the admonition body like regular markdown
        dedented = self.md._uniform_outdent(body)[1]
        rendered_body = self.md._run_block_gamut("\n%s\n" % dedented)

        # indent the contents before placing them inside the aside block
        inner = self.md._uniform_indent(
            '%s\n%s\n\n%s\n' % (heading, caption, rendered_body),
            self.md.tab, False
        )
        aside = '<aside class="%s">\n%s</aside>' % (css_class, inner)
        # now indent the whole admonition back to where it started
        return self.md._uniform_indent(aside, indent, False)

    def run(self, text):
        '''Replace every admonition in `text` with its rendered `<aside>` block.'''
        return self.admonitions_re.sub(self.sub, text)
|
2731
|
+
|
2732
|
+
|
2733
|
+
class Alerts(Extra):
    '''
    Markdown Alerts as per
    https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts
    '''

    name = 'alerts'
    order = (), (Stage.BLOCK_QUOTES, )

    alert_re = re.compile(r'''
        <blockquote>\s*
        <p>
        \[!(?P<type>NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]
        (?P<closing_tag></p>[ \t]*\n?)?
        (?P<contents>[\s\S]+?)
        </blockquote>
        ''', re.X
    )

    def test(self, text):
        '''Only run when the text contains a rendered block quote.'''
        return "<blockquote>" in text

    def sub(self, match: re.Match) -> str:
        '''
        Rewrite one matching block quote as `<div class="alert TYPE">`.

        The alert type is lower-cased for the CSS class and title-cased for the `<em>`
        heading. When the `[!TYPE]` marker's paragraph was closed right after the marker,
        the contents are emitted as they are; otherwise the marker shared a paragraph
        with the contents, so a `<p>` is re-opened in front of them.
        '''
        alert_type = match.group("type")
        body = match.group("contents").strip()
        opener = '' if match.group("closing_tag") else '<p>'
        return '<div class="alert %s">\n<em>%s</em>\n%s%s\n</div>' % (
            alert_type.lower(), alert_type.title(), opener, body
        )

    def run(self, text):
        '''Replace every alert-style block quote in `text`.'''
        return self.alert_re.sub(self.sub, text)
|
2766
|
+
|
2767
|
+
|
2768
|
+
class _BreaksExtraOpts(TypedDict, total=False):
    '''Settings accepted by the `Breaks` extra; every key is optional (`total=False`)'''
    on_backslash: bool
    '''When true, a backslash at the end of a line is replaced with <br>'''
    on_newline: bool
    '''When true, a single new line character is replaced with <br>'''
|
2774
|
+
|
2775
|
+
|
2776
|
+
class Breaks(Extra):
    '''
    Insert hard line breaks (`<br>`) at line ends.

    With no options, a line ending in two or more spaces gets a break. The
    `on_backslash` and `on_newline` options widen what counts as a break point.
    '''
    name = 'breaks'
    order = (), (Stage.ITALIC_AND_BOLD,)
    options: _BreaksExtraOpts

    def run(self, text):
        '''Replace each qualifying line ending in `text` with a `<br>` tag and a newline.'''
        # pattern for what may precede the newline, keyed by (on_backslash, on_newline)
        line_end_patterns = {
            (True, True): r' *\\?',
            (True, False): r'(?: *\\| {2,})',
            (False, True): r' *',
            (False, False): r' {2,}',
        }
        selector = (
            bool(self.options.get('on_backslash', False)),
            bool(self.options.get('on_newline', False)),
        )
        pattern = line_end_patterns[selector]

        break_tag = "<br%s\n" % self.md.empty_element_suffix
        # the lookahead leaves newlines alone when the next line starts with a list tag
        return re.sub(pattern + r"\n(?!\<(?:\/?(ul|ol|li))\>)", break_tag, text)
|
2798
|
+
|
2799
|
+
|
2800
|
+
class CodeFriendly(ItalicAndBoldProcessor):
    '''
    Disable _ and __ for em and strong.
    '''
    name = 'code-friendly'

    def sub(self, match: re.Match) -> str:
        '''
        Hide underscore-based emphasis from further processing.

        Three cases, decided by the emphasis marker (group 1) and the full match:
          - the marker itself uses `_`: the whole match is hashed;
          - only the wrapped text contains `_`: the inner text is hashed and the
            original markers are put back around the hash key;
          - no underscore anywhere: defer to the parent implementation.
        '''
        marker = match.group(1)
        whole: str = match.group(0)

        if '_' in marker:
            protected, wrap = whole, ''
        elif '_' in whole:
            protected, wrap = whole[len(marker): -len(marker)], marker
        else:
            # if no underscores are present, the text is fine and we can just leave it alone
            return super().sub(match)

        key = _hash_text(protected)
        self.hash_table[key] = protected
        return wrap + key + wrap
|
2822
|
+
|
2823
|
+
|
2824
|
+
class FencedCodeBlocks(Extra):
    '''
    Allows a code block to not have to be indented
    by fencing it with '```' on a line before and after. Based on
    <http://github.github.com/github-flavored-markdown/> with support for
    syntax highlighting.
    '''

    name = 'fenced-code-blocks'
    order = (Stage.LINK_DEFS, Stage.BLOCK_GAMUT), (Stage.PREPROCESS,)

    fenced_code_block_re = re.compile(r'''
        (?:\n+|\A\n?|(?<=\n))
        (^[ \t]*`{3,})\s{0,99}?([\w+-]+)?\s{0,99}?\n  # $1 = opening fence (captured for back-referencing), $2 = optional lang
        (.*?)                             # $3 = code block content
        \1[ \t]*\n                      # closing fence
        ''', re.M | re.X | re.S)

    def test(self, text):
        '''
        Report whether this extra should run on `text` at the current stage.

        Requires a literal "```" in the text. Runs at PREPROCESS only when
        safe mode is off, at LINK_DEFS only when safe mode is on, and always
        at BLOCK_GAMUT.
        '''
        if '```' not in text:
            return False
        if self.md.stage == Stage.PREPROCESS and not self.md.safe_mode:
            return True
        if self.md.stage == Stage.LINK_DEFS and self.md.safe_mode:
            return True
        return self.md.stage == Stage.BLOCK_GAMUT

    def _code_block_with_lexer_sub(
        self,
        codeblock: str,
        leading_indent: str,
        lexer
    ) -> str:
        '''
        Highlight a fenced code block with pygments.

        Args:
            codeblock: the codeblock to format
            leading_indent: the indentation to prefix the block with
            lexer (pygments.Lexer): the lexer to use

        Returns:
            The highlighted block, re-indented and surrounded by newlines
        '''
        formatter_opts = self.md.extras['fenced-code-blocks'] or {}

        def unhash_code(codeblock):
            # put back any HTML spans that were hashed out of the text
            for key, sanitized in list(self.md.html_spans.items()):
                codeblock = codeblock.replace(key, sanitized)
            # Undo HTML escaping so the highlighter receives the raw source.
            # "&amp;" is handled first, matching the order the pairs are listed in.
            replacements = [
                ("&amp;", "&"),
                ("&lt;", "<"),
                ("&gt;", ">")
            ]
            for old, new in replacements:
                codeblock = codeblock.replace(old, new)
            return codeblock

        # remove leading indent from code block
        _, codeblock = self.md._uniform_outdent(codeblock, max_outdent=leading_indent)

        codeblock = unhash_code(codeblock)
        colored = self.md._color_with_pygments(codeblock, lexer,
                                               **formatter_opts)

        # add back the indent to all lines
        return "\n%s\n" % self.md._uniform_indent(colored, leading_indent, True)

    def tags(self, lexer_name: str) -> Tuple[str, str]:
        '''
        Returns the tags that the encoded code block will be wrapped in, based
        upon the lexer name.

        This function can be overridden by subclasses to piggy-back off of the
        fenced code blocks syntax (see `Mermaid` extra).

        Returns:
            The opening and closing tags, as strings within a tuple
        '''
        pre_class = self.md._html_class_str_from_tag('pre')
        if "highlightjs-lang" in self.md.extras and lexer_name:
            code_class = ' class="%s language-%s"' % (lexer_name, lexer_name)
        else:
            code_class = self.md._html_class_str_from_tag('code')
        return ('<pre%s><code%s>' % (pre_class, code_class), '</code></pre>')

    def sub(self, match: re.Match) -> str:
        '''
        Produce the replacement for one `fenced_code_block_re` match.

        Uses pygments when a language was given, a lexer for it is found and
        the "highlightjs-lang" extra is off; otherwise the block is encoded
        and wrapped in the tags returned by `tags()`.
        '''
        fence = match.group(1)
        lexer_name = match.group(2)
        codeblock = match.group(3)
        codeblock = codeblock[:-1]  # drop one trailing newline

        # width of the whitespace in front of the opening fence, as spaces
        leading_indent = ' ' * (len(fence) - len(fence.lstrip()))

        # Use pygments only if not using the highlightjs-lang extra
        if lexer_name and "highlightjs-lang" not in self.md.extras:
            lexer = self.md._get_pygments_lexer(lexer_name)
            if lexer:
                return self._code_block_with_lexer_sub(codeblock, leading_indent, lexer)

        # Fenced code blocks need to be outdented before encoding, and then reapplied
        if codeblock:
            # only run the codeblock through the outdenter if not empty
            leading_indent, codeblock = self.md._uniform_outdent(codeblock, max_outdent=leading_indent)

        codeblock = self.md._encode_code(codeblock)

        tags = self.tags(lexer_name)

        return "\n%s%s%s\n%s%s\n" % (leading_indent, tags[0], codeblock, leading_indent, tags[1])

    def run(self, text):
        '''Replace every fenced code block in `text` via `sub`.'''
        return self.fenced_code_block_re.sub(self.sub, text)
|
2930
|
+
|
2931
|
+
|
2932
|
+
class Latex(Extra):
    '''
    Convert $ and $$ to <math> and </math> tags for inline and block math.

    Requires the optional `latex2mathml` package; `run` raises ImportError
    when it is missing. Text inside `<pre>` elements, triple-backtick blocks
    and single-backtick spans is left untouched.
    '''
    name = 'latex'
    order = (Stage.CODE_BLOCKS, FencedCodeBlocks), ()

    _single_dollar_re = re.compile(r'(?<!\$)\$(?!\$)(.*?)\$')
    _double_dollar_re = re.compile(r'\$\$(.*?)\$\$', re.DOTALL)

    # Ways to escape
    _pre_code_block_re = re.compile(r"<pre>(.*?)</pre>", re.DOTALL)  # Wrapped in <pre>
    _triple_re = re.compile(r'```(.*?)```', re.DOTALL)  # Wrapped in a code block ```
    _single_re = re.compile(r'(?<!`)(`)(.*?)(?<!`)\1(?!`)')  # Wrapped in a single `

    converter = None
    # Kept as a class attribute for backwards compatibility; `run` shadows it
    # with a fresh per-call dict so placeholders never leak between
    # conversions or between instances.
    code_blocks = {}

    def _convert_single_match(self, match):
        '''Convert the body of an inline `$...$` match to MathML.'''
        return self.converter.convert(match.group(1))

    def _convert_double_match(self, match):
        '''Convert the body of a `$$...$$` match to block-display MathML.'''
        # Collapse real line breaks. Stripping the two-character sequence
        # backslash+n instead would corrupt commands such as \nabla, \nu, \neq.
        # A space keeps tokens on adjacent lines from being fused together.
        latex = match.group(1).replace("\n", ' ')
        return self.converter.convert(latex, display="block")

    def code_placeholder(self, match):
        '''Stash the matched code span and return the placeholder comment that stands in for it.'''
        placeholder = f"<!--CODE_BLOCK_{len(self.code_blocks)}-->"
        self.code_blocks[placeholder] = match.group(0)
        return placeholder

    def run(self, text):
        '''
        Replace `$...$` and `$$...$$` in `text` with converted MathML.

        Raises:
            ImportError: if `latex2mathml` is not installed
        '''
        try:
            import latex2mathml.converter
            self.converter = latex2mathml.converter
        except ImportError as exc:
            raise ImportError('The "latex" extra requires the "latex2mathml" package to be installed.') from exc

        # start every run with an empty placeholder table
        self.code_blocks = {}

        # Escape by replacing with a code block
        text = self._pre_code_block_re.sub(self.code_placeholder, text)
        text = self._single_re.sub(self.code_placeholder, text)
        text = self._triple_re.sub(self.code_placeholder, text)

        text = self._single_dollar_re.sub(self._convert_single_match, text)
        text = self._double_dollar_re.sub(self._convert_double_match, text)

        # Convert placeholder tag back to original code
        for placeholder, code_block in self.code_blocks.items():
            text = text.replace(placeholder, code_block)

        return text
|
2981
|
+
|
2982
|
+
|
2983
|
+
class LinkPatterns(Extra):
    '''
    Auto-link given regex patterns in text (e.g. bug number
    references, revision number references).

    `options` is iterated as `(regex, repl)` pairs, where `repl` is either a
    callable taking the match and returning the href, or a template string
    passed to `match.expand`.
    '''
    name = 'link-patterns'
    order = (Stage.LINKS,), ()
    options: _link_patterns

    _basic_link_re = re.compile(r'!?\[.*?\]\(.*?\)')

    def run(self, text):
        '''Return `text` with every eligible pattern match wrapped in an `<a>` tag.'''
        # Generated links are swapped for hash keys while processing so that
        # later patterns cannot match inside them; they are restored at the end.
        link_from_hash = {}
        for regex, repl in self.options:
            replacements = []
            for match in regex.finditer(text):
                # skip matches that overlap a link produced by an earlier pattern
                if any(self.md._match_overlaps_substr(text, match, h) for h in link_from_hash):
                    continue

                if callable(repl):
                    href = repl(match)
                else:
                    href = match.expand(repl)
                replacements.append((match.span(), href))

            # work right-to-left so earlier spans stay valid while `text` changes
            for (start, end), href in reversed(replacements):

                # Do not match against links inside brackets.
                if text[start - 1:start] == '[' and text[end:end + 1] == ']':
                    continue

                # Do not match against links in the standard markdown syntax.
                if text[start - 2:start] == '](' or text[end:end + 2] == '")':
                    continue

                # Do not match against links which are escaped.
                if text[start - 3:start] == '"""' and text[end:end + 3] == '"""':
                    text = text[:start - 3] + text[start:end] + text[end + 3:]
                    continue

                # If the span lies within anything that already looks like a
                # link (auto-link or [text](url)), leave it alone.
                is_inside_link = any(
                    r[0] <= start and end <= r[1]
                    for link_re in (self.md._auto_link_re, self._basic_link_re)
                    for link_match in link_re.finditer(text)
                    for r in link_match.regs
                )
                if is_inside_link:
                    continue

                escaped_href = (
                    href.replace('"', '&quot;')  # b/c of attr quote
                        # To avoid markdown <em> and <strong>:
                        .replace('*', self.md._escape_table['*'])
                        .replace('_', self.md._escape_table['_']))
                link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
                link_key = _hash_text(link)
                link_from_hash[link_key] = link
                text = text[:start] + link_key + text[end:]

        for link_key, link in list(link_from_hash.items()):
            text = text.replace(link_key, link)
        return text

    def test(self, text):
        '''Always run.'''
        return True
|
3053
|
+
|
3054
|
+
|
3055
|
+
class MarkdownInHTML(Extra):
    '''
    Allow the use of `markdown="1"` in a block HTML tag to
    have markdown processing be done on its contents. Similar to
    <http://michelf.com/projects/php-markdown/extra/#markdown-attr> but with
    some limitations.
    '''
    name = 'markdown-in-html'
    order = (), (Stage.HASH_HTML,)

    def test(self, text):
        '''Always run.'''
        return True

    def run(self, text):
        '''Pass each block matched by `_strict_tag_block_sub` through `_hash_html_block_sub`, keeping its indentation.'''
        def rehash_block(html_block):
            # outdent, hash, then restore the indent (empty lines stay unindented)
            original_indent, outdented = self.md._uniform_outdent(html_block)
            hashed = self.md._hash_html_block_sub(outdented)
            return self.md._uniform_indent(
                hashed, original_indent, include_empty_lines=True, indent_empty_lines=False
            )

        return self.md._strict_tag_block_sub(text, self.md._block_tags_a, rehash_block, True)
|
3076
|
+
|
3077
|
+
|
3078
|
+
class Mermaid(FencedCodeBlocks):
    '''
    Fenced code blocks whose language is `mermaid` are wrapped in
    `<pre class="mermaid-pre"><div class="mermaid">` instead of the usual
    `<pre><code>` tags; every other language uses the inherited tags.
    '''
    name = 'mermaid'
    order = (FencedCodeBlocks,), ()

    def tags(self, lexer_name):
        '''Return the (opening, closing) wrapper tags for a block with the given language.'''
        if lexer_name == 'mermaid':
            return ('<pre class="mermaid-pre"><div class="mermaid">', '</div></pre>')
        return super().tags(lexer_name)
|
3086
|
+
|
3087
|
+
|
3088
|
+
class MiddleWordEm(ItalicAndBoldProcessor):
    '''
    Allows or disallows emphasis syntax in the middle of words,
    defaulting to allow. Disabling this means that `this_text_here` will not be
    converted to `this<em>text</em>here`.
    '''
    name = 'middle-word-em'
    order = (CodeFriendly,), (Stage.ITALIC_AND_BOLD,)

    def __init__(self, md: Markdown, options: Union[dict, bool]):
        '''
        Args:
            md: the markdown instance
            options: a dict, or a bool for backwards compatibility. A bool is
                wrapped into a dict under the `allowed` key. Recognised keys:
                - allowed (bool): whether emphasis may appear in the middle
                  of a word. Filled in as True when absent.
        '''
        if isinstance(options, bool):
            options = {'allowed': options}
        options.setdefault('allowed', True)
        super().__init__(md, options)

        # keep the unrestricted pattern around; `run` uses it to find the
        # emphasis candidates that the strict pattern rejected
        self.liberal_em_re = self.em_re
        if options['allowed']:
            return
        # strict variant: the emphasis must sit on word boundaries
        self.em_re = re.compile(
            r'(?<=\b)%s(?=\b)' % self.liberal_em_re.pattern,
            self.liberal_em_re.flags,
        )

    def run(self, text):
        '''Apply the parent processing, then conditionally hash leftover liberal `em` matches.'''
        # run strong and whatnot first
        # this also will process all strict ems
        processed = super().run(text)
        if not (self.md.order < self.md.stage):
            return processed
        # hash all non-valid ems
        return self.liberal_em_re.sub(self.sub_hash, processed)

    def sub(self, match: re.Match) -> str:
        '''Render single-character markers as `<em>`; hand anything longer to the parent.'''
        marker = match.group(1)
        if len(marker) == 1:
            return '<em>%s</em>' % match.group(2)
        # strong syntax
        return super().sub(match)
|
3130
|
+
|
3131
|
+
|
3132
|
+
class Numbering(Extra):
    '''
    Support of generic counters. Non standard extension to
    allow sequential numbering of figures, tables, equations, exhibits etc.

    A definition looks like `[#counter text @ref_id more text]` and becomes a
    numbered `<figcaption>`; a reference looks like `[@ref_id]` and becomes a
    link to that caption.
    '''

    name = 'numbering'
    order = (Stage.LINK_DEFS,), ()

    def run(self, text):
        '''Number every definition in `text`, then resolve every reference.'''
        definition_re = re.compile(r'''
            \[\#(\w+) # the counter. Open square plus hash plus a word \1
            ([^@]*) # Some optional characters, that aren't an @. \2
            @(\w+) # the id. Should this be normed? \3
            ([^\]]*)\] # The rest of the text up to the terminating ] \4
            ''', re.VERBOSE)
        reference_re = re.compile(r"\[@(\w+)\s*\]")  # [@ref_id]
        definition_html = '<figcaption class="{}" id="counter-ref-{}">{}{}{}</figcaption>'
        reference_html = '<a class="{}" href="#counter-ref-{}">{}</a>'

        next_number = {}  # counter name -> number the next definition receives
        assigned = {}     # ref id -> (number, counter name)

        def define(match):
            # First pass: hand out numbers in document order.
            counter, before, ref_id, after = match.groups()
            number = next_number.get(counter, 1)
            next_number[counter] = number + 1
            assigned[ref_id] = (number, counter)
            return definition_html.format(counter, ref_id, before.strip(), number, after)

        def refer(match):
            # Second pass: point each reference at its definition's number.
            ref_id = match.group(1)
            if ref_id in assigned:
                number, counter = assigned[ref_id]
                html = reference_html.format(counter, ref_id, number)
            else:
                # unknown id: emit a visibly broken reference
                html = reference_html.format(ref_id, 'countererror', '?' + ref_id + '?')
            if "smarty-pants" in self.md.extras:
                html = html.replace('"', self.md._escape_table['"'])
            return html

        text = definition_re.sub(define, text)
        return reference_re.sub(refer, text)
|
3195
|
+
|
3196
|
+
|
3197
|
+
class PyShell(Extra):
    '''
    Treats unindented Python interactive shell sessions as <code>
    blocks.
    '''

    name = 'pyshell'
    order = (), (Stage.LISTS,)

    def test(self, text):
        '''Report whether `text` contains a `>>>` prompt.'''
        return ">>>" in text

    def run(self, text):
        '''Replace every interactive-session block in `text` via `sub`.'''
        less_than_tab = self.md.tab_width - 1
        session_re = re.compile(r"""
            ^([ ]{0,%d})>>>[ ].*\n # first line
            ^(\1[^\S\n]*\S.*\n)* # any number of subsequent lines with at least one character
            (?=^\1?\n|\Z) # ends with a blank line or end of document
            """ % less_than_tab, re.M | re.X)

        return session_re.sub(self.sub, text)

    def sub(self, match: re.Match) -> str:
        '''
        Convert one matched session. With the "fenced-code-blocks" extra
        enabled the dedented session is fenced as ```pycon and handed to that
        extra; otherwise each line is indented by `tab_width` spaces.
        '''
        session = match.group(0)
        if "fenced-code-blocks" in self.md.extras:
            fenced = "```pycon\n" + _dedent(session) + "```\n"
            return self.md.extra_classes['fenced-code-blocks'].run(fenced)

        lines = session.splitlines()
        _dedentlines(lines)
        pad = ' ' * self.md.tab_width
        body = ('\n' + pad).join(lines)
        # the leading newline separates the block from a possible cuddled paragraph
        return '\n' + pad + body + '\n'
|
3231
|
+
|
3232
|
+
|
3233
|
+
class SmartyPants(Extra):
    '''
    Replaces ' and " with curly quotation marks or curly
    apostrophes. Replaces --, ---, ..., and . . . with en dashes, em dashes,
    and ellipses.
    '''
    name = 'smarty-pants'
    order = (), (Stage.SPAN_GAMUT,)

    _opening_single_quote_re = re.compile(r"(?<!\S)'(?=\S)")
    _opening_double_quote_re = re.compile(r'(?<!\S)"(?=\S)')
    _closing_single_quote_re = re.compile(r"(?<=\S)'")
    _closing_double_quote_re = re.compile(r'(?<=\S)"(?=(\s|,|;|\.|\?|!|$))')
    # "smarty-pants" extra: Very liberal in interpreting a single prime as an
    # apostrophe; e.g. ignores the fact that "round", "bout", "twer", and
    # "twixt" can be written without an initial apostrophe. This is fine because
    # using scare quotes (single quotation marks) is rare.
    _apostrophe_year_re = re.compile(r"'(\d\d)(?=(\s|,|;|\.|\?|!|$))")
    _contractions = ["tis", "twas", "twer", "neath", "o", "n",
                     "round", "bout", "twixt", "nuff", "fraid", "sup"]

    def test(self, text):
        '''Report whether `text` contains a straight single or double quote.'''
        return "'" in text or '"' in text

    def contractions(self, text: str) -> str:
        '''Curl the apostrophe in two-digit years and in the known leading-apostrophe contractions.'''
        text = self._apostrophe_year_re.sub(r"’\1", text)
        for word in self._contractions:
            # lower-case form first, then the capitalised one
            for form in (word, word.capitalize()):
                text = text.replace("'" + form, "’" + form)
        return text

    def run(self, text):
        """Curl quotes and apostrophes and convert dash and dot runs.

        Inspiration is: <http://daringfireball.net/projects/smartypants/>
        See "test/tm-cases/smarty_pants.text" for a full discussion of the
        support here and
        <http://code.google.com/p/python-markdown2/issues/detail?id=42> for a
        discussion of some diversion from the original SmartyPants.
        """
        if "'" in text:  # guard for perf
            text = self.contractions(text)
            text = self._opening_single_quote_re.sub("‘", text)
            text = self._closing_single_quote_re.sub("’", text)

        if '"' in text:  # guard for perf
            text = self._opening_double_quote_re.sub("“", text)
            text = self._closing_double_quote_re.sub("”", text)

        # Order matters: the longer dash run has to be consumed first.
        punctuation = (
            ("---", "—"),
            ("--", "–"),
            ("...", "…"),
            (" . . . ", "…"),
            (". . .", "…"),
        )
        for plain, fancy in punctuation:
            text = text.replace(plain, fancy)

        # TODO: Temporary hack to fix https://github.com/trentm/python-markdown2/issues/150
        if "footnotes" in self.md.extras and "footnote-ref" in text:
            # Quotes in the footnote back ref get converted to "smart" quotes
            # Change them back here to ensure they work.
            text = text.replace('class="footnote-ref”', 'class="footnote-ref"')

        return text
|
3298
|
+
|
3299
|
+
|
3300
|
+
class Strike(Extra):
    '''
    Text inside of double tilde is ~~strikethrough~~
    '''
    name = 'strike'
    order = (Stage.ITALIC_AND_BOLD,), ()

    # `~~`, then content that neither starts nor ends with whitespace, then `~~`.
    # re.S lets `.` match newlines, so a span may cross line breaks.
    _strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)

    def run(self, text):
        '''Wrap the inner text of every `_strike_re` match in `<s>` tags.'''
        return self._strike_re.sub(r"<s>\1</s>", text)

    def test(self, text):
        '''Report whether `text` contains `~~`, the marker `_strike_re` requires.'''
        return '~~' in text
|
3314
|
+
|
3315
|
+
|
3316
|
+
class Tables(Extra):
    '''
    Tables using the same format as GFM
    <https://help.github.com/articles/github-flavored-markdown#tables> and
    PHP-Markdown Extra <https://michelf.ca/projects/php-markdown/extra/#table>.
    '''
    name = 'tables'
    order = (), (Stage.LISTS,)

    def run(self, text):
        """Copying PHP-Markdown and GFM table syntax. Some regex borrowed from
        https://github.com/michelf/php-markdown/blob/lib/Michelf/Markdown.php#L2538
        """
        less_than_tab = self.md.tab_width - 1
        table_re = re.compile(r'''
            (?:(?<=\n)|\A\n?) # leading blank line

            ^[ ]{0,%d} # allowed whitespace
            (.*[|].*)[ ]*\n # $1: header row (at least one pipe)

            ^[ ]{0,%d} # allowed whitespace
            ( # $2: underline row
            # underline row with leading bar
            (?: \|\ *:?-+:?\ * )+ \|? \s?[ ]*\n
            |
            # or, underline row without leading bar
            (?: \ *:?-+:?\ *\| )+ (?: \ *:?-+:?\ * )? \s?[ ]*\n
            )

            ( # $3: data rows
            (?:
            ^[ ]{0,%d}(?!\ ) # ensure line begins with 0 to less_than_tab spaces
            .*\|.*[ ]*\n
            )+
            )
            ''' % (less_than_tab, less_than_tab, less_than_tab), re.M | re.X)
        return table_re.sub(self.sub, text)

    def sub(self, match: re.Match) -> str:
        '''Render one matched table (header row, underline row, data rows) as an HTML `<table>`.'''
        def split_row(row):
            # Trim surrounding whitespace and the outer bars, split on bars that
            # are not preceded by a backtick or backslash, then unescape `\|`.
            row = re.sub('^[ \t\n]+|[ \t\n]+$', "", row)
            row = re.sub(r'^\||\|$', "", row)
            return [
                re.sub(r'\\\|', '|', cell.strip())
                for cell in re.split(r'^\||(?<![\`\\])\|', row)
            ]

        head, underline, body = match.groups()

        # Determine aligns for columns from where the colons sit in the underline row.
        style_for = {
            (True, True): ' style="text-align:center;"',
            (True, False): ' style="text-align:left;"',
            (False, True): ' style="text-align:right;"',
        }
        align_from_col_idx = {}
        for idx, spec in enumerate(split_row(underline)):
            colons = (spec[0] == ':', spec[-1] == ':')
            if colons in style_for:
                align_from_col_idx[idx] = style_for[colons]

        def cell_line(tag, idx, content):
            return ' <%s%s>%s</%s>' % (
                tag,
                align_from_col_idx.get(idx, ''),
                self.md._run_span_gamut(content),
                tag,
            )

        # thead
        out = [
            '<table%s>' % self.md._html_class_str_from_tag('table'),
            '<thead%s>' % self.md._html_class_str_from_tag('thead'),
            '<tr>',
        ]
        for idx, content in enumerate(split_row(head)):
            out.append(cell_line('th', idx, content))
        out += ['</tr>', '</thead>']

        # tbody
        out.append('<tbody>')
        for line in body.strip('\n').split('\n'):
            out.append('<tr>')
            for idx, content in enumerate(split_row(line)):
                out.append(cell_line('td', idx, content))
            out.append('</tr>')
        out += ['</tbody>', '</table>']

        return '\n'.join(out) + '\n'
|
3399
|
+
|
3400
|
+
|
3401
|
+
class TelegramSpoiler(Extra):
    '''
    Text inside of double pipes is ||spoiler|| text, emitted as a
    `<tg-spoiler>` element.
    '''
    name = 'tg-spoiler'
    order = (), (Stage.ITALIC_AND_BOLD,)

    # `||`, one optional whitespace character, the content, one optional
    # whitespace character, `||`. re.S lets the content cross line breaks.
    _tg_spoiler_re = re.compile(r"\|\|\s?(.+?)\s?\|\|", re.S)

    def run(self, text):
        '''Wrap the inner text of every `_tg_spoiler_re` match in `<tg-spoiler>` tags.'''
        return self._tg_spoiler_re.sub(r"<tg-spoiler>\1</tg-spoiler>", text)

    def test(self, text):
        '''Report whether `text` contains `||`, the marker `_tg_spoiler_re` requires.'''
        return '||' in text
|
3412
|
+
|
3413
|
+
|
3414
|
+
class Underline(Extra):
    '''
    Text inside of double dash is --underlined--.
    '''
    name = 'underline'
    order = (Stage.ITALIC_AND_BOLD,), ()

    # `--`, content that neither starts nor ends with whitespace, `--`.
    # The lookarounds reject a `--` preceded by `<!` or followed by `>`, i.e.
    # the dashes of `<!--` and `-->`. re.S lets the content cross line breaks.
    _underline_re = re.compile(r"(?<!<!)--(?!>)(?=\S)(.+?)(?<=\S)(?<!<!)--(?!>)", re.S)

    def run(self, text):
        '''Wrap the inner text of every `_underline_re` match in `<u>` tags.'''
        return self._underline_re.sub(r"<u>\1</u>", text)

    def test(self, text):
        '''Report whether `text` contains `--`, the marker `_underline_re` requires.'''
        return '--' in text
|
3428
|
+
|
3429
|
+
|
3430
|
+
class _WavedromExtraOpts(TypedDict, total=False):
    '''Options for the `Wavedrom` extra. Declared with `total=False`, so the key is optional.'''
    prefer_embed_svg: bool
    '''
    Use the `wavedrom` library to convert diagrams to SVGs and embed them directly.
    This will only work if the `wavedrom` library has been installed.

    Defaults to `True`
    '''
|
3439
|
+
|
3440
|
+
|
3441
|
+
class Wavedrom(Extra):
    '''
    Support for generating Wavedrom digital timing diagrams from fenced code
    blocks whose language is `wavedrom`. Fenced blocks in any other language
    are left untouched.
    '''
    name = 'wavedrom'
    order = (Stage.CODE_BLOCKS, FencedCodeBlocks), ()
    options: _WavedromExtraOpts

    def test(self, text):
        '''Report whether `text` contains at least one fenced block tagged `wavedrom`.'''
        # Look at every fenced block, not just the first one, so a diagram
        # that follows an ordinary code block is still picked up.
        return any(
            block.group(2) == 'wavedrom'
            for block in FencedCodeBlocks.fenced_code_block_re.finditer(text)
        )

    def sub(self, match: re.Match) -> str:
        '''
        Produce the replacement for one fenced code block.

        Blocks that are not tagged `wavedrom` are returned unchanged. A
        wavedrom block is embedded as an SVG when `prefer_embed_svg` is set
        (the default) and the `wavedrom` package can be imported; otherwise
        it is wrapped in a `<script type="WaveDrom">` tag.
        '''
        if match.group(2) != 'wavedrom':
            # the regex matches every fenced block; only ours may be rewritten
            return match.group(0)

        # dedent the block for processing
        lead_indent, waves = self.md._uniform_outdent(match.group(3))
        # default tags to wrap the wavedrom block in
        open_tag, close_tag = '<script type="WaveDrom">\n', '</script>'

        # check if the user would prefer to have the SVG embedded directly
        embed_svg = self.options.get('prefer_embed_svg', True)

        if embed_svg:
            try:
                import wavedrom
                waves = wavedrom.render(waves).tostring()
                open_tag, close_tag = '<div>', '\n</div>'
            except ImportError:
                # library not installed: keep the <script> fallback
                pass

        # hash SVG to prevent <> chars being messed with
        self.md._escape_table[waves] = _hash_text(waves)

        return self.md._uniform_indent(
            '\n%s%s%s\n' % (open_tag, self.md._escape_table[waves], close_tag),
            lead_indent, include_empty_lines=True
        )

    def run(self, text):
        '''Rewrite every `wavedrom` fenced block in `text` via `sub`.'''
        return FencedCodeBlocks.fenced_code_block_re.sub(self.sub, text)
|
3480
|
+
|
3481
|
+
|
3482
|
+
class WikiTables(Extra):
    '''
    Google Code Wiki-style tables. See
    <http://code.google.com/p/support/wiki/WikiSyntax#Tables>.
    '''
    name = 'wiki-tables'
    order = (Tables,), ()

    def run(self, text):
        '''Replace every wiki-style table in `text` via `sub`.'''
        less_than_tab = self.md.tab_width - 1
        wiki_table_re = re.compile(r'''
            (?:(?<=\n\n)|\A\n?) # leading blank line
            ^([ ]{0,%d})\|\|.+?\|\|[ ]*\n # first line
            (^\1\|\|.+?\|\|\n)* # any number of subsequent lines
            ''' % less_than_tab, re.M | re.X)
        return wiki_table_re.sub(self.sub, text)

    def sub(self, match: re.Match) -> str:
        '''
        Render one matched wiki table as an HTML `<table>`.

        If the first cell of the first row starts with `~`, that whole row is
        emitted as a `<thead>` row; all remaining rows go into `<tbody>`.
        '''
        ttext = match.group(0).strip()
        rows = []
        for line in ttext.splitlines():
            # drop the outer `||` pair, then split on unescaped `||`
            line = line.strip()[2:-2].strip()
            row = [c.strip() for c in re.split(r'(?<!\\)\|\|', line)]
            rows.append(row)

        hlines = []

        def add_hline(line, indents=0):
            hlines.append((self.md.tab * indents) + line)

        def format_cell(text):
            # Strip the header marker `~` and run inline markdown on the cell.
            # Uses the argument, not a variable from the enclosing loops.
            return self.md._run_span_gamut(re.sub(r"^\s*~", "", text).strip(" "))

        add_hline('<table%s>' % self.md._html_class_str_from_tag('table'))
        # Check if first cell of first row is a header cell. If so, assume the whole row is a header row.
        if rows and rows[0] and re.match(r"^\s*~", rows[0][0]):
            add_hline('<thead%s>' % self.md._html_class_str_from_tag('thead'), 1)
            add_hline('<tr>', 2)
            for cell in rows[0]:
                add_hline("<th>{}</th>".format(format_cell(cell)), 3)
            add_hline('</tr>', 2)
            add_hline('</thead>', 1)
            # Only one header row allowed.
            rows = rows[1:]
        # If no more rows, don't create a tbody.
        if rows:
            add_hline('<tbody>', 1)
            for row in rows:
                add_hline('<tr>', 2)
                for cell in row:
                    add_hline('<td>{}</td>'.format(format_cell(cell)), 3)
                add_hline('</tr>', 2)
            add_hline('</tbody>', 1)
        add_hline('</table>')
        return '\n'.join(hlines) + '\n'

    def test(self, text):
        '''Report whether `text` contains `||`, the cell delimiter.'''
        return '||' in text
|
3540
|
+
|
3541
|
+
|
3542
|
+
# Register extras
# Each built-in extra class defined above calls its own `register()` here, at
# import time.
# NOTE(review): the list is alphabetical except that MiddleWordEm precedes
# Mermaid; whether registration order matters is not visible from here, so
# confirm before reordering.
Admonitions.register()
Alerts.register()
Breaks.register()
CodeFriendly.register()
FencedCodeBlocks.register()
Latex.register()
LinkPatterns.register()
MarkdownInHTML.register()
MiddleWordEm.register()
Mermaid.register()
Numbering.register()
PyShell.register()
SmartyPants.register()
Strike.register()
Tables.register()
TelegramSpoiler.register()
Underline.register()
Wavedrom.register()
WikiTables.register()
|
3562
|
+
|
3563
|
+
|
3564
|
+
# ----------------------------------------------------------
|
3565
|
+
|
3566
|
+
|
3567
|
+
# ---- internal support functions
|
3568
|
+
|
3569
|
+
|
3570
|
+
def calculate_toc_html(toc: Union[List[Tuple[int, str, str]], None]) -> Optional[str]:
    """Return the HTML for a table of contents as nested `<ul>` lists.

    Args:
        toc: `(level, id, name)` entries in the order they should be listed,
            or None. Each entry becomes `<li><a href="#id">name</a>`; `id`
            and `name` are interpolated as given, without any escaping.

    Returns:
        None when `toc` is None. Otherwise the markup as newline-separated
        lines with a trailing newline (an empty `toc` yields just "\\n").
        An entry with a higher level than the previous one opens a nested
        `<ul>`; an entry with a lower level closes the lists opened above it.
    """
    if toc is None:
        return None

    lines: List[str] = []
    h_stack = [0]  # stack of header-level numbers; 0 is the bottom marker

    def indent() -> str:
        # NOTE(review): indent unit reproduced exactly as shown in the
        # reviewed listing, which collapses leading whitespace elsewhere --
        # confirm the width against the pristine file.
        return ' ' * (len(h_stack) - 1)

    def close_level(closing_tag: str) -> None:
        # Leave the current nesting level: terminate the still-open <li>
        # (if any) and emit the closing tag at the parent's indentation.
        h_stack.pop()
        if not lines[-1].endswith("</li>"):
            lines[-1] += "</li>"
        lines.append(indent() + closing_tag)

    for level, anchor_id, name in toc:
        if level > h_stack[-1]:
            # Deeper header: open a nested list inside the current item.
            lines.append("%s<ul>" % indent())
            h_stack.append(level)
        elif level == h_stack[-1]:
            # Sibling header: the previous item is complete.
            lines[-1] += "</li>"
        else:
            # Shallower header: unwind every list deeper than this level.
            while level < h_stack[-1]:
                close_level("</ul></li>")
        lines.append('%s<li><a href="#%s">%s</a>' % (indent(), anchor_id, name))

    # Close whatever is still open once all entries have been emitted.
    while len(h_stack) > 1:
        close_level("</ul>")
    return '\n'.join(lines) + '\n'
|
3602
|
+
|
3603
|
+
|
3604
|
+
class UnicodeWithAttrs(str):
    """A `str` subclass used for the return value of conversion so that
    some attributes can be attached to the resulting text. E.g. the
    "toc_html" attribute when the "toc" extra is used.

    Both attributes default to None at class level and may be overridden
    per instance.
    """
    # Optional str-to-str metadata mapping attached to the result.
    metadata: Optional[Dict[str, str]] = None
    # Optional table-of-contents HTML attached to the result.
    toc_html: Optional[str] = None
|
3611
|
+
|
3612
|
+
## {{{ http://code.activestate.com/recipes/577257/ (r1)
|
3613
|
+
_slugify_strip_re = re.compile(r'[^\w\s-]')
|
3614
|
+
_slugify_hyphenate_re = re.compile(r'[-\s]+')
|
3615
|
+
def _slugify(value: str) -> str:
|
2731
3616
|
"""
|
2732
3617
|
Normalizes string, converts to lowercase, removes non-alpha characters,
|
2733
3618
|
and converts spaces to hyphens.
|
@@ -2735,15 +3620,14 @@ def _slugify(value):
|
|
2735
3620
|
From Django's "django/template/defaultfilters.py".
|
2736
3621
|
"""
|
2737
3622
|
import unicodedata
|
2738
|
-
value = unicodedata.normalize('NFKD', value).encode('
|
3623
|
+
value = unicodedata.normalize('NFKD', value).encode('utf-8', 'ignore').decode()
|
2739
3624
|
value = _slugify_strip_re.sub('', value).strip().lower()
|
2740
3625
|
return _slugify_hyphenate_re.sub('-', value)
|
2741
3626
|
## end of http://code.activestate.com/recipes/577257/ }}}
|
2742
3627
|
|
2743
3628
|
|
2744
3629
|
# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
|
2745
|
-
def _curry(*args, **kwargs):
|
2746
|
-
function, args = args[0], args[1:]
|
3630
|
+
def _curry(function: Callable, *args, **kwargs) -> Callable:
|
2747
3631
|
def result(*rest, **kwrest):
|
2748
3632
|
combined = kwargs.copy()
|
2749
3633
|
combined.update(kwrest)
|
@@ -2752,7 +3636,7 @@ def _curry(*args, **kwargs):
|
|
2752
3636
|
|
2753
3637
|
|
2754
3638
|
# Recipe: regex_from_encoded_pattern (1.0)
|
2755
|
-
def _regex_from_encoded_pattern(s):
|
3639
|
+
def _regex_from_encoded_pattern(s: str) -> re.Pattern:
|
2756
3640
|
"""'foo' -> re.compile(re.escape('foo'))
|
2757
3641
|
'/foo/' -> re.compile('foo')
|
2758
3642
|
'/foo/i' -> re.compile('foo', re.I)
|
@@ -2782,7 +3666,7 @@ def _regex_from_encoded_pattern(s):
|
|
2782
3666
|
|
2783
3667
|
|
2784
3668
|
# Recipe: dedent (0.1.2)
|
2785
|
-
def _dedentlines(lines, tabsize=8, skip_first_line=False):
|
3669
|
+
def _dedentlines(lines: List[str], tabsize: int = 8, skip_first_line: bool = False) -> List[str]:
|
2786
3670
|
"""_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines
|
2787
3671
|
|
2788
3672
|
"lines" is a list of lines to dedent.
|
@@ -2800,7 +3684,8 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
|
|
2800
3684
|
% (tabsize, skip_first_line))
|
2801
3685
|
margin = None
|
2802
3686
|
for i, line in enumerate(lines):
|
2803
|
-
if i == 0 and skip_first_line:
|
3687
|
+
if i == 0 and skip_first_line:
|
3688
|
+
continue
|
2804
3689
|
indent = 0
|
2805
3690
|
for ch in line:
|
2806
3691
|
if ch == ' ':
|
@@ -2813,16 +3698,19 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
|
|
2813
3698
|
break
|
2814
3699
|
else:
|
2815
3700
|
continue # skip all-whitespace lines
|
2816
|
-
if DEBUG:
|
3701
|
+
if DEBUG:
|
3702
|
+
print("dedent: indent=%d: %r" % (indent, line))
|
2817
3703
|
if margin is None:
|
2818
3704
|
margin = indent
|
2819
3705
|
else:
|
2820
3706
|
margin = min(margin, indent)
|
2821
|
-
if DEBUG:
|
3707
|
+
if DEBUG:
|
3708
|
+
print("dedent: margin=%r" % margin)
|
2822
3709
|
|
2823
3710
|
if margin is not None and margin > 0:
|
2824
3711
|
for i, line in enumerate(lines):
|
2825
|
-
if i == 0 and skip_first_line:
|
3712
|
+
if i == 0 and skip_first_line:
|
3713
|
+
continue
|
2826
3714
|
removed = 0
|
2827
3715
|
for j, ch in enumerate(line):
|
2828
3716
|
if ch == ' ':
|
@@ -2830,7 +3718,8 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
|
|
2830
3718
|
elif ch == '\t':
|
2831
3719
|
removed += tabsize - (removed % tabsize)
|
2832
3720
|
elif ch in '\r\n':
|
2833
|
-
if DEBUG:
|
3721
|
+
if DEBUG:
|
3722
|
+
print("dedent: %r: EOL -> strip up to EOL" % line)
|
2834
3723
|
lines[i] = lines[i][j:]
|
2835
3724
|
break
|
2836
3725
|
else:
|
@@ -2852,7 +3741,7 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
|
|
2852
3741
|
return lines
|
2853
3742
|
|
2854
3743
|
|
2855
|
-
def _dedent(text, tabsize=8, skip_first_line=False):
|
3744
|
+
def _dedent(text: str, tabsize: int = 8, skip_first_line: bool = False) -> str:
|
2856
3745
|
"""_dedent(text, tabsize=8, skip_first_line=False) -> dedented text
|
2857
3746
|
|
2858
3747
|
"text" is the text to dedent.
|
@@ -2863,7 +3752,7 @@ def _dedent(text, tabsize=8, skip_first_line=False):
|
|
2863
3752
|
|
2864
3753
|
textwrap.dedent(s), but don't expand tabs to spaces
|
2865
3754
|
"""
|
2866
|
-
lines = text.splitlines(
|
3755
|
+
lines = text.splitlines(True)
|
2867
3756
|
_dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line)
|
2868
3757
|
return ''.join(lines)
|
2869
3758
|
|
@@ -2895,7 +3784,7 @@ class _memoized(object):
|
|
2895
3784
|
return self.func.__doc__
|
2896
3785
|
|
2897
3786
|
|
2898
|
-
def _xml_oneliner_re_from_tab_width(tab_width):
|
3787
|
+
def _xml_oneliner_re_from_tab_width(tab_width: int) -> re.Pattern:
|
2899
3788
|
"""Standalone XML processing instruction regex."""
|
2900
3789
|
return re.compile(r"""
|
2901
3790
|
(?:
|
@@ -2917,7 +3806,7 @@ def _xml_oneliner_re_from_tab_width(tab_width):
|
|
2917
3806
|
_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)
|
2918
3807
|
|
2919
3808
|
|
2920
|
-
def _hr_tag_re_from_tab_width(tab_width):
|
3809
|
+
def _hr_tag_re_from_tab_width(tab_width: int) -> re.Pattern:
|
2921
3810
|
return re.compile(r"""
|
2922
3811
|
(?:
|
2923
3812
|
(?<=\n\n) # Starting after a blank line
|
@@ -2937,7 +3826,7 @@ def _hr_tag_re_from_tab_width(tab_width):
|
|
2937
3826
|
_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width)
|
2938
3827
|
|
2939
3828
|
|
2940
|
-
def _xml_escape_attr(attr, skip_single_quote=True):
|
3829
|
+
def _xml_escape_attr(attr: str, skip_single_quote: bool = True) -> str:
|
2941
3830
|
"""Escape the given string for use in an HTML/XML tag attribute.
|
2942
3831
|
|
2943
3832
|
By default this doesn't bother with escaping `'` to `'`, presuming that
|
@@ -2954,7 +3843,7 @@ def _xml_escape_attr(attr, skip_single_quote=True):
|
|
2954
3843
|
return escaped
|
2955
3844
|
|
2956
3845
|
|
2957
|
-
def _xml_encode_email_char_at_random(ch):
|
3846
|
+
def _xml_encode_email_char_at_random(ch: str) -> str:
|
2958
3847
|
r = random()
|
2959
3848
|
# Roughly 10% raw, 45% hex, 45% dec.
|
2960
3849
|
# '@' *must* be encoded. I [John Gruber] insist.
|
@@ -2968,14 +3857,25 @@ def _xml_encode_email_char_at_random(ch):
|
|
2968
3857
|
return '&#%s;' % ord(ch)
|
2969
3858
|
|
2970
3859
|
|
2971
|
-
def _html_escape_url(
|
2972
|
-
|
3860
|
+
def _html_escape_url(
    attr: str,
    safe_mode: Union[_safe_mode, bool, None] = False,
    charset: Optional[str] = None
) -> str:
    """
    Replace special characters that are potentially malicious in url string.

    Args:
        attr: the URL text to escape
        safe_mode: when truthy, additionally replace '+' with a space
            (unless exempted by `charset`) and escape single quotes
        charset: don't escape characters from this charset. Currently the only
            exception is for '+' when charset=='base64'

    Returns:
        `attr` with '"', '<' and '>' replaced by their HTML entities, plus
        the extra `safe_mode` replacements when enabled.
    """
    # The replacement values must be the HTML entities; replacing a character
    # with itself would leave the URL unescaped.
    escaped = (attr
        .replace('"', '&quot;')
        .replace('<', '&lt;')
        .replace('>', '&gt;'))
    if safe_mode:
        if charset != 'base64':
            # '+' is part of the base64 alphabet, so it is only rewritten
            # for other charsets.
            escaped = escaped.replace('+', ' ')
        escaped = escaped.replace("'", '&#39;')
    return escaped
|
2981
3881
|
|
@@ -3065,8 +3965,10 @@ def main(argv=None):
|
|
3065
3965
|
f = open(opts.link_patterns_file)
|
3066
3966
|
try:
|
3067
3967
|
for i, line in enumerate(f.readlines()):
|
3068
|
-
if not line.strip():
|
3069
|
-
|
3968
|
+
if not line.strip():
|
3969
|
+
continue
|
3970
|
+
if line.lstrip().startswith("#"):
|
3971
|
+
continue
|
3070
3972
|
try:
|
3071
3973
|
pat, href = line.rstrip().rsplit(None, 1)
|
3072
3974
|
except ValueError:
|