PyPI - pdoc - Versions diffs - 14.5.1__py3-none-any.whl → 14.6.0__py3-none-any.whl - Mend

pdoc 14.5.1 (py3-none-any.whl) → 14.6.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

pdoc/__init__.py +7 -1
pdoc/__main__.py +2 -2
pdoc/doc.py +23 -8
pdoc/docstrings.py +1 -1
pdoc/extract.py +1 -1
pdoc/markdown2/__init__.py +1667 -765
pdoc/render_helpers.py +76 -38
pdoc/templates/content.css +10 -6
pdoc-14.6.0.dist-info/LICENSE +5 -0
{pdoc-14.5.1.dist-info → pdoc-14.6.0.dist-info}/METADATA +2 -2
{pdoc-14.5.1.dist-info → pdoc-14.6.0.dist-info}/RECORD +14 -14
pdoc-14.5.1.dist-info/LICENSE +0 -24
{pdoc-14.5.1.dist-info → pdoc-14.6.0.dist-info}/WHEEL +0 -0
{pdoc-14.5.1.dist-info → pdoc-14.6.0.dist-info}/entry_points.txt +0 -0
{pdoc-14.5.1.dist-info → pdoc-14.6.0.dist-info}/top_level.txt +0 -0

pdoc/markdown2/__init__.py CHANGED

@@ -1,7 +1,7 @@
 # fmt: off
 # flake8: noqa
 # type: ignore
-# Taken from here: https://github.com/trentm/python-markdown2/blob/bce3f18ed86a19b418c8114a712bb6fee790c4c2/lib/markdown2.py
+# Taken from here: https://github.com/trentm/python-markdown2/blob/8d3a65bc7d4f8b64af89f668eb6c60841dc0578c/lib/markdown2.py
 #!/usr/bin/env python
 # Copyright (c) 2012 Trent Mick.
@@ -46,7 +46,11 @@ Supported extra syntax options (see -x|--extras option below and
 see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
 * admonitions: Enable parsing of RST admonitions.
-* break-on-newline: Replace single new line characters with <br> when True
+* breaks: Control where hard breaks are inserted in the markdown.
+  Options include:
+  - on_newline: Replace single new line characters with <br> when True
+  - on_backslash: Replace backslashes at the end of a line with <br>
+* break-on-newline: Alias for the on_newline option in the breaks extra.
 * code-friendly: Disable _ and __ for em and strong.
 * cuddled-lists: Allow lists to be cuddled to the preceding paragraph.
 * fenced-code-blocks: Allows a code block to not have to be indented
@@ -71,6 +75,9 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
   some limitations.
 * metadata: Extract metadata from a leading '---'-fenced block.
   See <https://github.com/trentm/python-markdown2/issues/77> for details.
+* middle-word-em: Allows or disallows emphasis syntax in the middle of words,
+  defaulting to allow. Disabling this means that `this_text_here` will not be
+  converted to `this<em>text</em>here`.
 * nofollow: Add `rel="nofollow"` to add `<a>` tags with an href. See
   <http://en.wikipedia.org/wiki/Nofollow>.
 * numbering: Support of generic counters.  Non standard extension to
@@ -104,7 +111,7 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
 #   not yet sure if there implications with this. Compare 'pydoc sre'
 #   and 'perldoc perlre'.
-__version_info__ = (2, 4, 9)
+__version_info__ = (2, 5, 1)
 __version__ = '.'.join(map(str, __version_info__))
 __author__ = "Trent Mick"
@@ -113,9 +120,24 @@ import codecs
 import logging
 import re
 import sys
-from collections import defaultdict
+from collections import defaultdict, OrderedDict
+from abc import ABC, abstractmethod
+import functools
 from hashlib import sha256
 from random import randint, random
+from typing import Any, Callable, Collection, Dict, List, Literal, Optional, Tuple, Type, TypedDict, Union
+from enum import IntEnum, auto
+if sys.version_info[1] < 9:
+    from typing import Iterable
+else:
+    from collections.abc import Iterable
+# ---- type defs
+_safe_mode = Literal['replace', 'escape']
+_extras_dict = Dict[str, Any]
+_extras_param = Union[List[str], _extras_dict]
+_link_patterns = Iterable[Tuple[re.Pattern, Union[str, Callable[[re.Match], str]]]]
 # ---- globals
@@ -128,7 +150,7 @@ DEFAULT_TAB_WIDTH = 4
 SECRET_SALT = bytes(randint(0, 1000000))
 # MD5 function was previously used for this; the "md5" prefix was kept for
 # backwards compatibility.
-def _hash_text(s):
+def _hash_text(s: str) -> str:
     return 'md5-' + sha256(SECRET_SALT + s.encode("utf-8")).hexdigest()[32:]
 # Table of hash values for escaped characters:
@@ -147,11 +169,18 @@ class MarkdownError(Exception):
 # ---- public api
-def markdown_path(path, encoding="utf-8",
-                  html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
-                  safe_mode=None, extras=None, link_patterns=None,
-                  footnote_title=None, footnote_return_symbol=None,
-                  use_file_vars=False):
+def markdown_path(
+    path: str,
+    encoding: str = "utf-8",
+    html4tags: bool = False,
+    tab_width: int = DEFAULT_TAB_WIDTH,
+    safe_mode: Optional[_safe_mode] = None,
+    extras: Optional[_extras_param] = None,
+    link_patterns: Optional[_link_patterns] = None,
+    footnote_title: Optional[str] = None,
+    footnote_return_symbol: Optional[str] = None,
+    use_file_vars: bool = False
+) -> 'UnicodeWithAttrs':
     fp = codecs.open(path, 'r', encoding)
     text = fp.read()
     fp.close()
@@ -163,10 +192,18 @@ def markdown_path(path, encoding="utf-8",
                     use_file_vars=use_file_vars).convert(text)
-def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
-             safe_mode=None, extras=None, link_patterns=None,
-             footnote_title=None, footnote_return_symbol=None,
-             use_file_vars=False, cli=False):
+def markdown(
+    text: str,
+    html4tags: bool = False,
+    tab_width: int = DEFAULT_TAB_WIDTH,
+    safe_mode: Optional[_safe_mode] = None,
+    extras: Optional[_extras_param] = None,
+    link_patterns: Optional[_link_patterns] = None,
+    footnote_title: Optional[str] = None,
+    footnote_return_symbol: Optional[str] =None,
+    use_file_vars: bool = False,
+    cli: bool = False
+) -> 'UnicodeWithAttrs':
     return Markdown(html4tags=html4tags, tab_width=tab_width,
                     safe_mode=safe_mode, extras=extras,
                     link_patterns=link_patterns,
@@ -175,6 +212,66 @@ def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
                     use_file_vars=use_file_vars, cli=cli).convert(text)
+class Stage(IntEnum):
+    PREPROCESS = auto()
+    HASH_HTML = auto()
+    LINK_DEFS = auto()
+    BLOCK_GAMUT = auto()
+    HEADERS = auto()
+    LISTS = auto()
+    CODE_BLOCKS = auto()
+    BLOCK_QUOTES = auto()
+    PARAGRAPHS = auto()
+    SPAN_GAMUT = auto()
+    CODE_SPANS = auto()
+    ESCAPE_SPECIAL = auto()
+    LINKS = auto()  # and auto links
+    ITALIC_AND_BOLD = auto()
+    POSTPROCESS = auto()
+    UNHASH_HTML = auto()
+def mark_stage(stage: Stage):
+    '''
+    Decorator that handles executing relevant `Extra`s before and after this `Stage` executes.
+    '''
+    def wrapper(func):
+        @functools.wraps(func)
+        def inner(md: 'Markdown', text, *args, **kwargs):
+            md.stage = stage
+            # set "order" prop so extras can tell if they're being invoked before/after the stage
+            md.order = stage - 0.5
+            if stage in Extra._exec_order:
+                for klass in Extra._exec_order[stage][0]:
+                    if klass.name not in md.extra_classes:
+                        continue
+                    extra = md.extra_classes[klass.name]
+                    if extra.test(text):
+                        text = extra.run(text)
+            md.order = stage
+            text = func(md, text, *args, **kwargs)
+            md.order = stage + 0.5
+            if stage in Extra._exec_order:
+                for klass in Extra._exec_order[stage][1]:
+                    if klass.name not in md.extra_classes:
+                        continue
+                    extra = md.extra_classes[klass.name]
+                    if extra.test(text):
+                        text = extra.run(text)
+            return text
+        return inner
+    return wrapper
 class Markdown(object):
     # The dict of "extras" to enable in processing -- a mapping of
     # extra name to argument for the extra. Most extras do not have an
@@ -182,27 +279,47 @@ class Markdown(object):
     #
     # This can be set via (a) subclassing and (b) the constructor
     # "extras" argument.
-    extras = None
+    extras: _extras_dict
+    # dict of `Extra` names and associated class instances, populated during _setup_extras
+    extra_classes: Dict[str, 'Extra']
-    urls = None
-    titles = None
-    html_blocks = None
-    html_spans = None
-    html_removed_text = "{(#HTML#)}"  # placeholder removed text that does not trigger bold
-    html_removed_text_compat = "[HTML_REMOVED]"  # for compat with markdown.py
+    urls: Dict[str, str]
+    titles: Dict[str, str]
+    html_blocks: Dict[str, str]
+    html_spans: Dict[str, str]
+    html_removed_text: str = "{(#HTML#)}"  # placeholder removed text that does not trigger bold
+    html_removed_text_compat: str = "[HTML_REMOVED]"  # for compat with markdown.py
+    safe_mode: Optional[_safe_mode]
-    _toc = None
+    _toc: List[Tuple[int, str, str]]
     # Used to track when we're inside an ordered or unordered list
     # (see _ProcessListItems() for details):
     list_level = 0
+    stage: Stage
+    '''Current "stage" of markdown conversion taking place'''
+    order: float
+    '''
+    Same as `Stage` but will be +/- 0.5 of the value of `Stage`.
+    This allows extras to check if they are running before or after a particular stage
+    with `if md.order < md.stage`.
+    '''
     _ws_only_line_re = re.compile(r"^[ \t]+$", re.M)
-    def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
-                 extras=None, link_patterns=None,
-                 footnote_title=None, footnote_return_symbol=None,
-                 use_file_vars=False, cli=False):
+    def __init__(
+        self,
+        html4tags: bool = False,
+        tab_width: int = DEFAULT_TAB_WIDTH,
+        safe_mode: Optional[_safe_mode] = None,
+        extras: Optional[_extras_param] = None,
+        link_patterns: Optional[_link_patterns] = None,
+        footnote_title: Optional[str] = None,
+        footnote_return_symbol: Optional[str] = None,
+        use_file_vars: bool = False,
+        cli: bool = False
+    ):
         if html4tags:
             self.empty_element_suffix = ">"
         else:
@@ -219,10 +336,13 @@ class Markdown(object):
             self.safe_mode = safe_mode
         # Massaging and building the "extras" info.
-        if self.extras is None:
+        if getattr(self, 'extras', None) is None:
             self.extras = {}
         elif not isinstance(self.extras, dict):
-            self.extras = dict([(e, None) for e in self.extras])
+            # inheriting classes may set `self.extras` as List[str].
+            # we can't allow it through type hints but we can convert it
+            self.extras = dict([(e, None) for e in self.extras])  # type:ignore
         if extras:
             if not isinstance(extras, dict):
                 extras = dict([(e, None) for e in extras])
@@ -237,14 +357,30 @@ class Markdown(object):
                 self._toc_depth = 6
             else:
                 self._toc_depth = self.extras["toc"].get("depth", 6)
-        self._instance_extras = self.extras.copy()
+        if 'header-ids' in self.extras:
+            if not isinstance(self.extras['header-ids'], dict):
+                self.extras['header-ids'] = {
+                    'mixed': False,
+                    'prefix': self.extras['header-ids'],
+                    'reset-count': True
+                }
+        if 'break-on-newline' in self.extras:
+            self.extras.setdefault('breaks', {})
+            self.extras['breaks']['on_newline'] = True
         if 'link-patterns' in self.extras:
+            # allow link patterns via extras dict without kwarg explicitly set
+            link_patterns = link_patterns or self.extras['link-patterns']
             if link_patterns is None:
                 # if you have specified that the link-patterns extra SHOULD
                 # be used (via self.extras) but you haven't provided anything
                 # via the link_patterns argument then an error is raised
                 raise MarkdownError("If the 'link-patterns' extra is used, an argument for 'link_patterns' is required")
+            self.extras['link-patterns'] = link_patterns
+        self._instance_extras = self.extras.copy()
         self.link_patterns = link_patterns
         self.footnote_title = footnote_title
         self.footnote_return_symbol = footnote_return_symbol
@@ -266,16 +402,25 @@ class Markdown(object):
         self.list_level = 0
         self.extras = self._instance_extras.copy()
         self._setup_extras()
-        self._toc = None
+        self._toc = []
     def _setup_extras(self):
         if "footnotes" in self.extras:
-            self.footnotes = {}
+            # order of insertion matters for footnotes. Use ordered dict for Python < 3.7
+            # https://docs.python.org/3/whatsnew/3.7.html#summary-release-highlights
+            self.footnotes = OrderedDict()
             self.footnote_ids = []
         if "header-ids" in self.extras:
-            self._count_from_header_id = defaultdict(int)
+            if not hasattr(self, '_count_from_header_id') or self.extras['header-ids'].get('reset-count', False):
+                self._count_from_header_id = defaultdict(int)
         if "metadata" in self.extras:
-            self.metadata = {}
+            self.metadata: Dict[str, Any] = {}
+        self.extra_classes = {}
+        for name, klass in Extra._registry.items():
+            if name not in self.extras:
+                continue
+            self.extra_classes[name] = klass(self, (self.extras.get(name, {})))
     # Per <https://developer.mozilla.org/en-US/docs/HTML/Element/a> "rel"
     # should only be used in <a> tags with an "href" attribute.
@@ -295,7 +440,7 @@ class Markdown(object):
         re.IGNORECASE | re.VERBOSE
     )
-    def convert(self, text):
+    def convert(self, text: str) -> 'UnicodeWithAttrs':
         """Convert the given text."""
         # Main function. The order in which other subs are called here is
         # essential. Link and image substitutions need to happen before
@@ -353,29 +498,12 @@ class Markdown(object):
         text = self.preprocess(text)
-        if 'wavedrom' in self.extras:
-            text = self._do_wavedrom_blocks(text)
-        if "fenced-code-blocks" in self.extras and not self.safe_mode:
-            text = self._do_fenced_code_blocks(text)
         if self.safe_mode:
             text = self._hash_html_spans(text)
         # Turn block-level HTML blocks into hash entries
         text = self._hash_html_blocks(text, raw=True)
-        if "fenced-code-blocks" in self.extras and self.safe_mode:
-            text = self._do_fenced_code_blocks(text)
-        if 'admonitions' in self.extras:
-            text = self._do_admonitions(text)
-        # Because numbering references aren't links (yet?) then we can do everything associated with counters
-        # before we get started
-        if "numbering" in self.extras:
-            text = self._do_numbering(text)
         # Strip link definitions, store in hashes.
         if "footnotes" in self.extras:
             # Must do footnotes first because an unlucky footnote defn
@@ -409,10 +537,22 @@ class Markdown(object):
             text = self._a_nofollow_or_blank_links.sub(r'<\1 rel="nofollow"\2', text)
         if "toc" in self.extras and self._toc:
+            if self.extras['header-ids'].get('mixed'):
+                # TOC will only be out of order if mixed headers is enabled
+                def toc_sort(entry):
+                    '''Sort the TOC by order of appearance in text'''
+                    match = re.search(
+                        # header tag, any attrs, the ID, any attrs, the text, close tag
+                        r'^<(h%d).*?id=(["\'])%s\2.*>%s</\1>$' % (entry[0], entry[1], re.escape(entry[2])),
+                        text, re.M
+                    )
+                    return match.start() if match else 0
+                self._toc.sort(key=toc_sort)
             self._toc_html = calculate_toc_html(self._toc)
             # Prepend toc html to output
-            if self.cli:
+            if self.cli or (self.extras['toc'] is not None and self.extras['toc'].get('prepend', False)):
                 text = '{}\n{}'.format(self._toc_html, text)
         text += "\n"
@@ -427,14 +567,16 @@ class Markdown(object):
             rv.metadata = self.metadata
         return rv
-    def postprocess(self, text):
+    @mark_stage(Stage.POSTPROCESS)
+    def postprocess(self, text: str) -> str:
         """A hook for subclasses to do some postprocessing of the html, if
         desired. This is called before unescaping of special chars and
         unhashing of raw HTML spans.
         """
         return text
-    def preprocess(self, text):
+    @mark_stage(Stage.PREPROCESS)
+    def preprocess(self, text: str) -> str:
         """A hook for subclasses to do some preprocessing of the Markdown, if
         desired. This is called after basic formatting of the text, but prior
         to any extras, safe mode, etc. processing.
@@ -477,29 +619,32 @@ class Markdown(object):
     _meta_data_fence_pattern = re.compile(r'^---[\ \t]*\n', re.MULTILINE)
     _meta_data_newline = re.compile("^\n", re.MULTILINE)
-    def _extract_metadata(self, text):
+    def _extract_metadata(self, text: str) -> str:
         if text.startswith("---"):
             fence_splits = re.split(self._meta_data_fence_pattern, text, maxsplit=2)
             metadata_content = fence_splits[1]
-            match = re.findall(self._meta_data_pattern, metadata_content)
-            if not match:
-                return text
             tail = fence_splits[2]
         else:
             metadata_split = re.split(self._meta_data_newline, text, maxsplit=1)
             metadata_content = metadata_split[0]
-            match = re.findall(self._meta_data_pattern, metadata_content)
-            if not match:
-                return text
             tail = metadata_split[1]
-        def parse_structured_value(value):
+        # _meta_data_pattern only has one capturing group, so we can assume
+        # the returned type to be list[str]
+        match: List[str] = re.findall(self._meta_data_pattern, metadata_content)
+        if not match:
+            return text
+        def parse_structured_value(value: str) -> Union[List[Any], Dict[str, Any]]:
             vs = value.lstrip()
             vs = value.replace(v[: len(value) - len(vs)], "\n")[1:]
             # List
             if vs.startswith("-"):
-                r = []
+                r: List[Any] = []
+                # the regex used has multiple capturing groups, so
+                # returned type from findall will be List[List[str]]
+                match: List[str]
                 for match in re.findall(self._key_val_list_pat, vs):
                     if match[0] and not match[1] and not match[2]:
                         r.append(match[0].strip())
@@ -564,7 +709,7 @@ class Markdown(object):
         (?P<content>.*?\1End:)
         """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE)
-    def _emacs_vars_oneliner_sub(self, match):
+    def _emacs_vars_oneliner_sub(self, match: re.Match) -> str:
         if match.group(1).strip() == '-*-' and match.group(4).strip() == '-*-':
             lead_ws = re.findall(r'^\s*', match.group(1))[0]
             tail_ws = re.findall(r'\s*$', match.group(4))[0]
@@ -573,7 +718,7 @@ class Markdown(object):
         start, end = match.span()
         return match.string[start: end]
-    def _get_emacs_vars(self, text):
+    def _get_emacs_vars(self, text: str) -> Dict[str, str]:
         """Return a dictionary of emacs-style local variables.
         Parsing is done loosely according to this spec (and according to
@@ -616,7 +761,7 @@ class Markdown(object):
             if match:
                 prefix = match.group("prefix")
                 suffix = match.group("suffix")
-                lines = match.group("content").splitlines(0)
+                lines = match.group("content").splitlines(False)
                 # print "prefix=%r, suffix=%r, content=%r, lines: %s"\
                 #      % (prefix, suffix, match.group("content"), lines)
@@ -639,8 +784,10 @@ class Markdown(object):
                 # Parse out one emacs var per line.
                 continued_for = None
                 for line in lines[:-1]:  # no var on the last line ("PREFIX End:")
-                    if prefix: line = line[len(prefix):]  # strip prefix
-                    if suffix: line = line[:-len(suffix)]  # strip suffix
+                    if prefix:
+                        line = line[len(prefix):]  # strip prefix
+                    if suffix:
+                        line = line[:-len(suffix)]  # strip suffix
                     line = line.strip()
                     if continued_for:
                         variable = continued_for
@@ -674,7 +821,7 @@ class Markdown(object):
         return emacs_vars
-    def _detab_line(self, line):
+    def _detab_line(self, line: str) -> str:
         r"""Recusively convert tabs to spaces in a single line.
         Called from _detab()."""
@@ -685,7 +832,7 @@ class Markdown(object):
         output = chunk1 + chunk2
         return self._detab_line(output)
-    def _detab(self, text):
+    def _detab(self, text: str) -> str:
         r"""Iterate text line by line and convert tabs to spaces.
             >>> m = Markdown()
@@ -711,7 +858,7 @@ class Markdown(object):
     # _block_tags_b.  This way html5 tags are easy to keep track of.
     _html5tags = '|article|aside|header|hgroup|footer|nav|section|figure|figcaption'
-    _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del'
+    _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del|style|html|head|body'
     _block_tags_a += _html5tags
     _strict_tag_block_re = re.compile(r"""
@@ -730,6 +877,11 @@ class Markdown(object):
     _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math'
     _block_tags_b += _html5tags
+    _span_tags = (
+        'a|abbr|acronym|b|bdo|big|br|button|cite|code|dfn|em|i|img|input|kbd|label|map|object|output|q'
+        '|samp|script|select|small|span|strong|sub|sup|textarea|time|tt|var'
+    )
     _liberal_tag_block_re = re.compile(r"""
         (                       # save in \1
             ^                   # start of line  (with re.M)
@@ -745,11 +897,26 @@ class Markdown(object):
     _html_markdown_attr_re = re.compile(
         r'''\s+markdown=("1"|'1')''')
-    def _hash_html_block_sub(self, match, raw=False):
+    def _hash_html_block_sub(
+        self,
+        match: Union[re.Match, str],
+        raw: bool = False
+    ) -> str:
         if isinstance(match, str):
             html = match
+            tag = None
         else:
             html = match.group(1)
+            try:
+                tag = match.group(2)
+            except IndexError:
+                tag = None
+        if not tag:
+            m = re.match(r'.*?<(\S).*?\s*>', html)
+            # tag shouldn't be none but make the assertion for type checker
+            assert m is not None
+            tag = m.group(1)
         if raw and self.safe_mode:
             html = self._sanitize_html(html)
@@ -758,9 +925,17 @@ class Markdown(object):
             m = self._html_markdown_attr_re.search(first_line)
             if m:
                 lines = html.split('\n')
+                # if MD is on same line as opening tag then split across two lines
+                lines = list(filter(None, (re.split(r'(.*?<%s.*markdown=.*?>)' % tag, lines[0])))) + lines[1:]
+                # if MD on same line as closing tag, split across two lines
+                lines = lines[:-1] + list(filter(None, re.split(r'(\s*?</%s>.*?$)' % tag, lines[-1])))
+                # extract key sections of the match
+                first_line = lines[0]
                 middle = '\n'.join(lines[1:-1])
                 last_line = lines[-1]
+                # remove `markdown="1"` attr from tag
                 first_line = first_line[:m.start()] + first_line[m.end():]
+                # hash the HTML segments to protect them
                 f_key = _hash_text(first_line)
                 self.html_blocks[f_key] = first_line
                 l_key = _hash_text(last_line)
@@ -768,11 +943,14 @@ class Markdown(object):
                 return ''.join(["\n\n", f_key,
                     "\n\n", middle, "\n\n",
                     l_key, "\n\n"])
+        elif self.extras.get('header-ids', {}).get('mixed') and self._h_tag_re.match(html):
+            html = self._h_tag_re.sub(self._h_tag_sub, html)
         key = _hash_text(html)
         self.html_blocks[key] = html
         return "\n\n" + key + "\n\n"
-    def _hash_html_blocks(self, text, raw=False):
+    @mark_stage(Stage.HASH_HTML)
+    def _hash_html_blocks(self, text: str, raw: bool = False) -> str:
         """Hashify HTML blocks
         We only want to do this for block-level HTML tags, such as headers,
@@ -806,6 +984,14 @@ class Markdown(object):
         # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
         text = self._liberal_tag_block_re.sub(hash_html_block_sub, text)
+        # now do the same for spans that are acting like blocks
+        # eg: an anchor split over multiple lines for readability
+        text = self._strict_tag_block_sub(
+            text, self._span_tags,
+            # inline elements can't contain block level elements, so only span gamut is required
+            lambda t: hash_html_block_sub(self._run_span_gamut(t))
+        )
         # Special case just for <hr />. It was easier to make a special
         # case than to make the other regex more complicated.
         if "<hr" in text:
@@ -880,27 +1066,45 @@ class Markdown(object):
         return text
-    def _strict_tag_block_sub(self, text, html_tags_re, callback):
+    def _strict_tag_block_sub(
+        self,
+        text: str,
+        html_tags_re: str,
+        callback: Callable[[str], str],
+        allow_indent: bool = False
+    ) -> str:
+        '''
+        Finds and substitutes HTML blocks within blocks of text
+        Args:
+            text: the text to search
+            html_tags_re: a regex pattern of HTML block tags to match against.
+                For example, `Markdown._block_tags_a`
+            callback: callback function that receives the found HTML text block and returns a new str
+            allow_indent: allow matching HTML blocks that are not completely outdented
+        '''
         tag_count = 0
         current_tag = html_tags_re
         block = ''
         result = ''
         for chunk in text.splitlines(True):
-            is_markup = re.match(r'^(?:</code>(?=</pre>))?(</?(%s)\b>?)' % current_tag, chunk)
+            is_markup = re.match(
+                r'^(\s{0,%s})(?:</code>(?=</pre>))?(</?(%s)\b>?)' % ('' if allow_indent else '0', current_tag), chunk
+            )
             block += chunk
             if is_markup:
-                if chunk.startswith('</'):
+                if chunk.startswith('%s</' % is_markup.group(1)):
                     tag_count -= 1
                 else:
                     # if close tag is in same line
-                    if '</%s>' % is_markup.group(2) in chunk[is_markup.end():]:
+                    if self._tag_is_closed(is_markup.group(3), chunk):
                         # we must ignore these
                         is_markup = None
                     else:
                         tag_count += 1
-                        current_tag = is_markup.group(2)
+                        current_tag = is_markup.group(3)
             if tag_count == 0:
                 if is_markup:
@@ -913,7 +1117,12 @@ class Markdown(object):
         return result
-    def _strip_link_definitions(self, text):
+    def _tag_is_closed(self, tag_name: str, text: str) -> bool:
+        # super basic check if number of open tags == number of closing tags
+        return len(re.findall('<%s(?:.*?)>' % tag_name, text)) == len(re.findall('</%s>' % tag_name, text))
+    @mark_stage(Stage.LINK_DEFS)
+    def _strip_link_definitions(self, text: str) -> str:
         # Strips link definitions from text, stores the URLs and titles in
         # hash references.
         less_than_tab = self.tab_width - 1
@@ -940,7 +1149,7 @@ class Markdown(object):
             """ % less_than_tab, re.X | re.M | re.U)
         return _link_def_re.sub(self._extract_link_def_sub, text)
-    def _extract_link_def_sub(self, match):
+    def _extract_link_def_sub(self, match: re.Match) -> str:
         id, url, title = match.groups()
         key = id.lower()    # Link IDs are case-insensitive
         self.urls[key] = self._encode_amps_and_angles(url)
@@ -948,65 +1157,7 @@ class Markdown(object):
             self.titles[key] = title
         return ""
-    def _do_numbering(self, text):
-        ''' We handle the special extension for generic numbering for
-            tables, figures etc.
-        '''
-        # First pass to define all the references
-        self.regex_defns = re.compile(r'''
-            \[\#(\w+) # the counter.  Open square plus hash plus a word \1
-            ([^@]*)   # Some optional characters, that aren't an @. \2
-            @(\w+)       # the id.  Should this be normed? \3
-            ([^\]]*)\]   # The rest of the text up to the terminating ] \4
-            ''', re.VERBOSE)
-        self.regex_subs = re.compile(r"\[@(\w+)\s*\]")  # [@ref_id]
-        counters = {}
-        references = {}
-        replacements = []
-        definition_html = '<figcaption class="{}" id="counter-ref-{}">{}{}{}</figcaption>'
-        reference_html = '<a class="{}" href="#counter-ref-{}">{}</a>'
-        for match in self.regex_defns.finditer(text):
-            # We must have four match groups otherwise this isn't a numbering reference
-            if len(match.groups()) != 4:
-                continue
-            counter = match.group(1)
-            text_before = match.group(2).strip()
-            ref_id = match.group(3)
-            text_after = match.group(4)
-            number = counters.get(counter, 1)
-            references[ref_id] = (number, counter)
-            replacements.append((match.start(0),
-                                 definition_html.format(counter,
-                                                        ref_id,
-                                                        text_before,
-                                                        number,
-                                                        text_after),
-                                 match.end(0)))
-            counters[counter] = number + 1
-        for repl in reversed(replacements):
-            text = text[:repl[0]] + repl[1] + text[repl[2]:]
-        # Second pass to replace the references with the right
-        # value of the counter
-        # Fwiw, it's vaguely annoying to have to turn the iterator into
-        # a list and then reverse it but I can't think of a better thing to do.
-        for match in reversed(list(self.regex_subs.finditer(text))):
-            number, counter = references.get(match.group(1), (None, None))
-            if number is not None:
-                repl = reference_html.format(counter,
-                                             match.group(1),
-                                             number)
-            else:
-                repl = reference_html.format(match.group(1),
-                                             'countererror',
-                                             '?' + match.group(1) + '?')
-            if "smarty-pants" in self.extras:
-                repl = repl.replace('"', self._escape_table['"'])
-            text = text[:match.start()] + repl + text[match.end():]
-        return text
-    def _extract_footnote_def_sub(self, match):
+    def _extract_footnote_def_sub(self, match: re.Match) -> str:
         id, text = match.groups()
         text = _dedent(text, skip_first_line=not text.startswith('\n')).strip()
         normed_id = re.sub(r'\W', '-', id)
@@ -1015,7 +1166,7 @@ class Markdown(object):
         self.footnotes[normed_id] = text + "\n\n"
         return ""
-    def _strip_footnote_definitions(self, text):
+    def _strip_footnote_definitions(self, text: str) -> str:
         """A footnote definition looks like this:
             [^note-id]: Text of the note.
@@ -1050,19 +1201,11 @@ class Markdown(object):
     _hr_re = re.compile(r'^[ ]{0,3}([-_*])[ ]{0,2}(\1[ ]{0,2}){2,}$', re.M)
-    def _run_block_gamut(self, text):
+    @mark_stage(Stage.BLOCK_GAMUT)
+    def _run_block_gamut(self, text: str) -> str:
         # These are all the transformations that form block-level
         # tags like paragraphs, headers, and list items.
-        if 'admonitions' in self.extras:
-            text = self._do_admonitions(text)
-        if 'wavedrom' in self.extras:
-            text = self._do_wavedrom_blocks(text)
-        if "fenced-code-blocks" in self.extras:
-            text = self._do_fenced_code_blocks(text)
         text = self._do_headers(text)
         # Do Horizontal Rules:
@@ -1075,13 +1218,6 @@ class Markdown(object):
         text = self._do_lists(text)
-        if "pyshell" in self.extras:
-            text = self._prepare_pyshell_blocks(text)
-        if "wiki-tables" in self.extras:
-            text = self._do_wiki_tables(text)
-        if "tables" in self.extras:
-            text = self._do_tables(text)
         text = self._do_code_blocks(text)
         text = self._do_block_quotes(text)
@@ -1096,164 +1232,8 @@ class Markdown(object):
         return text
-    def _pyshell_block_sub(self, match):
-        if "fenced-code-blocks" in self.extras:
-            dedented = _dedent(match.group(0))
-            return self._do_fenced_code_blocks("```pycon\n" + dedented + "```\n")
-        lines = match.group(0).splitlines(0)
-        _dedentlines(lines)
-        indent = ' ' * self.tab_width
-        s = ('\n'  # separate from possible cuddled paragraph
-             + indent + ('\n'+indent).join(lines)
-             + '\n')
-        return s
-    def _prepare_pyshell_blocks(self, text):
-        """Ensure that Python interactive shell sessions are put in
-        code blocks -- even if not properly indented.
-        """
-        if ">>>" not in text:
-            return text
-        less_than_tab = self.tab_width - 1
-        _pyshell_block_re = re.compile(r"""
-            ^([ ]{0,%d})>>>[ ].*\n  # first line
-            ^(\1[^\S\n]*\S.*\n)*    # any number of subsequent lines with at least one character
-            (?=^\1?\n|\Z)           # ends with a blank line or end of document
-            """ % less_than_tab, re.M | re.X)
-        return _pyshell_block_re.sub(self._pyshell_block_sub, text)
-    def _table_sub(self, match):
-        trim_space_re = '^[ \t\n]+|[ \t\n]+$'
-        trim_bar_re = r'^\||\|$'
-        split_bar_re = r'^\||(?<![\`\\])\|'
-        escape_bar_re = r'\\\|'
-        head, underline, body = match.groups()
-        # Determine aligns for columns.
-        cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", underline)))]
-        align_from_col_idx = {}
-        for col_idx, col in enumerate(cols):
-            if col[0] == ':' and col[-1] == ':':
-                align_from_col_idx[col_idx] = ' style="text-align:center;"'
-            elif col[0] == ':':
-                align_from_col_idx[col_idx] = ' style="text-align:left;"'
-            elif col[-1] == ':':
-                align_from_col_idx[col_idx] = ' style="text-align:right;"'
-        # thead
-        hlines = ['<table%s>' % self._html_class_str_from_tag('table'), '<thead%s>' % self._html_class_str_from_tag('thead'), '<tr>']
-        cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))]
-        for col_idx, col in enumerate(cols):
-            hlines.append('  <th%s>%s</th>' % (
-                align_from_col_idx.get(col_idx, ''),
-                self._run_span_gamut(col)
-            ))
-        hlines.append('</tr>')
-        hlines.append('</thead>')
-        # tbody
-        hlines.append('<tbody>')
-        for line in body.strip('\n').split('\n'):
-            hlines.append('<tr>')
-            cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)))]
-            for col_idx, col in enumerate(cols):
-                hlines.append('  <td%s>%s</td>' % (
-                    align_from_col_idx.get(col_idx, ''),
-                    self._run_span_gamut(col)
-                ))
-            hlines.append('</tr>')
-        hlines.append('</tbody>')
-        hlines.append('</table>')
-        return '\n'.join(hlines) + '\n'
-    def _do_tables(self, text):
-        """Copying PHP-Markdown and GFM table syntax. Some regex borrowed from
-        https://github.com/michelf/php-markdown/blob/lib/Michelf/Markdown.php#L2538
-        """
-        less_than_tab = self.tab_width - 1
-        table_re = re.compile(r'''
-                (?:(?<=\n\n)|\A\n?)             # leading blank line
-                ^[ ]{0,%d}                      # allowed whitespace
-                (.*[|].*)  \n                   # $1: header row (at least one pipe)
-                ^[ ]{0,%d}                      # allowed whitespace
-                (                               # $2: underline row
-                    # underline row with leading bar
-                    (?:  \|\ *:?-+:?\ *  )+  \|? \s? \n
-                    |
-                    # or, underline row without leading bar
-                    (?:  \ *:?-+:?\ *\|  )+  (?:  \ *:?-+:?\ *  )? \s? \n
-                )
-                (                               # $3: data rows
-                    (?:
-                        ^[ ]{0,%d}(?!\ )         # ensure line begins with 0 to less_than_tab spaces
-                        .*\|.*  \n
-                    )+
-                )
-            ''' % (less_than_tab, less_than_tab, less_than_tab), re.M | re.X)
-        return table_re.sub(self._table_sub, text)
-    def _wiki_table_sub(self, match):
-        ttext = match.group(0).strip()
-        # print('wiki table: %r' % match.group(0))
-        rows = []
-        for line in ttext.splitlines(0):
-            line = line.strip()[2:-2].strip()
-            row = [c.strip() for c in re.split(r'(?<!\\)\|\|', line)]
-            rows.append(row)
-        # from pprint import pprint
-        # pprint(rows)
-        hlines = []
-        def add_hline(line, indents=0):
-            hlines.append((self.tab * indents) + line)
-        def format_cell(text):
-            return self._run_span_gamut(re.sub(r"^\s*~", "", cell).strip(" "))
-        add_hline('<table%s>' % self._html_class_str_from_tag('table'))
-        # Check if first cell of first row is a header cell. If so, assume the whole row is a header row.
-        if rows and rows[0] and re.match(r"^\s*~", rows[0][0]):
-            add_hline('<thead%s>' % self._html_class_str_from_tag('thead'), 1)
-            add_hline('<tr>', 2)
-            for cell in rows[0]:
-                add_hline("<th>{}</th>".format(format_cell(cell)), 3)
-            add_hline('</tr>', 2)
-            add_hline('</thead>', 1)
-            # Only one header row allowed.
-            rows = rows[1:]
-        # If no more rows, don't create a tbody.
-        if rows:
-            add_hline('<tbody>', 1)
-            for row in rows:
-                add_hline('<tr>', 2)
-                for cell in row:
-                    add_hline('<td>{}</td>'.format(format_cell(cell)), 3)
-                add_hline('</tr>', 2)
-            add_hline('</tbody>', 1)
-        add_hline('</table>')
-        return '\n'.join(hlines) + '\n'
-    def _do_wiki_tables(self, text):
-        # Optimization.
-        if "||" not in text:
-            return text
-        less_than_tab = self.tab_width - 1
-        wiki_table_re = re.compile(r'''
-            (?:(?<=\n\n)|\A\n?)            # leading blank line
-            ^([ ]{0,%d})\|\|.+?\|\|[ ]*\n  # first line
-            (^\1\|\|.+?\|\|\n)*        # any number of subsequent lines
-            ''' % less_than_tab, re.M | re.X)
-        return wiki_table_re.sub(self._wiki_table_sub, text)
-    def _run_span_gamut(self, text):
+    @mark_stage(Stage.SPAN_GAMUT)
+    def _run_span_gamut(self, text: str) -> str:
         # These are all the transformations that occur *within* block-level
         # tags like paragraphs, headers, and list items.
@@ -1262,9 +1242,6 @@ class Markdown(object):
         text = self._escape_special_chars(text)
         # Process anchor and image tags.
-        if "link-patterns" in self.extras:
-            text = self._do_link_patterns(text)
         text = self._do_links(text)
         # Make links out of things like `<http://example.com/>`
@@ -1274,25 +1251,10 @@ class Markdown(object):
         text = self._encode_amps_and_angles(text)
-        if "strike" in self.extras:
-            text = self._do_strike(text)
-        if "underline" in self.extras:
-            text = self._do_underline(text)
         text = self._do_italics_and_bold(text)
-        if "tg-spoiler" in self.extras:
-            text = self._do_tg_spoiler(text)
-        if "smarty-pants" in self.extras:
-            text = self._do_smart_punctuation(text)
-        # Do hard breaks:
-        if "break-on-newline" in self.extras:
-            text = re.sub(r" *\n(?!\<(?:\/?(ul|ol|li))\>)", "<br%s\n" % self.empty_element_suffix, text)
-        else:
-            text = re.sub(r" {2,}\n", " <br%s\n" % self.empty_element_suffix, text)
+        # Do hard breaks
+        text = re.sub(r" {2,}\n(?!\<(?:\/?(ul|ol|li))\>)", "<br%s\n" % self.empty_element_suffix, text)
         return text
@@ -1317,7 +1279,8 @@ class Markdown(object):
         )
         """, re.X)
-    def _escape_special_chars(self, text):
+    @mark_stage(Stage.ESCAPE_SPECIAL)
+    def _escape_special_chars(self, text: str) -> str:
         # Python markdown note: the HTML tokenization here differs from
         # that in Markdown.pl, hence the behaviour for subtle cases can
         # differ (I believe the tokenizer here does a better job because
@@ -1348,7 +1311,8 @@ class Markdown(object):
             is_html_markup = not is_html_markup
         return ''.join(escaped)
-    def _hash_html_spans(self, text):
+    @mark_stage(Stage.HASH_HTML)
+    def _hash_html_spans(self, text: str) -> str:
         # Used for safe_mode.
         def _is_auto_link(s):
@@ -1371,26 +1335,41 @@ class Markdown(object):
             return re.match(r'<code>md5-[A-Fa-f0-9]{32}</code>', ''.join(peek_tokens))
+        def _is_comment(token):
+            if self.safe_mode == 'replace':
+                # don't bother processing each section of comment in replace mode. Just do the whole thing
+                return
+            return re.match(r'(<!--)(.*)(-->)', token)
+        def _hash(token):
+            key = _hash_text(token)
+            self.html_spans[key] = token
+            return key
         tokens = []
         split_tokens = self._sorta_html_tokenize_re.split(text)
         is_html_markup = False
         for index, token in enumerate(split_tokens):
             if is_html_markup and not _is_auto_link(token) and not _is_code_span(index, token):
-                sanitized = self._sanitize_html(token)
-                key = _hash_text(sanitized)
-                self.html_spans[key] = sanitized
-                tokens.append(key)
+                is_comment = _is_comment(token)
+                if is_comment:
+                    tokens.append(_hash(self._sanitize_html(is_comment.group(1))))
+                    # sanitise but leave comment body intact for further markdown processing
+                    tokens.append(self._sanitize_html(is_comment.group(2)))
+                    tokens.append(_hash(self._sanitize_html(is_comment.group(3))))
+                else:
+                    tokens.append(_hash(self._sanitize_html(token)))
             else:
                 tokens.append(self._encode_incomplete_tags(token))
             is_html_markup = not is_html_markup
         return ''.join(tokens)
-    def _unhash_html_spans(self, text):
+    def _unhash_html_spans(self, text: str) -> str:
         for key, sanitized in list(self.html_spans.items()):
             text = text.replace(key, sanitized)
         return text
-    def _sanitize_html(self, s):
+    def _sanitize_html(self, s: str) -> str:
         if self.safe_mode == "replace":
             return self.html_removed_text
         elif self.safe_mode == "escape":
@@ -1428,14 +1407,14 @@ class Markdown(object):
     _strip_anglebrackets = re.compile(r'<(.*)>.*')
-    def _find_non_whitespace(self, text, start):
+    def _find_non_whitespace(self, text: str, start: int) -> int:
         """Returns the index of the first non-whitespace character in text
         after (and including) start
         """
         match = self._whitespace.match(text, start)
-        return match.end()
+        return match.end() if match else len(text)
-    def _find_balanced(self, text, start, open_c, close_c):
+    def _find_balanced(self, text: str, start: int, open_c: str, close_c: str) -> int:
         """Returns the index where the open_c and close_c characters balance
         out - the same number of open_c and close_c are encountered - or the
         end of string if it's reached before the balance point is found.
@@ -1451,7 +1430,7 @@ class Markdown(object):
             i += 1
         return i
-    def _extract_url_and_title(self, text, start):
+    def _extract_url_and_title(self, text: str, start: int) -> Union[Tuple[str, str, int], Tuple[None, None, None]]:
         """Extracts the url and (optional) title from the tail of a link"""
         # text[start] equals the opening parenthesis
         idx = self._find_non_whitespace(text, start+1)
@@ -1470,19 +1449,56 @@ class Markdown(object):
             url = self._strip_anglebrackets.sub(r'\1', url)
         return url, title, end_idx
-    def _protect_url(self, url):
+    # https://developer.mozilla.org/en-US/docs/web/http/basics_of_http/data_urls
+    # https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types
+    _data_url_re = re.compile(r'''
+        data:
+        # in format type/subtype;parameter=optional
+        (?P<mime>\w+/[\w+\.-]+(?:;\w+=[\w+\.-]+)?)?
+        # optional base64 token
+        (?P<token>;base64)?
+        ,(?P<data>.*)
+    ''', re.X)
+    def _protect_url(self, url: str) -> str:
         '''
         Function that passes a URL through `_html_escape_url` to remove any nasty characters,
         and then hashes the now "safe" URL to prevent other safety mechanisms from tampering
         with it (eg: escaping "&" in URL parameters)
         '''
-        url = _html_escape_url(url, safe_mode=self.safe_mode)
+        data_url = self._data_url_re.match(url)
+        charset = None
+        if data_url is not None:
+            mime = data_url.group('mime') or ''
+            if mime.startswith('image/') and data_url.group('token') == ';base64':
+                charset='base64'
+        url = _html_escape_url(url, safe_mode=self.safe_mode, charset=charset)
         key = _hash_text(url)
         self._escape_table[url] = key
         return key
-    _safe_protocols = re.compile(r'(https?|ftp):', re.I)
-    def _do_links(self, text):
+    _safe_protocols = r'(?:https?|ftp):\/\/|(?:mailto|tel):'
+    @property
+    def _safe_href(self):
+        '''
+        _safe_href is adapted from pagedown's Markdown.Sanitizer.js
+        From: https://github.com/StackExchange/pagedown/blob/master/LICENSE.txt
+        Original Showdown code copyright (c) 2007 John Fraser
+        Modifications and bugfixes (c) 2009 Dana Robinson
+        Modifications and bugfixes (c) 2009-2014 Stack Exchange Inc.
+        '''
+        safe = r'-\w'
+        # omitted ['"<>] for XSS reasons
+        less_safe = r'#/\.!#$%&\(\)\+,/:;=\?@\[\]^`\{\}\|~'
+        # dot seperated hostname, optional port number, not followed by protocol seperator
+        domain = r'(?:[%s]+(?:\.[%s]+)*)(?:(?<!tel):\d+/?)?(?![^:/]*:/*)' % (safe, safe)
+        fragment = r'[%s]*' % (safe + less_safe)
+        return re.compile(r'^(?:(%s)?(%s)(%s)|(#|\.{,2}/)(%s))$' % (self._safe_protocols, domain, fragment, fragment), re.I)
+    @mark_stage(Stage.LINKS)
+    def _do_links(self, text: str) -> str:
         """Turn Markdown link shortcuts into XHTML <a> and <img> tags.
         This is a combination of Markdown.pl's _DoAnchors() and
@@ -1599,7 +1615,7 @@ class Markdown(object):
                         anchor_allowed_pos = start_idx + len(result)
                         text = text[:start_idx] + result + text[url_end_idx:]
                     elif start_idx >= anchor_allowed_pos:
-                        safe_link = self._safe_protocols.match(url) or url.startswith('#')
+                        safe_link = self._safe_href.match(url)
                         if self.safe_mode and not safe_link:
                             result_head = '<a href="#"%s>' % (title_str)
                         else:
@@ -1655,7 +1671,7 @@ class Markdown(object):
                             curr_pos = start_idx + len(result)
                             text = text[:start_idx] + result + text[match.end():]
                         elif start_idx >= anchor_allowed_pos:
-                            if self.safe_mode and not self._safe_protocols.match(url):
+                            if self.safe_mode and not self._safe_href.match(url):
                                 result_head = '<a href="#"%s>' % (title_str)
                             else:
                                 result_head = '<a href="%s"%s>' % (self._protect_url(url), title_str)
@@ -1672,7 +1688,8 @@ class Markdown(object):
                             curr_pos = start_idx + 1
                     else:
                         # This id isn't defined, leave the markup alone.
-                        curr_pos = match.end()
+                        # set current pos to end of link title and continue from there
+                        curr_pos = p
                     continue
             # Otherwise, it isn't markup.
@@ -1680,7 +1697,11 @@ class Markdown(object):
         return text
-    def header_id_from_text(self, text, prefix, n):
+    def header_id_from_text(self,
+        text: str,
+        prefix: str,
+        n: Optional[int] = None
+    ) -> str:
         """Generate a header id attribute value from the given header
         HTML content.
@@ -1690,7 +1711,7 @@ class Markdown(object):
         @param text {str} The text of the header tag
         @param prefix {str} The requested prefix for header ids. This is the
             value of the "header-ids" extra key, if any. Otherwise, None.
-        @param n {int} The <hN> tag number, i.e. `1` for an <h1> tag.
+        @param n {int} (unused) The <hN> tag number, i.e. `1` for an <h1> tag.
         @returns {str} The value for the header tag's "id" attribute. Return
             None to not have an id attribute and to exclude this header from
             the TOC (if the "toc" extra is specified).
@@ -1705,7 +1726,14 @@ class Markdown(object):
         return header_id
-    def _toc_add_entry(self, level, id, name):
+    def _header_id_exists(self, text: str) -> bool:
+        header_id = _slugify(text)
+        prefix = self.extras['header-ids'].get('prefix')
+        if prefix and isinstance(prefix, str):
+            header_id = prefix + '-' + header_id
+        return header_id in self._count_from_header_id or header_id in map(lambda x: x[1], self._toc)
+    def _toc_add_entry(self, level: int, id: str, name: str) -> None:
         if level > self._toc_depth:
             return
         if self._toc is None:
@@ -1728,7 +1756,8 @@ class Markdown(object):
     _h_re = re.compile(_h_re_base % '*', re.X | re.M)
     _h_re_tag_friendly = re.compile(_h_re_base % '+', re.X | re.M)
-    def _h_sub(self, match):
+    def _h_sub(self, match: re.Match) -> str:
+        '''Handles processing markdown headers'''
         if match.group(1) is not None and match.group(3) == "-":
             return match.group(1)
         elif match.group(1) is not None:
@@ -1746,7 +1775,7 @@ class Markdown(object):
         header_id_attr = ""
         if "header-ids" in self.extras:
             header_id = self.header_id_from_text(header_group,
-                self.extras["header-ids"], n)
+                self.extras["header-ids"].get('prefix'), n)
             if header_id:
                 header_id_attr = ' id="%s"' % header_id
         html = self._run_span_gamut(header_group)
@@ -1754,7 +1783,39 @@ class Markdown(object):
             self._toc_add_entry(n, header_id, html)
         return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n)
-    def _do_headers(self, text):
+    _h_tag_re = re.compile(r'''
+        ^<h([1-6])(.*)>  # \1 tag num, \2 attrs
+        (.*)  # \3 text
+        </h\1>
+    ''', re.X | re.M)
+    def _h_tag_sub(self, match: re.Match) -> str:
+        '''Different to `_h_sub` in that this function handles existing HTML headers'''
+        text = match.string[match.start(): match.end()]
+        h_level = int(match.group(1))
+        # extract id= attr from tag, trying to account for regex "misses"
+        id_attr = (re.match(r'.*?id=(\S+)?.*', match.group(2) or '') or '')
+        if id_attr:
+            # if id attr exists, extract that
+            id_attr = id_attr.group(1) or ''
+        id_attr = id_attr.strip('\'" ')
+        h_text = match.group(3)
+        # check if header was already processed (ie: was a markdown header rather than HTML)
+        if id_attr and self._header_id_exists(id_attr):
+            return text
+        # generate new header id if none existed
+        header_id = id_attr or self.header_id_from_text(h_text, self.extras['header-ids'].get('prefix'), h_level)
+        if "toc" in self.extras:
+            self._toc_add_entry(h_level, header_id, h_text)
+        if header_id and not id_attr:
+            # '<h[digit]' + new ID + '...'
+            return text[:3] + ' id="%s"' % header_id + text[3:]
+        return text
+    @mark_stage(Stage.HEADERS)
+    def _do_headers(self, text: str) -> str:
         # Setext-style headers:
         #     Header 1
         #     ========
@@ -1778,7 +1839,7 @@ class Markdown(object):
     _marker_ul = '(?:[%s])' % _marker_ul_chars
     _marker_ol = r'(?:\d+\.)'
-    def _list_sub(self, match):
+    def _list_sub(self, match: re.Match) -> str:
         lst = match.group(1)
         lst_type = match.group(4) in self._marker_ul_chars and "ul" or "ol"
@@ -1796,7 +1857,8 @@ class Markdown(object):
         else:
             return "<%s%s>\n%s</%s>\n\n" % (lst_type, lst_opts, result, lst_type)
-    def _do_lists(self, text):
+    @mark_stage(Stage.LISTS)
+    def _do_lists(self, text: str) -> str:
         # Form HTML ordered (numbered) and unordered (bulleted) lists.
         # Iterate over each *non-overlapping* list match.
@@ -1872,20 +1934,24 @@ class Markdown(object):
     _task_list_warpper_str = r'<input type="checkbox" class="task-list-item-checkbox" %sdisabled> %s'
-    def _task_list_item_sub(self, match):
+    def _task_list_item_sub(self, match: re.Match) -> str:
         marker = match.group(1)
         item_text = match.group(2)
         if marker in ['[x]','[X]']:
-                return self._task_list_warpper_str % ('checked ', item_text)
+            return self._task_list_warpper_str % ('checked ', item_text)
         elif marker == '[ ]':
-                return self._task_list_warpper_str % ('', item_text)
+            return self._task_list_warpper_str % ('', item_text)
+        # returning None has same effect as returning empty str, but only
+        # one makes the type checker happy
+        return ''
     _last_li_endswith_two_eols = False
-    def _list_item_sub(self, match):
+    def _list_item_sub(self, match: re.Match) -> str:
         item = match.group(4)
         leading_line = match.group(1)
         if leading_line or "\n\n" in item or self._last_li_endswith_two_eols:
-            item = self._run_block_gamut(self._outdent(item))
+            item = self._uniform_outdent(item, min_outdent=' ', max_outdent=self.tab)[1]
+            item = self._run_block_gamut(item)
         else:
             # Recursion for sub-lists:
             item = self._do_lists(self._uniform_outdent(item, min_outdent=' ')[1])
@@ -1899,7 +1965,7 @@ class Markdown(object):
         return "<li>%s</li>\n" % item
-    def _process_list_items(self, list_str):
+    def _process_list_items(self, list_str: str) -> str:
         # Process the contents of a single ordered or unordered list,
         # splitting it into individual list items.
@@ -1930,7 +1996,12 @@ class Markdown(object):
         self.list_level -= 1
         return list_str
-    def _get_pygments_lexer(self, lexer_name):
+    def _get_pygments_lexer(self, lexer_name: str):
+        '''
+        Returns:
+            `pygments.Lexer` or None if a lexer matching `lexer_name` is
+            not found
+        '''
         try:
             from pygments import lexers, util
         except ImportError:
@@ -1940,7 +2011,21 @@ class Markdown(object):
         except util.ClassNotFound:
             return None
-    def _color_with_pygments(self, codeblock, lexer, **formatter_opts):
+    def _color_with_pygments(
+        self,
+        codeblock: str,
+        lexer,
+        **formatter_opts
+    ) -> str:
+        '''
+        TODO: this function is only referenced by the `FencedCodeBlocks`
+        extra. May be worth moving over there
+        Args:
+            codeblock: the codeblock to highlight
+            lexer (pygments.Lexer): lexer to use
+            formatter_opts: pygments HtmlFormatter options
+        '''
         import pygments
         import pygments.formatters
@@ -1973,82 +2058,22 @@ class Markdown(object):
         formatter = HtmlCodeFormatter(**formatter_opts)
         return pygments.highlight(codeblock, lexer, formatter)
-    def _code_block_sub(self, match, is_fenced_code_block=False):
-        lexer_name = None
-        if is_fenced_code_block:
-            lexer_name = match.group(2)
-            codeblock = match.group(3)
-            codeblock = codeblock[:-1]  # drop one trailing newline
-        else:
-            codeblock = match.group(1)
-            codeblock = self._outdent(codeblock)
-            codeblock = self._detab(codeblock)
-            codeblock = codeblock.lstrip('\n')  # trim leading newlines
-            codeblock = codeblock.rstrip()      # trim trailing whitespace
-        # Use pygments only if not using the highlightjs-lang extra
-        if lexer_name and "highlightjs-lang" not in self.extras:
-            lexer = self._get_pygments_lexer(lexer_name)
-            if lexer:
-                leading_indent = ' '*(len(match.group(1)) - len(match.group(1).lstrip()))
-                return self._code_block_with_lexer_sub(codeblock, leading_indent, lexer, is_fenced_code_block)
+    def _code_block_sub(self, match: re.Match) -> str:
+        codeblock = match.group(1)
+        codeblock = self._outdent(codeblock)
+        codeblock = self._detab(codeblock)
+        codeblock = codeblock.lstrip('\n')  # trim leading newlines
+        codeblock = codeblock.rstrip()      # trim trailing whitespace
         pre_class_str = self._html_class_str_from_tag("pre")
+        code_class_str = self._html_class_str_from_tag("code")
-        if "highlightjs-lang" in self.extras and lexer_name:
-            code_class_str = ' class="%s language-%s"' % (lexer_name, lexer_name)
-        else:
-            code_class_str = self._html_class_str_from_tag("code")
-        if is_fenced_code_block:
-            # Fenced code blocks need to be outdented before encoding, and then reapplied
-            leading_indent = ' ' * (len(match.group(1)) - len(match.group(1).lstrip()))
-            if codeblock:
-                # only run the codeblock through the outdenter if not empty
-                leading_indent, codeblock = self._uniform_outdent(codeblock, max_outdent=leading_indent)
-            codeblock = self._encode_code(codeblock)
-            if lexer_name == 'mermaid' and 'mermaid' in self.extras:
-                return '\n%s<pre class="mermaid-pre"><div class="mermaid">%s\n</div></pre>\n' % (
-                    leading_indent, codeblock)
-            return "\n%s<pre%s><code%s>%s\n</code></pre>\n" % (
-                leading_indent, pre_class_str, code_class_str, codeblock)
-        else:
-            codeblock = self._encode_code(codeblock)
-            return "\n<pre%s><code%s>%s\n</code></pre>\n" % (
-                pre_class_str, code_class_str, codeblock)
-    def _code_block_with_lexer_sub(self, codeblock, leading_indent, lexer, is_fenced_code_block):
-        if is_fenced_code_block:
-            formatter_opts = self.extras['fenced-code-blocks'] or {}
-        else:
-            formatter_opts = {}
-        def unhash_code(codeblock):
-            for key, sanitized in list(self.html_spans.items()):
-                codeblock = codeblock.replace(key, sanitized)
-            replacements = [
-                ("&amp;", "&"),
-                ("&lt;", "<"),
-                ("&gt;", ">")
-            ]
-            for old, new in replacements:
-                codeblock = codeblock.replace(old, new)
-            return codeblock
-        # remove leading indent from code block
-        _, codeblock = self._uniform_outdent(codeblock, max_outdent=leading_indent)
-        codeblock = unhash_code(codeblock)
-        colored = self._color_with_pygments(codeblock, lexer,
-                                            **formatter_opts)
+        codeblock = self._encode_code(codeblock)
-        # add back the indent to all lines
-        return "\n%s\n" % self._uniform_indent(colored, leading_indent, True)
+        return "\n<pre%s><code%s>%s\n</code></pre>\n" % (
+            pre_class_str, code_class_str, codeblock)
-    def _html_class_str_from_tag(self, tag):
+    def _html_class_str_from_tag(self, tag: str) -> str:
         """Get the appropriate ' class="..."' string (note the leading
         space), if any, for the given tag.
         """
@@ -2064,7 +2089,8 @@ class Markdown(object):
                     return ' class="%s"' % html_classes_from_tag[tag]
         return ""
-    def _do_code_blocks(self, text):
+    @mark_stage(Stage.CODE_BLOCKS)
+    def _do_code_blocks(self, text: str) -> str:
         """Process Markdown `<pre><code>` blocks."""
         code_block_re = re.compile(r'''
             (?:\n\n|\A\n?)
@@ -2082,20 +2108,6 @@ class Markdown(object):
             re.M | re.X)
         return code_block_re.sub(self._code_block_sub, text)
-    _fenced_code_block_re = re.compile(r'''
-        (?:\n+|\A\n?|(?<=\n))
-        (^[ \t]*`{3,})\s{0,99}?([\w+-]+)?\s{0,99}?\n  # $1 = opening fence (captured for back-referencing), $2 = optional lang
-        (.*?)                             # $3 = code block content
-        \1[ \t]*\n                      # closing fence
-        ''', re.M | re.X | re.S)
-    def _fenced_code_block_sub(self, match):
-        return self._code_block_sub(match, is_fenced_code_block=True)
-    def _do_fenced_code_blocks(self, text):
-        """Process ```-fenced unindented code blocks ('fenced-code-blocks' extra)."""
-        return self._fenced_code_block_re.sub(self._fenced_code_block_sub, text)
     # Rules for a code span:
     # - backslash escapes are not interpreted in a code span
     # - to include one or or a run of more backticks the delimiters must
@@ -2114,12 +2126,13 @@ class Markdown(object):
             (?!`)
         ''', re.X | re.S)
-    def _code_span_sub(self, match):
+    def _code_span_sub(self, match: re.Match) -> str:
         c = match.group(2).strip(" \t")
         c = self._encode_code(c)
         return "<code%s>%s</code>" % (self._html_class_str_from_tag("code"), c)
-    def _do_code_spans(self, text):
+    @mark_stage(Stage.CODE_SPANS)
+    def _do_code_spans(self, text: str) -> str:
         #   *   Backtick quotes are used for <code></code> spans.
         #
         #   *   You can use multiple backticks as the delimiters if you want to
@@ -2144,7 +2157,7 @@ class Markdown(object):
         #         ... type <code>`bar`</code> ...
         return self._code_span_re.sub(self._code_span_sub, text)
-    def _encode_code(self, text):
+    def _encode_code(self, text: str) -> str:
         """Encode/escape certain characters inside Markdown code runs.
         The point is that in code, these characters are literals,
         and lose their special Markdown meanings.
@@ -2163,160 +2176,14 @@ class Markdown(object):
         self._code_table[text] = hashed
         return hashed
-    def _wavedrom_block_sub(self, match):
-        # if this isn't a wavedrom diagram block, exit now
-        if match.group(2) != 'wavedrom':
-            return match.string[match.start():match.end()]
-        # dedent the block for processing
-        lead_indent, waves = self._uniform_outdent(match.group(3))
-        # default tags to wrap the wavedrom block in
-        open_tag, close_tag = '<script type="WaveDrom">\n', '</script>'
-        # check if the user would prefer to have the SVG embedded directly
-        if not isinstance(self.extras['wavedrom'], dict):
-            embed_svg = True
-        else:
-            # default behaviour is to embed SVGs
-            embed_svg = self.extras['wavedrom'].get('prefer_embed_svg', True)
-        if embed_svg:
-            try:
-                import wavedrom
-                waves = wavedrom.render(waves).tostring()
-                open_tag, close_tag = '<div>', '\n</div>'
-            except ImportError:
-                pass
-        # hash SVG to prevent <> chars being messed with
-        self._escape_table[waves] = _hash_text(waves)
-        return self._uniform_indent(
-            '\n%s%s%s\n' % (open_tag, self._escape_table[waves], close_tag),
-            lead_indent, include_empty_lines=True
-        )
-    def _do_wavedrom_blocks(self, text):
-        return self._fenced_code_block_re.sub(self._wavedrom_block_sub, text)
-    _admonitions = r'admonition|attention|caution|danger|error|hint|important|note|tip|warning'
-    _admonitions_re = re.compile(r'''
-        ^(\ *)\.\.\ (%s)::\ *                # $1 leading indent, $2 the admonition
-        (.*)?                                # $3 admonition title
-        ((?:\s*\n\1\ {3,}.*)+?)              # $4 admonition body (required)
-        (?=\s*(?:\Z|\n{4,}|\n\1?\ {0,2}\S))  # until EOF, 3 blank lines or something less indented
-        ''' % _admonitions,
-        re.IGNORECASE | re.MULTILINE | re.VERBOSE
-    )
-    def _do_admonitions_sub(self, match):
-        lead_indent, admonition_name, title, body = match.groups()
-        admonition_type = '<strong>%s</strong>' % admonition_name
-        # figure out the class names to assign the block
-        if admonition_name.lower() == 'admonition':
-            admonition_class = 'admonition'
-        else:
-            admonition_class = 'admonition %s' % admonition_name.lower()
-        # titles are generally optional
-        if title:
-            title = '<em>%s</em>' % title
-        # process the admonition body like regular markdown
-        body = self._run_block_gamut("\n%s\n" % self._uniform_outdent(body)[1])
-        # indent the body before placing inside the aside block
-        admonition = self._uniform_indent('%s\n%s\n\n%s\n' % (admonition_type, title, body), self.tab, False)
-        # wrap it in an aside
-        admonition = '<aside class="%s">\n%s</aside>' % (admonition_class, admonition)
-        # now indent the whole admonition back to where it started
-        return self._uniform_indent(admonition, lead_indent, False)
-    def _do_admonitions(self, text):
-        return self._admonitions_re.sub(self._do_admonitions_sub, text)
-    _strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)
-    def _do_strike(self, text):
-        text = self._strike_re.sub(r"<s>\1</s>", text)
-        return text
-    _underline_re = re.compile(r"(?<!<!)--(?!>)(?=\S)(.+?)(?<=\S)(?<!<!)--(?!>)", re.S)
-    def _do_underline(self, text):
-        text = self._underline_re.sub(r"<u>\1</u>", text)
-        return text
-    _tg_spoiler_re = re.compile(r"\|\|\s?(.+?)\s?\|\|", re.S)
-    def _do_tg_spoiler(self, text):
-        text = self._tg_spoiler_re.sub(r"<tg-spoiler>\1</tg-spoiler>", text)
-        return text
+    _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]?)(?<=\S)\1", re.S)
+    _em_re = re.compile(r"(\*|_)(?=\S)(.*?\S)\1", re.S)
-    _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S)
-    _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S)
-    _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
-    _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
-    def _do_italics_and_bold(self, text):
+    @mark_stage(Stage.ITALIC_AND_BOLD)
+    def _do_italics_and_bold(self, text: str) -> str:
         # <strong> must go first:
-        if "code-friendly" in self.extras:
-            text = self._code_friendly_strong_re.sub(r"<strong>\1</strong>", text)
-            text = self._code_friendly_em_re.sub(r"<em>\1</em>", text)
-        else:
-            text = self._strong_re.sub(r"<strong>\2</strong>", text)
-            text = self._em_re.sub(r"<em>\2</em>", text)
-        return text
-    # "smarty-pants" extra: Very liberal in interpreting a single prime as an
-    # apostrophe; e.g. ignores the fact that "round", "bout", "twer", and
-    # "twixt" can be written without an initial apostrophe. This is fine because
-    # using scare quotes (single quotation marks) is rare.
-    _apostrophe_year_re = re.compile(r"'(\d\d)(?=(\s|,|;|\.|\?|!|$))")
-    _contractions = ["tis", "twas", "twer", "neath", "o", "n",
-        "round", "bout", "twixt", "nuff", "fraid", "sup"]
-    def _do_smart_contractions(self, text):
-        text = self._apostrophe_year_re.sub(r"&#8217;\1", text)
-        for c in self._contractions:
-            text = text.replace("'%s" % c, "&#8217;%s" % c)
-            text = text.replace("'%s" % c.capitalize(),
-                "&#8217;%s" % c.capitalize())
-        return text
-    # Substitute double-quotes before single-quotes.
-    _opening_single_quote_re = re.compile(r"(?<!\S)'(?=\S)")
-    _opening_double_quote_re = re.compile(r'(?<!\S)"(?=\S)')
-    _closing_single_quote_re = re.compile(r"(?<=\S)'")
-    _closing_double_quote_re = re.compile(r'(?<=\S)"(?=(\s|,|;|\.|\?|!|$))')
-    def _do_smart_punctuation(self, text):
-        """Fancifies 'single quotes', "double quotes", and apostrophes.
-        Converts --, ---, and ... into en dashes, em dashes, and ellipses.
-        Inspiration is: <http://daringfireball.net/projects/smartypants/>
-        See "test/tm-cases/smarty_pants.text" for a full discussion of the
-        support here and
-        <http://code.google.com/p/python-markdown2/issues/detail?id=42> for a
-        discussion of some diversion from the original SmartyPants.
-        """
-        if "'" in text:  # guard for perf
-            text = self._do_smart_contractions(text)
-            text = self._opening_single_quote_re.sub("&#8216;", text)
-            text = self._closing_single_quote_re.sub("&#8217;", text)
-        if '"' in text:  # guard for perf
-            text = self._opening_double_quote_re.sub("&#8220;", text)
-            text = self._closing_double_quote_re.sub("&#8221;", text)
-        text = text.replace("---", "&#8212;")
-        text = text.replace("--", "&#8211;")
-        text = text.replace("...", "&#8230;")
-        text = text.replace(" . . . ", "&#8230;")
-        text = text.replace(". . .", "&#8230;")
-        # TODO: Temporary hack to fix https://github.com/trentm/python-markdown2/issues/150
-        if "footnotes" in self.extras and "footnote-ref" in text:
-            # Quotes in the footnote back ref get converted to "smart" quotes
-            # Change them back here to ensure they work.
-            text = text.replace('class="footnote-ref&#8221;', 'class="footnote-ref"')
+        text = self._strong_re.sub(r"<strong>\2</strong>", text)
+        text = self._em_re.sub(r"<em>\2</em>", text)
         return text
     _block_quote_base = r'''
@@ -2334,10 +2201,10 @@ class Markdown(object):
     _bq_one_level_re_spoiler = re.compile('^[ \t]*>[ \t]*?![ \t]?', re.M)
     _bq_all_lines_spoilers = re.compile(r'\A(?:^[ \t]*>[ \t]*?!.*[\n\r]*)+\Z', re.M)
     _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S)
-    def _dedent_two_spaces_sub(self, match):
+    def _dedent_two_spaces_sub(self, match: re.Match) -> str:
         return re.sub(r'(?m)^  ', '', match.group(1))
-    def _block_quote_sub(self, match):
+    def _block_quote_sub(self, match: re.Match) -> str:
         bq = match.group(1)
         is_spoiler = 'spoiler' in self.extras and self._bq_all_lines_spoilers.match(bq)
         # trim one level of quoting
@@ -2358,7 +2225,8 @@ class Markdown(object):
         else:
             return '<blockquote>\n%s\n</blockquote>\n\n' % bq
-    def _do_block_quotes(self, text):
+    @mark_stage(Stage.BLOCK_QUOTES)
+    def _do_block_quotes(self, text: str) -> str:
         if '>' not in text:
             return text
         if 'spoiler' in self.extras:
@@ -2366,7 +2234,8 @@ class Markdown(object):
         else:
             return self._block_quote_re.sub(self._block_quote_sub, text)
-    def _form_paragraphs(self, text):
+    @mark_stage(Stage.PARAGRAPHS)
+    def _form_paragraphs(self, text: str) -> str:
         # Strip leading and trailing lines:
         text = text.strip('\n')
@@ -2396,8 +2265,13 @@ class Markdown(object):
                     ):
                         start = li.start()
                         cuddled_list = self._do_lists(graf[start:]).rstrip("\n")
-                        assert re.match(r'^<(?:ul|ol).*?>', cuddled_list)
-                        graf = graf[:start]
+                        if re.match(r'^<(?:ul|ol).*?>', cuddled_list):
+                            graf = graf[:start]
+                        else:
+                            # Not quite a cuddled list. (See not_quite_a_list_cuddled_lists test case)
+                            # Store as a simple paragraph.
+                            graf = cuddled_list
+                            cuddled_list = None
                 # Wrap <p> tags.
                 graf = self._run_span_gamut(graf)
@@ -2408,7 +2282,7 @@ class Markdown(object):
         return "\n\n".join(grafs)
-    def _add_footnotes(self, text):
+    def _add_footnotes(self, text: str) -> str:
         if self.footnotes:
             footer = [
                 '<div class="footnotes">',
@@ -2421,6 +2295,10 @@ class Markdown(object):
             if not self.footnote_return_symbol:
                 self.footnote_return_symbol = "&#8617;"
+            # self.footnotes is generated in _strip_footnote_definitions, which runs re.sub on the whole
+            # text. This means that the dict keys are inserted in order of appearance. Use the dict to
+            # sort footnote ids by that same order
+            self.footnote_ids.sort(key=lambda a: list(self.footnotes.keys()).index(a))
             for i, id in enumerate(self.footnote_ids):
                 if i != 0:
                     footer.append('')
@@ -2455,7 +2333,7 @@ class Markdown(object):
     _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I)
     _naked_gt_re = re.compile(r'''(?<![a-z0-9?!/'"-])>''', re.I)
-    def _encode_amps_and_angles(self, text):
+    def _encode_amps_and_angles(self, text: str) -> str:
         # Smart processing for ampersands and angle brackets that need
         # to be encoded.
         text = _AMPERSAND_RE.sub('&amp;', text)
@@ -2469,9 +2347,9 @@ class Markdown(object):
         text = self._naked_gt_re.sub('&gt;', text)
         return text
-    _incomplete_tags_re = re.compile(r"<(/?\w+?(?!\w)\s*?.+?[\s/]+?)")
+    _incomplete_tags_re = re.compile(r"<(!--|/?\w+?(?!\w)\s*?.+?(?:[\s/]+?|$))")
-    def _encode_incomplete_tags(self, text):
+    def _encode_incomplete_tags(self, text: str) -> str:
         if self.safe_mode not in ("replace", "escape"):
             return text
@@ -2483,13 +2361,13 @@ class Markdown(object):
         return self._incomplete_tags_re.sub(incomplete_tags_sub, text)
-    def _encode_backslash_escapes(self, text):
+    def _encode_backslash_escapes(self, text: str) -> str:
         for ch, escape in list(self._escape_table.items()):
             text = text.replace("\\"+ch, escape)
         return text
     _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I)
-    def _auto_link_sub(self, match):
+    def _auto_link_sub(self, match: re.Match) -> str:
         g1 = match.group(1)
         return '<a href="%s">%s</a>' % (self._protect_url(g1), g1)
@@ -2503,16 +2381,16 @@ class Markdown(object):
           )
           >
         """, re.I | re.X | re.U)
-    def _auto_email_link_sub(self, match):
+    def _auto_email_link_sub(self, match: re.Match) -> str:
         return self._encode_email_address(
             self._unescape_special_chars(match.group(1)))
-    def _do_auto_links(self, text):
+    def _do_auto_links(self, text: str) -> str:
         text = self._auto_link_re.sub(self._auto_link_sub, text)
         text = self._auto_email_link_re.sub(self._auto_email_link_sub, text)
         return text
-    def _encode_email_address(self, addr):
+    def _encode_email_address(self, addr: str) -> str:
         #  Input: an email address, e.g. "foo@example.com"
         #
         #  Output: the email address as a mailto link, with each character
@@ -2532,88 +2410,40 @@ class Markdown(object):
                % (''.join(chars), ''.join(chars[7:]))
         return addr
-    _basic_link_re = re.compile(r'!?\[.*?\]\(.*?\)')
-    def _do_link_patterns(self, text):
-        link_from_hash = {}
-        for regex, repl in self.link_patterns:
-            replacements = []
-            for match in regex.finditer(text):
-                if any(self._match_overlaps_substr(text, match, h) for h in link_from_hash):
-                    continue
-                if hasattr(repl, "__call__"):
-                    href = repl(match)
-                else:
-                    href = match.expand(repl)
-                replacements.append((match.span(), href))
-            for (start, end), href in reversed(replacements):
-                # Do not match against links inside brackets.
-                if text[start - 1:start] == '[' and text[end:end + 1] == ']':
-                    continue
-                # Do not match against links in the standard markdown syntax.
-                if text[start - 2:start] == '](' or text[end:end + 2] == '")':
-                    continue
-                # Do not match against links which are escaped.
-                if text[start - 3:start] == '"""' and text[end:end + 3] == '"""':
-                    text = text[:start - 3] + text[start:end] + text[end + 3:]
-                    continue
-                # search the text for anything that looks like a link
-                is_inside_link = False
-                for link_re in (self._auto_link_re, self._basic_link_re):
-                    for match in link_re.finditer(text):
-                        if any((r[0] <= start and end <= r[1]) for r in match.regs):
-                            # if the link pattern start and end pos is within the bounds of
-                            # something that looks like a link, then don't process it
-                            is_inside_link = True
-                            break
-                    else:
-                        continue
-                    break
-                if is_inside_link:
-                    continue
-                escaped_href = (
-                    href.replace('"', '&quot;')  # b/c of attr quote
-                        # To avoid markdown <em> and <strong>:
-                        .replace('*', self._escape_table['*'])
-                        .replace('_', self._escape_table['_']))
-                link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
-                hash = _hash_text(link)
-                link_from_hash[hash] = link
-                text = text[:start] + hash + text[end:]
-        for hash, link in list(link_from_hash.items()):
-            text = text.replace(hash, link)
-        return text
-    def _unescape_special_chars(self, text):
+    def _unescape_special_chars(self, text: str) -> str:
         # Swap back in all the special characters we've hidden.
+        hashmap = tuple(self._escape_table.items()) + tuple(self._code_table.items())
+        # html_blocks table is in format {hash: item} compared to usual {item: hash}
+        hashmap += tuple(tuple(reversed(i)) for i in self.html_blocks.items())
         while True:
             orig_text = text
-            for ch, hash in list(self._escape_table.items()) + list(self._code_table.items()):
+            for ch, hash in hashmap:
                 text = text.replace(hash, ch)
             if text == orig_text:
                 break
         return text
-    def _outdent(self, text):
+    def _outdent(self, text: str) -> str:
         # Remove one level of line-leading tabs or spaces
         return self._outdent_re.sub('', text)
-    def _uniform_outdent(self, text, min_outdent=None, max_outdent=None):
-        # Removes the smallest common leading indentation from each (non empty)
-        # line of `text` and returns said indent along with the outdented text.
-        # The `min_outdent` kwarg makes sure the smallest common whitespace
-        # must be at least this size
-        # The `max_outdent` sets the maximum amount a line can be
-        # outdented by
+    @staticmethod
+    def _uniform_outdent(
+        text: str,
+        min_outdent: Optional[str] = None,
+        max_outdent: Optional[str] = None
+    ) -> Tuple[str, str]:
+        '''
+        Removes the smallest common leading indentation from each (non empty)
+        line of `text` and returns said indent along with the outdented text.
+        Args:
+            min_outdent: make sure the smallest common whitespace is at least this size
+            max_outdent: the maximum amount a line can be outdented by
+        '''
         # find the leading whitespace for every line
-        whitespace = [
+        whitespace: List[Union[str, None]] = [
             re.findall(r'^[ \t]*', line)[0] if line else None
             for line in text.splitlines()
         ]
@@ -2644,14 +2474,34 @@ class Markdown(object):
         return outdent, ''.join(outdented)
-    def _uniform_indent(self, text, indent, include_empty_lines=False):
-        return ''.join(
-            (indent + line if line.strip() or include_empty_lines else '')
-            for line in text.splitlines(True)
-        )
+    @staticmethod
+    def _uniform_indent(
+        text: str,
+        indent: str,
+        include_empty_lines: bool = False,
+        indent_empty_lines: bool = False
+    ) -> str:
+        '''
+        Uniformly indent a block of text by a fixed amount
+        Args:
+            text: the text to indent
+            indent: a string containing the indent to apply
+            include_empty_lines: don't remove whitespace only lines
+            indent_empty_lines: indent whitespace only lines with the rest of the text
+        '''
+        blocks = []
+        for line in text.splitlines(True):
+            if line.strip() or indent_empty_lines:
+                blocks.append(indent + line)
+            elif include_empty_lines:
+                blocks.append(line)
+            else:
+                blocks.append('')
+        return ''.join(blocks)
     @staticmethod
-    def _match_overlaps_substr(text, match, substr):
+    def _match_overlaps_substr(text, match: re.Match, substr: str) -> bool:
         '''
         Checks if a regex match overlaps with a substring in the given text.
         '''
@@ -2676,58 +2526,1093 @@ class MarkdownWithExtras(Markdown):
     - link-patterns (because you need to specify some actual
       link-patterns anyway)
     """
-    extras = ["footnotes", "fenced-code-blocks"]
+    extras = ["footnotes", "fenced-code-blocks"]  # type: ignore
-# ---- internal support functions
+# ----------------------------------------------------------
+# Extras
+# ----------------------------------------------------------
+# Base classes
+# ----------------------------------------------------------
-def calculate_toc_html(toc):
-    """Return the HTML for the current TOC.
+class Extra(ABC):
+    _registry: Dict[str, Type['Extra']] = {}
+    _exec_order: Dict[Stage, Tuple[List[Type['Extra']], List[Type['Extra']]]] = {}
-    This expects the `_toc` attribute to have been set on this instance.
-    """
-    if toc is None:
-        return None
+    name: str
+    '''
+    An identifiable name that users can use to invoke the extra
+    in the Markdown class
+    '''
+    order: Tuple[Collection[Union[Stage, Type['Extra']]], Collection[Union[Stage, Type['Extra']]]]
+    '''
+    Tuple of two iterables containing the stages/extras this extra will run before and
+    after, respectively
+    '''
-    def indent():
-        return '  ' * (len(h_stack) - 1)
-    lines = []
-    h_stack = [0]   # stack of header-level numbers
-    for level, id, name in toc:
-        if level > h_stack[-1]:
-            lines.append("%s<ul>" % indent())
-            h_stack.append(level)
-        elif level == h_stack[-1]:
-            lines[-1] += "</li>"
-        else:
-            while level < h_stack[-1]:
-                h_stack.pop()
-                if not lines[-1].endswith("</li>"):
-                    lines[-1] += "</li>"
-                lines.append("%s</ul></li>" % indent())
-        lines.append('%s<li><a href="#%s">%s</a>' % (
-            indent(), id, name))
-    while len(h_stack) > 1:
-        h_stack.pop()
-        if not lines[-1].endswith("</li>"):
-            lines[-1] += "</li>"
-        lines.append("%s</ul>" % indent())
-    return '\n'.join(lines) + '\n'
+    def __init__(self, md: Markdown, options: Optional[dict]):
+        '''
+        Args:
+            md: An instance of `Markdown`
+            options: a dict of settings to alter the extra's behaviour
+        '''
+        self.md = md
+        self.options = options if options is not None else {}
+    @classmethod
+    def deregister(cls):
+        '''
+        Removes the class from the extras registry and unsets its execution order.
+        '''
+        if cls.name in cls._registry:
+            del cls._registry[cls.name]
-class UnicodeWithAttrs(str):
-    """A subclass of unicode used for the return value of conversion to
-    possibly attach some attributes. E.g. the "toc_html" attribute when
-    the "toc" extra is used.
-    """
-    metadata = None
-    toc_html = None
+        for exec_order in Extra._exec_order.values():
+            # find everywhere this extra is mentioned and remove it
+            for section in exec_order:
+                while cls in section:
+                    section.remove(cls)
-## {{{ http://code.activestate.com/recipes/577257/ (r1)
-_slugify_strip_re = re.compile(r'[^\w\s-]')
-_slugify_hyphenate_re = re.compile(r'[-\s]+')
-def _slugify(value):
+    @classmethod
+    def register(cls):
+        '''
+        Registers the class for use with `Markdown` and calculates its execution order based on
+        the `order` class attribute.
+        '''
+        cls._registry[cls.name] = cls
+        for index, item in enumerate((*cls.order[0], *cls.order[1])):
+            before = index < len(cls.order[0])
+            if not isinstance(item, Stage) and issubclass(item, Extra):
+                # eg: FencedCodeBlocks
+                for exec_orders in Extra._exec_order.values():
+                    # insert this extra everywhere the other one is mentioned
+                    for section in exec_orders:
+                        if item in section:
+                            to_index = section.index(item)
+                            if not before:
+                                to_index += 1
+                            section.insert(to_index, cls)
+            else:
+                # eg: Stage.PREPROCESS
+                Extra._exec_order.setdefault(item, ([], []))
+                if cls in Extra._exec_order[item][0 if before else 1]:
+                    # extra is already runnig after this stage. Don't duplicate that effort
+                    continue
+                if before:
+                    Extra._exec_order[item][0].insert(0, cls)
+                else:
+                    Extra._exec_order[item][1].append(cls)
+    @abstractmethod
+    def run(self, text: str) -> str:
+        '''
+        Run the extra against the given text.
+        Returns:
+            The new text after being modified by the extra
+        '''
+        ...
+    def test(self, text: str) -> bool:
+        '''
+        Check a section of markdown to see if this extra should be run upon it.
+        The default implementation will always return True but it's recommended to override
+        this behaviour to improve performance.
+        '''
+        return True
+class ItalicAndBoldProcessor(Extra):
+    '''
+    An ABC that provides hooks for dealing with italics and bold syntax.
+    This class is set to trigger both before AND after the italics and bold stage.
+    This allows any child classes to intercept instances of bold or italic syntax and
+    change the output or hash it to prevent it from being processed.
+    After the I&B stage any hashes in the `hash_tables` instance variable are replaced.
+    '''
+    name = 'italic-and-bold-processor'
+    order = (Stage.ITALIC_AND_BOLD,), (Stage.ITALIC_AND_BOLD,)
+    strong_re = Markdown._strong_re
+    em_re = Markdown._em_re
+    def __init__(self, md: Markdown, options: dict):
+        super().__init__(md, options)
+        self.hash_table = {}
+    def run(self, text):
+        if self.md.order < Stage.ITALIC_AND_BOLD:
+            text = self.strong_re.sub(self.sub, text)
+            text = self.em_re.sub(self.sub, text)
+        else:
+            # push any hashed values back, using a while loop to deal with recursive hashes
+            orig_text = ''
+            while orig_text != text:
+                orig_text = text
+                for key, substr in self.hash_table.items():
+                    text = text.replace(key, substr)
+        return text
+    @abstractmethod
+    def sub(self, match: re.Match) -> str:
+        # do nothing. Let `Markdown._do_italics_and_bold` do its thing later
+        return match.string[match.start(): match.end()]
+    def sub_hash(self, match: re.Match) -> str:
+        substr = match.string[match.start(): match.end()]
+        key = _hash_text(substr)
+        self.hash_table[key] = substr
+        return key
+    def test(self, text):
+        if self.md.order < Stage.ITALIC_AND_BOLD:
+            return '*' in text or '_' in text
+        return self.hash_table and re.search(r'md5-[0-9a-z]{32}', text)
+# User facing extras
+# ----------------------------------------------------------
+class Admonitions(Extra):
+    '''
+    Enable parsing of RST admonitions
+    '''
+    name = 'admonitions'
+    order = (Stage.BLOCK_GAMUT, Stage.LINK_DEFS), ()
+    admonitions = r'admonition|attention|caution|danger|error|hint|important|note|tip|warning'
+    admonitions_re = re.compile(r'''
+        ^(\ *)\.\.\ (%s)::\ *                # $1 leading indent, $2 the admonition
+        (.*)?                                # $3 admonition title
+        ((?:\s*\n\1\ {3,}.*)+?)              # $4 admonition body (required)
+        (?=\s*(?:\Z|\n{4,}|\n\1?\ {0,2}\S))  # until EOF, 3 blank lines or something less indented
+        ''' % admonitions,
+        re.IGNORECASE | re.MULTILINE | re.VERBOSE
+    )
+    def test(self, text):
+        return self.admonitions_re.search(text) is not None
+    def sub(self, match: re.Match) -> str:
+        lead_indent, admonition_name, title, body = match.groups()
+        admonition_type = '<strong>%s</strong>' % admonition_name
+        # figure out the class names to assign the block
+        if admonition_name.lower() == 'admonition':
+            admonition_class = 'admonition'
+        else:
+            admonition_class = 'admonition %s' % admonition_name.lower()
+        # titles are generally optional
+        if title:
+            title = '<em>%s</em>' % title
+        # process the admonition body like regular markdown
+        body = self.md._run_block_gamut("\n%s\n" % self.md._uniform_outdent(body)[1])
+        # indent the body before placing inside the aside block
+        admonition = self.md._uniform_indent(
+            '%s\n%s\n\n%s\n' % (admonition_type, title, body),
+            self.md.tab, False
+        )
+        # wrap it in an aside
+        admonition = '<aside class="%s">\n%s</aside>' % (admonition_class, admonition)
+        # now indent the whole admonition back to where it started
+        return self.md._uniform_indent(admonition, lead_indent, False)
+    def run(self, text):
+        return self.admonitions_re.sub(self.sub, text)
+class Alerts(Extra):
+    '''
+    Markdown Alerts as per
+    https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts
+    '''
+    name = 'alerts'
+    order = (), (Stage.BLOCK_QUOTES, )
+    alert_re = re.compile(r'''
+        <blockquote>\s*
+        <p>
+        \[!(?P<type>NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]
+        (?P<closing_tag></p>[ \t]*\n?)?
+        (?P<contents>[\s\S]+?)
+        </blockquote>
+    ''', re.X
+    )
+    def test(self, text):
+        return "<blockquote>" in text
+    def sub(self, match: re.Match) -> str:
+        typ = match["type"].lower()
+        heading = f"<em>{match['type'].title()}</em>"
+        contents = match["contents"].strip()
+        if match["closing_tag"]:
+            return f'<div class="alert {typ}">\n{heading}\n{contents}\n</div>'
+        else:
+            return f'<div class="alert {typ}">\n{heading}\n<p>{contents}\n</div>'
+    def run(self, text):
+        return self.alert_re.sub(self.sub, text)
+class _BreaksExtraOpts(TypedDict, total=False):
+    '''Options for the `Breaks` extra'''
+    on_backslash: bool
+    '''Replace backslashes at the end of a line with <br>'''
+    on_newline: bool
+    '''Replace single new line characters with <br> when True'''
+class Breaks(Extra):
+    name = 'breaks'
+    order = (), (Stage.ITALIC_AND_BOLD,)
+    options: _BreaksExtraOpts
+    def run(self, text):
+        on_backslash = self.options.get('on_backslash', False)
+        on_newline = self.options.get('on_newline', False)
+        if on_backslash and on_newline:
+            pattern = r' *\\?'
+        elif on_backslash:
+            pattern = r'(?: *\\| {2,})'
+        elif on_newline:
+            pattern = r' *'
+        else:
+            pattern = r' {2,}'
+        break_tag = "<br%s\n" % self.md.empty_element_suffix
+        text = re.sub(pattern + r"\n(?!\<(?:\/?(ul|ol|li))\>)", break_tag, text)
+        return text
+class CodeFriendly(ItalicAndBoldProcessor):
+    '''
+    Disable _ and __ for em and strong.
+    '''
+    name = 'code-friendly'
+    def sub(self, match: re.Match) -> str:
+        '''
+        Protect underscore-based emphasis, and underscores nested inside other
+        emphasis, by swapping them for hash keys stored in `self.hash_table`.
+        '''
+        marker = match.group(1)
+        whole = match.group(0)
+        if '_' in marker:
+            # _this_ / __this__ syntax: hide the entire match so it is never processed
+            placeholder = _hash_text(whole)
+            self.hash_table[placeholder] = whole
+            return placeholder
+        if '_' not in whole:
+            # no underscores anywhere: regular emphasis handling applies
+            return super().sub(match)
+        # underscores inside the markers: hide only the inner text to shield it from em_re
+        inner = whole[len(marker): -len(marker)]
+        placeholder = _hash_text(inner)
+        self.hash_table[placeholder] = inner
+        return marker + placeholder + marker
+class FencedCodeBlocks(Extra):
+    '''
+    Allows a code block to not have to be indented
+    by fencing it with '```' on a line before and after. Based on
+    <http://github.github.com/github-flavored-markdown/> with support for
+    syntax highlighting.
+    '''
+    name = 'fenced-code-blocks'
+    order = (Stage.LINK_DEFS, Stage.BLOCK_GAMUT), (Stage.PREPROCESS,)
+    fenced_code_block_re = re.compile(r'''
+        (?:\n+|\A\n?|(?<=\n))
+        (^[ \t]*`{3,})\s{0,99}?([\w+-]+)?\s{0,99}?\n  # $1 = opening fence (captured for back-referencing), $2 = optional lang
+        (.*?)                             # $3 = code block content
+        \1[ \t]*\n                      # closing fence
+        ''', re.M | re.X | re.S)
+    def test(self, text):
+        '''
+        Decide whether this extra should run for the current stage.
+        Outside safe mode it runs at PREPROCESS, in safe mode at LINK_DEFS, and
+        in either mode at BLOCK_GAMUT; text without a fence is always skipped.
+        '''
+        if '```' not in text:
+            return False
+        if self.md.stage == Stage.PREPROCESS and not self.md.safe_mode:
+            return True
+        if self.md.stage == Stage.LINK_DEFS and self.md.safe_mode:
+            return True
+        return self.md.stage == Stage.BLOCK_GAMUT
+    def _code_block_with_lexer_sub(
+        self,
+        codeblock: str,
+        leading_indent: str,
+        lexer
+    ) -> str:
+        '''
+        Highlight a code block with pygments.
+        Args:
+            codeblock: the codeblock to format
+            leading_indent: the indentation to prefix the block with
+            lexer (pygments.Lexer): the lexer to use
+        Returns:
+            The highlighted block, re-indented and wrapped in newlines
+        '''
+        # `.get` rather than indexing: subclasses (e.g. `Mermaid`) share this code
+        # path and may run when the 'fenced-code-blocks' key is not in the extras.
+        formatter_opts = self.md.extras.get('fenced-code-blocks') or {}
+        def unhash_code(codeblock):
+            # restore hashed HTML spans, then undo the basic entity encoding so
+            # pygments sees the raw source
+            for key, sanitized in list(self.md.html_spans.items()):
+                codeblock = codeblock.replace(key, sanitized)
+            replacements = [
+                ("&amp;", "&"),
+                ("&lt;", "<"),
+                ("&gt;", ">")
+            ]
+            for old, new in replacements:
+                codeblock = codeblock.replace(old, new)
+            return codeblock
+        # remove leading indent from code block
+        _, codeblock = self.md._uniform_outdent(codeblock, max_outdent=leading_indent)
+        codeblock = unhash_code(codeblock)
+        colored = self.md._color_with_pygments(codeblock, lexer,
+                                               **formatter_opts)
+        # add back the indent to all lines
+        return "\n%s\n" % self.md._uniform_indent(colored, leading_indent, True)
+    def tags(self, lexer_name: str) -> Tuple[str, str]:
+        '''
+        Returns the tags that the encoded code block will be wrapped in, based
+        upon the lexer name.
+        This function can be overridden by subclasses to piggy-back off of the
+        fenced code blocks syntax (see `Mermaid` extra).
+        Returns:
+            The opening and closing tags, as strings within a tuple
+        '''
+        pre_class = self.md._html_class_str_from_tag('pre')
+        if "highlightjs-lang" in self.md.extras and lexer_name:
+            code_class = ' class="%s language-%s"' % (lexer_name, lexer_name)
+        else:
+            code_class = self.md._html_class_str_from_tag('code')
+        return ('<pre%s><code%s>' % (pre_class, code_class), '</code></pre>')
+    def sub(self, match: re.Match) -> str:
+        '''
+        Render one fenced block: highlighted via pygments when a lexer is found
+        for the language, otherwise encoded and wrapped in the tags from `tags`.
+        '''
+        fence = match.group(1)
+        lexer_name = match.group(2)
+        codeblock = match.group(3)
+        codeblock = codeblock[:-1]  # drop one trailing newline
+        # indentation of the opening fence, computed once for both code paths
+        leading_indent = ' ' * (len(fence) - len(fence.lstrip()))
+        # Use pygments only if not using the highlightjs-lang extra
+        if lexer_name and "highlightjs-lang" not in self.md.extras:
+            lexer = self.md._get_pygments_lexer(lexer_name)
+            if lexer:
+                return self._code_block_with_lexer_sub(codeblock, leading_indent, lexer)
+        # Fenced code blocks need to be outdented before encoding, and then reapplied
+        if codeblock:
+            # only run the codeblock through the outdenter if not empty
+            leading_indent, codeblock = self.md._uniform_outdent(codeblock, max_outdent=leading_indent)
+        codeblock = self.md._encode_code(codeblock)
+        tags = self.tags(lexer_name)
+        return "\n%s%s%s\n%s%s\n" % (leading_indent, tags[0], codeblock, leading_indent, tags[1])
+    def run(self, text):
+        '''Apply `sub` to every fenced code block found in `text`.'''
+        return self.fenced_code_block_re.sub(self.sub, text)
+class Latex(Extra):
+    '''
+    Convert $ and $$ to <math> and </math> tags for inline and block math.
+    Requires the `latex2mathml` package, which is imported lazily in `run`.
+    '''
+    name = 'latex'
+    order = (Stage.CODE_BLOCKS, FencedCodeBlocks), ()
+    _single_dollar_re = re.compile(r'(?<!\$)\$(?!\$)(.*?)\$')
+    _double_dollar_re = re.compile(r'\$\$(.*?)\$\$', re.DOTALL)
+    # Ways to escape
+    _pre_code_block_re = re.compile(r"<pre>(.*?)</pre>", re.DOTALL) # Wrapped in <pre>
+    _triple_re = re.compile(r'```(.*?)```', re.DOTALL) # Wrapped in a code block ```
+    _single_re = re.compile(r'(?<!`)(`)(.*?)(?<!`)\1(?!`)') # Wrapped in a single `
+    converter = None
+    # Class-level default only: `run` binds a fresh dict on the instance for every
+    # conversion, so placeholders never leak between instances or documents.
+    code_blocks = {}
+    def _convert_single_match(self, match):
+        '''Convert the contents of one `$...$` span to inline MathML.'''
+        return self.converter.convert(match.group(1))
+    def _convert_double_match(self, match):
+        '''Convert the contents of one `$$...$$` span to block MathML.'''
+        # Join physical lines with a space. This must target real newlines: replacing
+        # the two-character sequence backslash + "n" would corrupt LaTeX commands
+        # such as \nu, \neq or \nabla.
+        return self.converter.convert(match.group(1).replace("\n", ' '), display="block")
+    def code_placeholder(self, match):
+        '''Stash the matched code in `self.code_blocks` and return its placeholder comment.'''
+        placeholder = f"<!--CODE_BLOCK_{len(self.code_blocks)}-->"
+        self.code_blocks[placeholder] = match.group(0)
+        return placeholder
+    def run(self, text):
+        '''
+        Convert the math in `text`, leaving code untouched.
+        Raises:
+            ImportError: if the `latex2mathml` package is not installed
+        '''
+        try:
+            import latex2mathml.converter
+        except ImportError as err:
+            raise ImportError('The "latex" extra requires the "latex2mathml" package to be installed.') from err
+        self.converter = latex2mathml.converter
+        # start every conversion with an empty placeholder table
+        self.code_blocks = {}
+        # Escape by replacing with a code block
+        text = self._pre_code_block_re.sub(self.code_placeholder, text)
+        text = self._single_re.sub(self.code_placeholder, text)
+        text = self._triple_re.sub(self.code_placeholder, text)
+        text = self._single_dollar_re.sub(self._convert_single_match, text)
+        text = self._double_dollar_re.sub(self._convert_double_match, text)
+        # Convert placeholder tag back to original code. Newest first: a later
+        # placeholder may wrap an earlier one, which only surfaces once the
+        # wrapper has been expanded.
+        for placeholder, code_block in reversed(list(self.code_blocks.items())):
+            text = text.replace(placeholder, code_block)
+        return text
+class LinkPatterns(Extra):
+    '''
+    Auto-link given regex patterns in text (e.g. bug number
+    references, revision number references).
+    '''
+    name = 'link-patterns'
+    order = (Stage.LINKS,), ()
+    options: _link_patterns
+    # matches `[text](target)` links and `![alt](target)` images
+    _basic_link_re = re.compile(r'!?\[.*?\]\(.*?\)')
+    def run(self, text):
+        '''
+        Wrap every occurrence of the configured patterns in an `<a>` tag.
+        `self.options` is iterated as `(regex, repl)` pairs, where `repl` is
+        either a callable taking the match and returning the href, or a
+        template expanded with `match.expand`. Generated links are swapped for
+        hash keys while patterns are processed and restored at the end.
+        '''
+        link_from_hash = {}
+        for regex, repl in self.options:
+            replacements = []
+            for match in regex.finditer(text):
+                # skip matches that overlap the hash key of an already generated link
+                if any(self.md._match_overlaps_substr(text, match, h) for h in link_from_hash):
+                    continue
+                if callable(repl):
+                    href = repl(match)
+                else:
+                    href = match.expand(repl)
+                replacements.append((match.span(), href))
+            # apply right-to-left so the spans recorded above stay valid while text is spliced
+            for (start, end), href in reversed(replacements):
+                # Do not match against links inside brackets.
+                if text[start - 1:start] == '[' and text[end:end + 1] == ']':
+                    continue
+                # Do not match against links in the standard markdown syntax.
+                if text[start - 2:start] == '](' or text[end:end + 2] == '")':
+                    continue
+                # Do not match against links which are escaped.
+                if text[start - 3:start] == '"""' and text[end:end + 3] == '"""':
+                    # drop the surrounding triple quotes, keep the text itself unlinked
+                    text = text[:start - 3] + text[start:end] + text[end + 3:]
+                    continue
+                # search the text for anything that looks like a link
+                is_inside_link = False
+                for link_re in (self.md._auto_link_re, self._basic_link_re):
+                    for match in link_re.finditer(text):
+                        if any((r[0] <= start and end <= r[1]) for r in match.regs):
+                            # if the link pattern start and end pos is within the bounds of
+                            # something that looks like a link, then don't process it
+                            is_inside_link = True
+                            break
+                    else:
+                        # inner loop found nothing: try the next link regex
+                        continue
+                    # inner loop broke out: stop searching altogether
+                    break
+                if is_inside_link:
+                    continue
+                escaped_href = (
+                    href.replace('"', '&quot;')  # b/c of attr quote
+                        # To avoid markdown <em> and <strong>:
+                        .replace('*', self.md._escape_table['*'])
+                        .replace('_', self.md._escape_table['_']))
+                link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
+                hash = _hash_text(link)
+                link_from_hash[hash] = link
+                text = text[:start] + hash + text[end:]
+        # put the real links back in place of their hash keys
+        for hash, link in list(link_from_hash.items()):
+            text = text.replace(hash, link)
+        return text
+    def test(self, text):
+        '''Always True: this extra runs unconditionally.'''
+        return True
+class MarkdownInHTML(Extra):
+    '''
+    Allow the use of `markdown="1"` in a block HTML tag to
+    have markdown processing be done on its contents. Similar to
+    <http://michelf.com/projects/php-markdown/extra/#markdown-attr> but with
+    some limitations.
+    '''
+    name = 'markdown-in-html'
+    order = (), (Stage.HASH_HTML,)
+    def test(self, text):
+        '''Always True: this extra runs unconditionally.'''
+        return True
+    def _process_block(self, block):
+        '''Outdent one block, pass it through `_hash_html_block_sub`, then restore its indent.'''
+        indent, outdented = self.md._uniform_outdent(block)
+        hashed = self.md._hash_html_block_sub(outdented)
+        return self.md._uniform_indent(hashed, indent, include_empty_lines=True, indent_empty_lines=False)
+    def run(self, text):
+        '''Run `_process_block` over every block found by `_strict_tag_block_sub`.'''
+        return self.md._strict_tag_block_sub(text, self.md._block_tags_a, self._process_block, True)
+class Mermaid(FencedCodeBlocks):
+    '''
+    Fenced code blocks whose language is `mermaid` are wrapped in mermaid
+    `<pre>`/`<div>` tags; every other language keeps the parent's tags.
+    '''
+    name = 'mermaid'
+    order = (FencedCodeBlocks,), ()
+    def tags(self, lexer_name):
+        '''Return the (opening, closing) tags for a block with the given language.'''
+        if lexer_name != 'mermaid':
+            return super().tags(lexer_name)
+        return ('<pre class="mermaid-pre"><div class="mermaid">', '</div></pre>')
+class MiddleWordEm(ItalicAndBoldProcessor):
+    '''
+    Allows or disallows emphasis syntax in the middle of words,
+    defaulting to allow. Disabling this means that `this_text_here` will not be
+    converted to `this<em>text</em>here`.
+    '''
+    name = 'middle-word-em'
+    order = (CodeFriendly,), (Stage.ITALIC_AND_BOLD,)
+    def __init__(self, md: Markdown, options: Union[dict, bool]):
+        '''
+        Args:
+            md: the markdown instance
+            options: can be bool for backwards compatibility but will be converted to a dict
+                in the constructor. `None` is treated as "no options given". All options are:
+                - allowed (bool): whether to allow emphasis in the middle of a word.
+                    If `options` is a bool it will be placed under this key.
+        '''
+        if isinstance(options, bool):
+            options = {'allowed': options}
+        elif options is None:
+            # enabling the extra without a value must not crash on `setdefault`
+            options = {}
+        # NOTE: a dict passed by the caller is updated in place, as before
+        options.setdefault('allowed', True)
+        super().__init__(md, options)
+        # keep the unrestricted pattern around: `run` uses it to find leftover ems
+        self.liberal_em_re = self.em_re
+        if not options['allowed']:
+            # only accept emphasis markers that sit on word boundaries
+            self.em_re = re.compile(r'(?<=\b)%s(?=\b)' % self.liberal_em_re.pattern, self.liberal_em_re.flags)
+    def run(self, text):
+        '''Process strong and strict em syntax, then hash whatever em syntax is left.'''
+        # run strong and whatnot first
+        # this also will process all strict ems
+        text = super().run(text)
+        if self.md.order < self.md.stage:
+            # hash all non-valid ems
+            text = self.liberal_em_re.sub(self.sub_hash, text)
+        return text
+    def sub(self, match: re.Match) -> str:
+        '''Emit `<em>` for single-character markers; defer anything longer to the parent.'''
+        syntax = match.group(1)
+        if len(syntax) != 1:
+            # strong syntax
+            return super().sub(match)
+        return '<em>%s</em>' % match.group(2)
+class Numbering(Extra):
+    '''
+    Support of generic counters.  Non standard extension to
+    allow sequential numbering of figures, tables, equations, exhibits etc.
+    '''
+    name = 'numbering'
+    order = (Stage.LINK_DEFS,), ()
+    # Compiled once at class creation instead of on every `run` call.
+    _definition_re = re.compile(r'''
+        \[\#(\w+) # the counter.  Open square plus hash plus a word \1
+        ([^@]*)   # Some optional characters, that aren't an @. \2
+        @(\w+)       # the id.  Should this be normed? \3
+        ([^\]]*)\]   # The rest of the text up to the terminating ] \4
+        ''', re.VERBOSE)
+    _reference_re = re.compile(r"\[@(\w+)\s*\]")  # [@ref_id]
+    _definition_html = '<figcaption class="{}" id="counter-ref-{}">{}{}{}</figcaption>'
+    _reference_html = '<a class="{}" href="#counter-ref-{}">{}</a>'
+    def run(self, text):
+        '''
+        Number every `[#counter text @id text]` definition and link every
+        `[@id]` reference to it. References to an unknown id are rendered as
+        `?id?` pointing at `#counter-ref-countererror`.
+        '''
+        counters = {}
+        references = {}
+        def define(match):
+            # the pattern always yields exactly four groups, so unpack directly
+            counter, text_before, ref_id, text_after = match.groups()
+            number = counters.get(counter, 1)
+            references[ref_id] = (number, counter)
+            counters[counter] = number + 1
+            return self._definition_html.format(counter, ref_id, text_before.strip(), number, text_after)
+        def refer(match):
+            ref_id = match.group(1)
+            number, counter = references.get(ref_id, (None, None))
+            if number is not None:
+                repl = self._reference_html.format(counter, ref_id, number)
+            else:
+                repl = self._reference_html.format(ref_id, 'countererror', '?' + ref_id + '?')
+            if "smarty-pants" in self.md.extras:
+                repl = repl.replace('"', self.md._escape_table['"'])
+            return repl
+        # First pass to define all the references. `re.sub` calls `define` from
+        # left to right, so counters increase in document order.
+        text = self._definition_re.sub(define, text)
+        # Second pass to replace the references with the right value of the
+        # counter; by now `references` holds every definition in the document.
+        return self._reference_re.sub(refer, text)
+class PyShell(Extra):
+    '''
+    Treats unindented Python interactive shell sessions as <code>
+    blocks.
+    '''
+    name = 'pyshell'
+    order = (), (Stage.LISTS,)
+    def test(self, text):
+        '''Return True when `text` contains a `>>>` prompt.'''
+        return ">>>" in text
+    def sub(self, match: re.Match) -> str:
+        '''
+        Turn one matched shell session into a code block: a `pycon` fenced
+        block when the fenced-code-blocks extra is enabled, otherwise a block
+        indented by one tab width.
+        '''
+        session = match.group(0)
+        if "fenced-code-blocks" in self.md.extras:
+            fenced = "```pycon\n" + _dedent(session) + "```\n"
+            return self.md.extra_classes['fenced-code-blocks'].run(fenced)
+        lines = session.splitlines()
+        _dedentlines(lines)
+        pad = ' ' * self.md.tab_width
+        # the leading newline separates the block from a possible cuddled paragraph
+        return '\n' + pad + ('\n' + pad).join(lines) + '\n'
+    def run(self, text):
+        '''Find every unindented shell session in `text` and replace it via `sub`.'''
+        max_lead = self.md.tab_width - 1
+        session_re = re.compile(r"""
+            ^([ ]{0,%d})>>>[ ].*\n  # first line
+            ^(\1[^\S\n]*\S.*\n)*    # any number of subsequent lines with at least one character
+            (?=^\1?\n|\Z)           # ends with a blank line or end of document
+            """ % max_lead, re.M | re.X)
+        return session_re.sub(self.sub, text)
+class SmartyPants(Extra):
+    '''
+    Replaces ' and " with curly quotation marks or curly
+    apostrophes.  Replaces --, ---, ..., and . . . with en dashes, em dashes,
+    and ellipses.
+    '''
+    name = 'smarty-pants'
+    order = (), (Stage.SPAN_GAMUT,)
+    _opening_single_quote_re = re.compile(r"(?<!\S)'(?=\S)")
+    _opening_double_quote_re = re.compile(r'(?<!\S)"(?=\S)')
+    _closing_single_quote_re = re.compile(r"(?<=\S)'")
+    _closing_double_quote_re = re.compile(r'(?<=\S)"(?=(\s|,|;|\.|\?|!|$))')
+    # "smarty-pants" extra: Very liberal in interpreting a single prime as an
+    # apostrophe; e.g. ignores the fact that "round", "bout", "twer", and
+    # "twixt" can be written without an initial apostrophe. This is fine because
+    # using scare quotes (single quotation marks) is rare.
+    _apostrophe_year_re = re.compile(r"'(\d\d)(?=(\s|,|;|\.|\?|!|$))")
+    _contractions = ["tis", "twas", "twer", "neath", "o", "n",
+        "round", "bout", "twixt", "nuff", "fraid", "sup"]
+    def test(self, text):
+        '''Return True when `text` contains a single or a double quote.'''
+        return "'" in text or '"' in text
+    def contractions(self, text: str) -> str:
+        '''
+        Curl the apostrophe in two-digit years and in front of the known
+        contractions, in both lower-case and capitalised form.
+        '''
+        text = self._apostrophe_year_re.sub(r"&#8217;\1", text)
+        for word in self._contractions:
+            for form in (word, word.capitalize()):
+                text = text.replace("'" + form, "&#8217;" + form)
+        return text
+    def run(self, text):
+        """Fancifies 'single quotes', "double quotes", and apostrophes.
+        Converts --, ---, and ... into en dashes, em dashes, and ellipses.
+        Inspiration is: <http://daringfireball.net/projects/smartypants/>
+        See "test/tm-cases/smarty_pants.text" for a full discussion of the
+        support here and
+        <http://code.google.com/p/python-markdown2/issues/detail?id=42> for a
+        discussion of some diversion from the original SmartyPants.
+        """
+        if "'" in text:  # guard for perf
+            text = self.contractions(text)
+            text = self._opening_single_quote_re.sub("&#8216;", text)
+            text = self._closing_single_quote_re.sub("&#8217;", text)
+        if '"' in text:  # guard for perf
+            text = self._opening_double_quote_re.sub("&#8220;", text)
+            text = self._closing_double_quote_re.sub("&#8221;", text)
+        # order matters: the longer sequences must be replaced before the shorter ones
+        punctuation = (
+            ("---", "&#8212;"),
+            ("--", "&#8211;"),
+            ("...", "&#8230;"),
+            (" . . . ", "&#8230;"),
+            (". . .", "&#8230;"),
+        )
+        for plain, entity in punctuation:
+            text = text.replace(plain, entity)
+        # TODO: Temporary hack to fix https://github.com/trentm/python-markdown2/issues/150
+        if "footnotes" in self.md.extras and "footnote-ref" in text:
+            # Quotes in the footnote back ref get converted to "smart" quotes
+            # Change them back here to ensure they work.
+            text = text.replace('class="footnote-ref&#8221;', 'class="footnote-ref"')
+        return text
+class Strike(Extra):
+    '''
+    Text inside of double tilde is ~~strikethrough~~
+    '''
+    name = 'strike'
+    order = (Stage.ITALIC_AND_BOLD,), ()
+    _strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)
+    def test(self, text):
+        '''Return True when `text` contains a double tilde.'''
+        return '~~' in text
+    def run(self, text):
+        '''Wrap the contents of every `~~...~~` span in `<s>` tags.'''
+        def strike(match):
+            return "<s>%s</s>" % match.group(1)
+        return self._strike_re.sub(strike, text)
+class Tables(Extra):
+    '''
+    Tables using the same format as GFM
+    <https://help.github.com/articles/github-flavored-markdown#tables> and
+    PHP-Markdown Extra <https://michelf.ca/projects/php-markdown/extra/#table>.
+    '''
+    name = 'tables'
+    order = (), (Stage.LISTS,)
+    def run(self, text):
+        """Copying PHP-Markdown and GFM table syntax. Some regex borrowed from
+        https://github.com/michelf/php-markdown/blob/lib/Michelf/Markdown.php#L2538
+        """
+        less_than_tab = self.md.tab_width - 1
+        table_re = re.compile(r'''
+                (?:(?<=\n)|\A\n?)             # leading blank line
+                ^[ ]{0,%d}                      # allowed whitespace
+                (.*[|].*)[ ]*\n                   # $1: header row (at least one pipe)
+                ^[ ]{0,%d}                      # allowed whitespace
+                (                               # $2: underline row
+                    # underline row with leading bar
+                    (?:  \|\ *:?-+:?\ *  )+  \|? \s?[ ]*\n
+                    |
+                    # or, underline row without leading bar
+                    (?:  \ *:?-+:?\ *\|  )+  (?:  \ *:?-+:?\ *  )? \s?[ ]*\n
+                )
+                (                               # $3: data rows
+                    (?:
+                        ^[ ]{0,%d}(?!\ )         # ensure line begins with 0 to less_than_tab spaces
+                        .*\|.*[ ]*\n
+                    )+
+                )
+            ''' % (less_than_tab, less_than_tab, less_than_tab), re.M | re.X)
+        return table_re.sub(self.sub, text)
+    def sub(self, match: re.Match) -> str:
+        '''Render one matched table (header row, underline row, data rows) as an HTML `<table>`.'''
+        trim_space_re = '^[ \t\n]+|[ \t\n]+$'
+        trim_bar_re = r'^\||\|$'
+        split_bar_re = r'^\||(?<![\`\\])\|'
+        escape_bar_re = r'\\\|'
+        def split_cells(row):
+            # trim outer whitespace, then outer bars, split on bars that are not
+            # preceded by a backtick or backslash, and finally unescape `\|`
+            trimmed = re.sub(trim_bar_re, "", re.sub(trim_space_re, "", row))
+            return [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, trimmed)]
+        def render_cells(tag, row):
+            return [
+                '  <%s%s>%s</%s>' % (tag, align_from_col_idx.get(idx, ''), self.md._run_span_gamut(cell), tag)
+                for idx, cell in enumerate(split_cells(row))
+            ]
+        head, underline, body = match.groups()
+        # Determine aligns for columns.
+        align_from_col_idx = {}
+        for idx, marker in enumerate(split_cells(underline)):
+            colon_left = marker[0] == ':'
+            colon_right = marker[-1] == ':'
+            if colon_left and colon_right:
+                align_from_col_idx[idx] = ' style="text-align:center;"'
+            elif colon_left:
+                align_from_col_idx[idx] = ' style="text-align:left;"'
+            elif colon_right:
+                align_from_col_idx[idx] = ' style="text-align:right;"'
+        # thead
+        hlines = [
+            '<table%s>' % self.md._html_class_str_from_tag('table'),
+            '<thead%s>' % self.md._html_class_str_from_tag('thead'),
+            '<tr>',
+        ]
+        hlines.extend(render_cells('th', head))
+        hlines.extend(['</tr>', '</thead>'])
+        # tbody
+        hlines.append('<tbody>')
+        for line in body.strip('\n').split('\n'):
+            hlines.append('<tr>')
+            hlines.extend(render_cells('td', line))
+            hlines.append('</tr>')
+        hlines.extend(['</tbody>', '</table>'])
+        return '\n'.join(hlines) + '\n'
+class TelegramSpoiler(Extra):
+    '''
+    Text inside of double pipes is rendered as a ||spoiler||, using the
+    `<tg-spoiler>` tag.
+    '''
+    name = 'tg-spoiler'
+    order = (), (Stage.ITALIC_AND_BOLD,)
+    _tg_spoiler_re = re.compile(r"\|\|\s?(.+?)\s?\|\|", re.S)
+    def test(self, text):
+        '''Return True when `text` contains a double pipe.'''
+        return '||' in text
+    def run(self, text):
+        '''Wrap the contents of every `||...||` span in `<tg-spoiler>` tags.'''
+        def spoiler(match):
+            return "<tg-spoiler>%s</tg-spoiler>" % match.group(1)
+        return self._tg_spoiler_re.sub(spoiler, text)
+class Underline(Extra):
+    '''
+    Text inside of double dash is --underlined--.
+    '''
+    name = 'underline'
+    order = (Stage.ITALIC_AND_BOLD,), ()
+    # the lookarounds keep the dashes of `<!--` and `-->` from acting as markers
+    _underline_re = re.compile(r"(?<!<!)--(?!>)(?=\S)(.+?)(?<=\S)(?<!<!)--(?!>)", re.S)
+    def test(self, text):
+        '''Return True when `text` contains a double dash.'''
+        return '--' in text
+    def run(self, text):
+        '''Wrap the contents of every `--...--` span in `<u>` tags.'''
+        def underline(match):
+            return "<u>%s</u>" % match.group(1)
+        return self._underline_re.sub(underline, text)
+class _WavedromExtraOpts(TypedDict, total=False):
+    '''
+    Options for the `Wavedrom` extra.
+    Declared with `total=False`, so every key may be omitted.
+    '''
+    prefer_embed_svg: bool
+    '''
+    Use the `wavedrom` library to convert diagrams to SVGs and embed them directly.
+    This will only work if the `wavedrom` library has been installed.
+    Defaults to `True`
+    '''
+class Wavedrom(Extra):
+    '''
+    Support for generating Wavedrom digital timing diagrams
+    '''
+    name = 'wavedrom'
+    order = (Stage.CODE_BLOCKS, FencedCodeBlocks), ()
+    options: _WavedromExtraOpts
+    def test(self, text):
+        '''
+        Return True when any fenced block in `text` is tagged `wavedrom`.
+        Text without any fenced block also returns True, as before; `run` is
+        then a no-op.
+        '''
+        blocks = list(FencedCodeBlocks.fenced_code_block_re.finditer(text))
+        # look at every block: the wavedrom one need not be the first in the document
+        return not blocks or any(block.group(2) == 'wavedrom' for block in blocks)
+    def sub(self, match: re.Match) -> str:
+        '''
+        Render one fenced block. Blocks in any language other than `wavedrom`
+        are returned unchanged so they are left for the other extras.
+        '''
+        if match.group(2) != 'wavedrom':
+            return match.group(0)
+        # dedent the block for processing
+        lead_indent, waves = self.md._uniform_outdent(match.group(3))
+        # default tags to wrap the wavedrom block in
+        open_tag, close_tag = '<script type="WaveDrom">\n', '</script>'
+        # check if the user would prefer to have the SVG embedded directly
+        embed_svg = self.options.get('prefer_embed_svg', True)
+        if embed_svg:
+            try:
+                import wavedrom
+                waves = wavedrom.render(waves).tostring()
+                open_tag, close_tag = '<div>', '\n</div>'
+            except ImportError:
+                # deliberate best effort: without the library, fall back to the <script> tag
+                pass
+        # hash SVG to prevent <> chars being messed with
+        self.md._escape_table[waves] = _hash_text(waves)
+        return self.md._uniform_indent(
+            '\n%s%s%s\n' % (open_tag, self.md._escape_table[waves], close_tag),
+            lead_indent, include_empty_lines=True
+        )
+    def run(self, text):
+        '''Apply `sub` to every fenced code block found in `text`.'''
+        return FencedCodeBlocks.fenced_code_block_re.sub(self.sub, text)
+class WikiTables(Extra):
+    '''
+    Google Code Wiki-style tables. See
+    <http://code.google.com/p/support/wiki/WikiSyntax#Tables>.
+    '''
+    name = 'wiki-tables'
+    order = (Tables,), ()
+    def run(self, text):
+        '''Find every wiki-style table in `text` and replace it via `sub`.'''
+        less_than_tab = self.md.tab_width - 1
+        wiki_table_re = re.compile(r'''
+            (?:(?<=\n\n)|\A\n?)            # leading blank line
+            ^([ ]{0,%d})\|\|.+?\|\|[ ]*\n  # first line
+            (^\1\|\|.+?\|\|\n)*        # any number of subsequent lines
+            ''' % less_than_tab, re.M | re.X)
+        return wiki_table_re.sub(self.sub, text)
+    def sub(self, match: re.Match) -> str:
+        '''
+        Render one matched wiki table as an HTML `<table>`. The first row becomes
+        a `<thead>` when its first cell starts with `~`.
+        '''
+        ttext = match.group(0).strip()
+        rows = []
+        for line in ttext.splitlines():
+            # drop the outer `||` pair, then split on the `||` not preceded by a backslash
+            line = line.strip()[2:-2].strip()
+            row = [c.strip() for c in re.split(r'(?<!\\)\|\|', line)]
+            rows.append(row)
+        hlines = []
+        def add_hline(line, indents=0):
+            hlines.append((self.md.tab * indents) + line)
+        def format_cell(text):
+            # Use the argument itself: the previous version read the enclosing
+            # loop variable `cell`, which only worked by coincidence.
+            return self.md._run_span_gamut(re.sub(r"^\s*~", "", text).strip(" "))
+        add_hline('<table%s>' % self.md._html_class_str_from_tag('table'))
+        # Check if first cell of first row is a header cell. If so, assume the whole row is a header row.
+        if rows and rows[0] and re.match(r"^\s*~", rows[0][0]):
+            add_hline('<thead%s>' % self.md._html_class_str_from_tag('thead'), 1)
+            add_hline('<tr>', 2)
+            for cell in rows[0]:
+                add_hline("<th>{}</th>".format(format_cell(cell)), 3)
+            add_hline('</tr>', 2)
+            add_hline('</thead>', 1)
+            # Only one header row allowed.
+            rows = rows[1:]
+        # If no more rows, don't create a tbody.
+        if rows:
+            add_hline('<tbody>', 1)
+            for row in rows:
+                add_hline('<tr>', 2)
+                for cell in row:
+                    add_hline('<td>{}</td>'.format(format_cell(cell)), 3)
+                add_hline('</tr>', 2)
+            add_hline('</tbody>', 1)
+        add_hline('</table>')
+        return '\n'.join(hlines) + '\n'
+    def test(self, text):
+        '''Return True when `text` contains a double pipe.'''
+        return '||' in text
+# Register extras
+# Runs at import time: `register()` is called once on each built-in extra class.
+# NOTE(review): presumably this adds the class to the registry the Markdown
+# instance reads its extras from -- confirm against `Extra.register`.
+Admonitions.register()
+Alerts.register()
+Breaks.register()
+CodeFriendly.register()
+FencedCodeBlocks.register()
+Latex.register()
+LinkPatterns.register()
+MarkdownInHTML.register()
+MiddleWordEm.register()
+Mermaid.register()
+Numbering.register()
+PyShell.register()
+SmartyPants.register()
+Strike.register()
+Tables.register()
+TelegramSpoiler.register()
+Underline.register()
+Wavedrom.register()
+WikiTables.register()
+# ----------------------------------------------------------
+# ---- internal support functions
+def calculate_toc_html(toc: Union[List[Tuple[int, str, str]], None]) -> Optional[str]:
+    """Return the HTML for the given TOC as nested `<ul>` lists.
+    Args:
+        toc: `(level, id, name)` entries in document order, or None
+    Returns:
+        The list markup terminated by a newline, or None when `toc` is None
+    """
+    if toc is None:
+        return None
+    out = []
+    open_levels = [0]   # stack of header-level numbers
+    def pad():
+        return '  ' * (len(open_levels) - 1)
+    def close_last_item():
+        if not out[-1].endswith("</li>"):
+            out[-1] += "</li>"
+    for level, anchor, title in toc:
+        current = open_levels[-1]
+        if level > current:
+            # deeper header: open a nested list
+            out.append(pad() + "<ul>")
+            open_levels.append(level)
+        elif level == current:
+            # sibling header: close the previous item
+            out[-1] += "</li>"
+        else:
+            # shallower header: unwind every list that is deeper than it
+            while level < open_levels[-1]:
+                open_levels.pop()
+                close_last_item()
+                out.append(pad() + "</ul></li>")
+        out.append('%s<li><a href="#%s">%s</a>' % (pad(), anchor, title))
+    # close whatever is still open at the end of the document
+    while len(open_levels) > 1:
+        open_levels.pop()
+        close_last_item()
+        out.append(pad() + "</ul>")
+    return '\n'.join(out) + '\n'
+class UnicodeWithAttrs(str):
+    """A subclass of `str` used for the return value of conversion to
+    possibly attach some attributes. E.g. the "toc_html" attribute when
+    the "toc" extra is used.
+    """
+    # both attributes default to None at class level
+    metadata: Optional[Dict[str, str]] = None
+    toc_html: Optional[str] = None
+## {{{ http://code.activestate.com/recipes/577257/ (r1)
+_slugify_strip_re = re.compile(r'[^\w\s-]')
+_slugify_hyphenate_re = re.compile(r'[-\s]+')
+def _slugify(value: str) -> str:
     """
     Normalizes string, converts to lowercase, removes non-alpha characters,
     and converts spaces to hyphens.
@@ -2735,15 +3620,14 @@ def _slugify(value):
     From Django's "django/template/defaultfilters.py".
     """
     import unicodedata
-    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode()
+    value = unicodedata.normalize('NFKD', value).encode('utf-8', 'ignore').decode()
     value = _slugify_strip_re.sub('', value).strip().lower()
     return _slugify_hyphenate_re.sub('-', value)
 ## end of http://code.activestate.com/recipes/577257/ }}}
 # From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
-def _curry(*args, **kwargs):
-    function, args = args[0], args[1:]
+def _curry(function: Callable, *args, **kwargs) -> Callable:
     def result(*rest, **kwrest):
         combined = kwargs.copy()
         combined.update(kwrest)
@@ -2752,7 +3636,7 @@ def _curry(*args, **kwargs):
 # Recipe: regex_from_encoded_pattern (1.0)
-def _regex_from_encoded_pattern(s):
+def _regex_from_encoded_pattern(s: str) -> re.Pattern:
     """'foo'    -> re.compile(re.escape('foo'))
        '/foo/'  -> re.compile('foo')
        '/foo/i' -> re.compile('foo', re.I)
@@ -2782,7 +3666,7 @@ def _regex_from_encoded_pattern(s):
 # Recipe: dedent (0.1.2)
-def _dedentlines(lines, tabsize=8, skip_first_line=False):
+def _dedentlines(lines: List[str], tabsize: int = 8, skip_first_line: bool = False) -> List[str]:
     """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines
         "lines" is a list of lines to dedent.
@@ -2800,7 +3684,8 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
               % (tabsize, skip_first_line))
     margin = None
     for i, line in enumerate(lines):
-        if i == 0 and skip_first_line: continue
+        if i == 0 and skip_first_line:
+            continue
         indent = 0
         for ch in line:
             if ch == ' ':
@@ -2813,16 +3698,19 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
                 break
         else:
             continue  # skip all-whitespace lines
-        if DEBUG: print("dedent: indent=%d: %r" % (indent, line))
+        if DEBUG:
+            print("dedent: indent=%d: %r" % (indent, line))
         if margin is None:
             margin = indent
         else:
             margin = min(margin, indent)
-    if DEBUG: print("dedent: margin=%r" % margin)
+    if DEBUG:
+        print("dedent: margin=%r" % margin)
     if margin is not None and margin > 0:
         for i, line in enumerate(lines):
-            if i == 0 and skip_first_line: continue
+            if i == 0 and skip_first_line:
+                continue
             removed = 0
             for j, ch in enumerate(line):
                 if ch == ' ':
@@ -2830,7 +3718,8 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
                 elif ch == '\t':
                     removed += tabsize - (removed % tabsize)
                 elif ch in '\r\n':
-                    if DEBUG: print("dedent: %r: EOL -> strip up to EOL" % line)
+                    if DEBUG:
+                        print("dedent: %r: EOL -> strip up to EOL" % line)
                     lines[i] = lines[i][j:]
                     break
                 else:
@@ -2852,7 +3741,7 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
     return lines
-def _dedent(text, tabsize=8, skip_first_line=False):
+def _dedent(text: str, tabsize: int = 8, skip_first_line: bool = False) -> str:
     """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text
         "text" is the text to dedent.
@@ -2863,7 +3752,7 @@ def _dedent(text, tabsize=8, skip_first_line=False):
     textwrap.dedent(s), but don't expand tabs to spaces
     """
-    lines = text.splitlines(1)
+    lines = text.splitlines(True)
     _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line)
     return ''.join(lines)
@@ -2895,7 +3784,7 @@ class _memoized(object):
         return self.func.__doc__
-def _xml_oneliner_re_from_tab_width(tab_width):
+def _xml_oneliner_re_from_tab_width(tab_width: int) -> re.Pattern:
     """Standalone XML processing instruction regex."""
     return re.compile(r"""
         (?:
@@ -2917,7 +3806,7 @@ def _xml_oneliner_re_from_tab_width(tab_width):
 _xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)
-def _hr_tag_re_from_tab_width(tab_width):
+def _hr_tag_re_from_tab_width(tab_width: int) -> re.Pattern:
     return re.compile(r"""
         (?:
             (?<=\n\n)       # Starting after a blank line
@@ -2937,7 +3826,7 @@ def _hr_tag_re_from_tab_width(tab_width):
 _hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width)
-def _xml_escape_attr(attr, skip_single_quote=True):
+def _xml_escape_attr(attr: str, skip_single_quote: bool = True) -> str:
     """Escape the given string for use in an HTML/XML tag attribute.
     By default this doesn't bother with escaping `'` to `&#39;`, presuming that
@@ -2954,7 +3843,7 @@ def _xml_escape_attr(attr, skip_single_quote=True):
     return escaped
-def _xml_encode_email_char_at_random(ch):
+def _xml_encode_email_char_at_random(ch: str) -> str:
     r = random()
     # Roughly 10% raw, 45% hex, 45% dec.
     # '@' *must* be encoded. I [John Gruber] insist.
@@ -2968,14 +3857,25 @@ def _xml_encode_email_char_at_random(ch):
         return '&#%s;' % ord(ch)
-def _html_escape_url(attr, safe_mode=False):
-    """Replace special characters that are potentially malicious in url string."""
+def _html_escape_url(
+    attr: str,
+    safe_mode: Union[_safe_mode, bool, None] = False,
+    charset: Optional[str] = None
+):
+    """
+    Replace special characters that are potentially malicious in url string.
+    Args:
+        charset: don't escape characters from this charset. Currently the only
+            exception is for '+' when charset=='base64'
+    """
     escaped = (attr
         .replace('"', '&quot;')
         .replace('<', '&lt;')
         .replace('>', '&gt;'))
     if safe_mode:
-        escaped = escaped.replace('+', ' ')
+        if charset != 'base64':
+            escaped = escaped.replace('+', ' ')
         escaped = escaped.replace("'", "&#39;")
     return escaped
@@ -3065,8 +3965,10 @@ def main(argv=None):
         f = open(opts.link_patterns_file)
         try:
             for i, line in enumerate(f.readlines()):
-                if not line.strip(): continue
-                if line.lstrip().startswith("#"): continue
+                if not line.strip():
+                    continue
+                if line.lstrip().startswith("#"):
+                    continue
                 try:
                     pat, href = line.rstrip().rsplit(None, 1)
                 except ValueError:

pdoc 14.5.1__py3-none-any.whl → 14.6.0__py3-none-any.whl

pdoc 14.5.1__py3-none-any.whl → 14.6.0__py3-none-any.whl