pdoc 13.0.1__py3-none-any.whl → 13.1.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- pdoc/__init__.py +9 -1
- pdoc/__main__.py +7 -0
- pdoc/_compat.py +5 -2
- pdoc/doc.py +33 -4
- pdoc/doc_ast.py +10 -6
- pdoc/markdown2/__init__.py +371 -253
- pdoc/render.py +3 -0
- pdoc/render_helpers.py +1 -0
- pdoc/templates/default/frame.html.jinja2 +1 -0
- pdoc/templates/mermaid.html.jinja2 +18 -0
- {pdoc-13.0.1.dist-info → pdoc-13.1.1.dist-info}/METADATA +1 -1
- {pdoc-13.0.1.dist-info → pdoc-13.1.1.dist-info}/RECORD +16 -15
- {pdoc-13.0.1.dist-info → pdoc-13.1.1.dist-info}/WHEEL +1 -1
- {pdoc-13.0.1.dist-info → pdoc-13.1.1.dist-info}/LICENSE +0 -0
- {pdoc-13.0.1.dist-info → pdoc-13.1.1.dist-info}/entry_points.txt +0 -0
- {pdoc-13.0.1.dist-info → pdoc-13.1.1.dist-info}/top_level.txt +0 -0
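Besides refreshing the vendored markdown2 copy, the file list above shows a new Mermaid template (templates/mermaid.html.jinja2) plus small hooks in render.py, __main__.py and render_helpers.py. A minimal sketch of how the new diagram support might be switched on from the Python API follows; the mermaid= keyword is an assumption modelled on pdoc's existing math= option and is not taken from this diff.

    # Hedged sketch: enable the new Mermaid template when rendering docs.
    # "my_module" is a placeholder module name; mermaid= is an assumed keyword.
    from pathlib import Path

    import pdoc
    import pdoc.render

    pdoc.render.configure(mermaid=True)  # assumption: mirrors the existing math= switch
    pdoc.pdoc("my_module", output_directory=Path("docs"))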
pdoc/markdown2/__init__.py
CHANGED
@@ -1,7 +1,7 @@
-# Taken from here: https://github.com/trentm/python-markdown2/blob/
+# Taken from here: https://github.com/trentm/python-markdown2/blob/bce3f18ed86a19b418c8114a712bb6fee790c4c2/lib/markdown2.py

@@ -61,8 +61,8 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
-    "table", "pre" and "
-    tags.
+    "table", "thead", "pre", "code", "ul" and "ol" tags. Add an issue if you require
+    this for other tags.

@@ -95,6 +95,7 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
+* wavedrom: Support for generating Wavedrom digital timing diagrams

@@ -103,18 +104,18 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
-__version_info__ = (2, 4,
+__version_info__ = (2, 4, 9)
 __version__ = '.'.join(map(str, __version_info__))
 __author__ = "Trent Mick"

-import
-import re
-import logging
-from hashlib import sha256
-import optparse
-from random import random, randint
+import argparse
 import codecs
+import logging
+import re
+import sys
 from collections import defaultdict
+from hashlib import sha256
+from random import randint, random

@@ -123,18 +124,16 @@ log = logging.getLogger("markdown")
-SECRET_SALT = bytes(randint(0, 1000000))
-

+SECRET_SALT = bytes(randint(0, 1000000))
 # MD5 function was previously used for this; the "md5" prefix was kept for
 # backwards compatibility.
 def _hash_text(s):
     return 'md5-' + sha256(SECRET_SALT + s.encode("utf-8")).hexdigest()[32:]

-
 # Table of hash values for escaped characters:
 g_escape_table = dict([(ch, _hash_text(ch))
-
+                       for ch in '\\`*_{}[]()>#+-.!'])

@@ -232,7 +231,7 @@ class Markdown(object):
-                self.extras["header-ids"] = None
+                self.extras["header-ids"] = None  # "toc" implies "header-ids"

@@ -293,8 +292,8 @@ class Markdown(object):
        """,
-
-
+        re.IGNORECASE | re.VERBOSE
+    )

@@ -354,6 +353,9 @@ class Markdown(object):
        text = self.preprocess(text)

+        if 'wavedrom' in self.extras:
+            text = self._do_wavedrom_blocks(text)
+
        if "fenced-code-blocks" in self.extras and not self.safe_mode:
            text = self._do_fenced_code_blocks(text)

@@ -452,18 +454,18 @@ class Markdown(object):
    _meta_data_pattern = re.compile(r'''
-        ^(
+        ^{0}(  # optional opening fence
        (?:
-
+            {1}:(?:\n+[ \t]+.*)+  # indented lists
        )|(?:
-            (?:
-            (?=\n
+            (?:{1}:\s+>(?:\n\s+.*)+?)  # multiline long descriptions
+            (?=\n{1}:\s*.*\n|\s*\Z)  # match up until the start of the next key:value definition or the end of the input text
        )|(?:
-
+            {1}:(?! >).*\n?  # simple key:value pair, leading spaces allowed
        )
-    )
-    ''', re.MULTILINE | re.VERBOSE
-
+        ){0}  # optional closing fence
+        '''.format(r'(?:---[\ \t]*\n)?', r'[\S \t]*\w[\S \t]*\s*'), re.MULTILINE | re.VERBOSE
+    )
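The reworked _meta_data_pattern above accepts an optional "---" fence around the metadata block and more forgiving key:value syntax. A small sketch of the behaviour this targets, using the vendored module through the metadata extra (the printed dict is the expected result, not copied from this diff):

    from pdoc.markdown2 import Markdown

    text = "---\ntitle: Example\ntags: demo\n---\n\n# Hello\n"
    html = Markdown(extras=["metadata"]).convert(text)
    print(html.metadata)   # expected: {'title': 'Example', 'tags': 'demo'}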
@@ -546,8 +548,7 @@ class Markdown(object):
-    _emacs_oneliner_vars_pat = re.compile(r"((?:<!--)?\s*-\*-)\s*(?:(\S[^\r\n]*?)([\r\n]\s*)?)?(-\*-\s*(?:-->)?)",
-                                          re.UNICODE)
+    _emacs_oneliner_vars_pat = re.compile(r"((?:<!--)?\s*-\*-)\s*(?:(\S[^\r\n]*?)([\r\n]\s*)?)?(-\*-\s*(?:-->)?)", re.UNICODE)

@@ -629,7 +630,7 @@ class Markdown(object):
-            if i != len(lines)
+            if i != len(lines)-1 and not line.endswith(suffix):

@@ -668,7 +669,7 @@ class Markdown(object):
            if len(val) > 1 and (val.startswith('"') and val.endswith('"')
-
+                                 or val.startswith('"') and val.endswith('"')):

@@ -724,7 +725,7 @@ class Markdown(object):
        """ % _block_tags_a,
-
+        re.X | re.M)

@@ -740,13 +741,16 @@ class Markdown(object):
        """ % _block_tags_b,
-
+        re.X | re.M)

    _html_markdown_attr_re = re.compile(
        r'''\s+markdown=("1"|'1')''')
-
    def _hash_html_block_sub(self, match, raw=False):
-
+        if isinstance(match, str):
+            html = match
+        else:
+            html = match.group(1)
+

@@ -762,8 +766,8 @@ class Markdown(object):
            return ''.join(["\n\n", f_key,
-
-
+                            "\n\n", middle, "\n\n",
+                            l_key, "\n\n"])

@@ -797,7 +801,7 @@ class Markdown(object):
-        text = self.
+        text = self._strict_tag_block_sub(text, self._block_tags_a, hash_html_block_sub)

@@ -840,7 +844,7 @@ class Markdown(object):
-            elif text[start_idx
+            elif text[start_idx-2:start_idx] == '\n\n':

@@ -852,7 +856,7 @@ class Markdown(object):
-            if text[end_idx:end_idx
+            if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'):

@@ -876,6 +880,39 @@ class Markdown(object):
+    def _strict_tag_block_sub(self, text, html_tags_re, callback):
+        tag_count = 0
+        current_tag = html_tags_re
+        block = ''
+        result = ''
+
+        for chunk in text.splitlines(True):
+            is_markup = re.match(r'^(?:</code>(?=</pre>))?(</?(%s)\b>?)' % current_tag, chunk)
+            block += chunk
+
+            if is_markup:
+                if chunk.startswith('</'):
+                    tag_count -= 1
+                else:
+                    # if close tag is in same line
+                    if '</%s>' % is_markup.group(2) in chunk[is_markup.end():]:
+                        # we must ignore these
+                        is_markup = None
+                    else:
+                        tag_count += 1
+                        current_tag = is_markup.group(2)
+
+            if tag_count == 0:
+                if is_markup:
+                    block = callback(block.rstrip('\n'))  # remove trailing newline
+                current_tag = html_tags_re
+                result += block
+                block = ''
+
+        result += block
+
+        return result
+
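The new _strict_tag_block_sub above walks the text line by line and balances opening and closing block tags, so a nested block (for example a div containing further divs) reaches the hashing callback as one complete unit. A hedged usage sketch of the extra that benefits from this:

    from pdoc.markdown2 import markdown

    text = '<div markdown="1">\n**bold** inside a div\n</div>\n'
    print(markdown(text, extras=["markdown-in-html"]))
    # expected: the div survives and its contents are rendered as Markdown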
@@ -905,7 +942,7 @@ class Markdown(object):
-        key = id.lower()
+        key = id.lower()  # Link IDs are case-insensitive

@@ -1008,7 +1045,7 @@ class Markdown(object):
            ''' % (less_than_tab, self.tab_width, self.tab_width),
-
+            re.X | re.M)

@@ -1020,6 +1057,9 @@ class Markdown(object):
+        if 'wavedrom' in self.extras:
+            text = self._do_wavedrom_blocks(text)
+

@@ -1030,7 +1070,7 @@ class Markdown(object):
-        hr = "\n<hr"
+        hr = "\n<hr"+self.empty_element_suffix+"\n"

@@ -1064,7 +1104,7 @@ class Markdown(object):
-             + indent + ('\n'
+             + indent + ('\n'+indent).join(lines)

@@ -1093,8 +1133,7 @@ class Markdown(object):
-        cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in
-                re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", underline)))]
+        cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", underline)))]

@@ -1105,9 +1144,8 @@ class Markdown(object):
-        hlines = ['<table%s>' % self._html_class_str_from_tag('table'), '<thead>', '<tr>']
-        cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in
-                re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))]
+        hlines = ['<table%s>' % self._html_class_str_from_tag('table'), '<thead%s>' % self._html_class_str_from_tag('thead'), '<tr>']
+        cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))]

@@ -1120,8 +1158,7 @@ class Markdown(object):
-            cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in
-                    re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)))]
+            cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)))]

@@ -1183,7 +1220,7 @@ class Markdown(object):
-            add_hline('<thead>', 1)
+            add_hline('<thead%s>' % self._html_class_str_from_tag('thead'), 1)

@@ -1245,6 +1282,9 @@ class Markdown(object):
+        if "tg-spoiler" in self.extras:
+            text = self._do_tg_spoiler(text)
+

@@ -1259,18 +1299,21 @@ class Markdown(object):
    _sorta_html_tokenize_re = re.compile(r"""
    (
-        #
-
-
-
-
-
-
-
-
-
-
-
+        \\*  # escapes
+        (?:
+            # tag
+            </?
+            (?:\w+)                                     # tag name
+            (?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))*  # attributes
+            \s*/?>
+            |
+            # auto-link (e.g., <http://www.activestate.com/>)
+            <[\w~:/?#\[\]@!$&'\(\)*+,;%=\.\\-]+>
+            |
+            <!--.*?-->      # comment
+            |
+            <\?.*?\?>       # processing instruction
+        )
    )
    """, re.X)

@@ -1281,20 +1324,27 @@ class Markdown(object):
+        lead_escape_re = re.compile(r'^((?:\\\\)*(?!\\))')
        escaped = []
        is_html_markup = False
        for token in self._sorta_html_tokenize_re.split(text):
-
+            # check token is preceded by 0 or more PAIRS of escapes, because escape pairs
+            # escape themselves and don't affect the token
+            if is_html_markup and lead_escape_re.match(token):
                # Within tags/HTML-comments/auto-links, encode * and _
                # so they don't conflict with their use in Markdown for
                # italics and strong. We're replacing each such
                # character with its corresponding MD5 checksum value;
                # this is likely overkill, but it should prevent us from
                # colliding with the escape values by accident.
-
-
+                escape_seq, token = lead_escape_re.split(token)[1:] or ('', token)
+                escaped.append(
+                    escape_seq.replace('\\\\', self._escape_table['\\'])
+                    + token.replace('*', self._escape_table['*'])
+                           .replace('_', self._escape_table['_'])
+                )
            else:
-                escaped.append(self._encode_backslash_escapes(token))
+                escaped.append(self._encode_backslash_escapes(token.replace('\\<', '&lt;')))

@@ -1404,13 +1454,13 @@ class Markdown(object):
-        idx = self._find_non_whitespace(text, start
+        idx = self._find_non_whitespace(text, start+1)
        if idx == len(text):
            return None, None, None
        end_idx = idx
        has_anglebrackets = text[idx] == "<"
        if has_anglebrackets:
-            end_idx = self._find_balanced(text, end_idx
+            end_idx = self._find_balanced(text, end_idx+1, "<", ">")

@@ -1420,8 +1470,18 @@ class Markdown(object):
-
+    def _protect_url(self, url):
+        '''
+        Function that passes a URL through `_html_escape_url` to remove any nasty characters,
+        and then hashes the now "safe" URL to prevent other safety mechanisms from tampering
+        with it (eg: escaping "&" in URL parameters)
+        '''
+        url = _html_escape_url(url, safe_mode=self.safe_mode)
+        key = _hash_text(url)
+        self._escape_table[url] = key
+        return key

+    _safe_protocols = re.compile(r'(https?|ftp):', re.I)
    def _do_links(self, text):
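_protect_url escapes a link target once and then swaps in a hash, so later passes (ampersand encoding, italics/bold, smarty-pants) cannot mangle characters inside the URL. A quick illustrative check against the vendored module (the exact HTML output is not asserted here):

    from pdoc.markdown2 import markdown

    # Underscores and an ampersand inside the URL should come through intact,
    # without being reinterpreted as emphasis or double-escaped.
    print(markdown("[docs](https://example.com/a_b_c?x=1&y=2)"))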
@@ -1467,8 +1527,8 @@ class Markdown(object):
-            for p in range(start_idx
-
+            for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL,
+                                            text_length)):

@@ -1481,7 +1541,7 @@ class Markdown(object):
-            link_text = text[start_idx
+            link_text = text[start_idx+1:p]

@@ -1496,10 +1556,10 @@ class Markdown(object):
-                    text = text[:start_idx] + result + text[p
+                    text = text[:start_idx] + result + text[p+1:]
                else:
                    # This id isn't defined, leave the markup alone.
-                    curr_pos = p
+                    curr_pos = p+1

@@ -1510,40 +1570,40 @@ class Markdown(object):
-                is_img = start_idx > 0 and text[start_idx
+                is_img = start_idx > 0 and text[start_idx-1] == "!"
                if is_img:
                    start_idx -= 1

                # We've got to encode these to avoid conflicting
                # with italics/bold.
                url = url.replace('*', self._escape_table['*']) \
-
+                         .replace('_', self._escape_table['_'])
                if title:
                    title_str = ' title="%s"' % (
                        _xml_escape_attr(title)
-
-
+                            .replace('*', self._escape_table['*'])
+                            .replace('_', self._escape_table['_']))
                else:
                    title_str = ''
                if is_img:
                    img_class_str = self._html_class_str_from_tag("img")
                    result = '<img src="%s" alt="%s"%s%s%s' \
-
-
-
-
-
+                        % (self._protect_url(url),
+                           _xml_escape_attr(link_text),
+                           title_str,
+                           img_class_str,
+                           self.empty_element_suffix)
                    if "smarty-pants" in self.extras:
                        result = result.replace('"', self._escape_table['"'])
                    curr_pos = start_idx + len(result)
+                    anchor_allowed_pos = start_idx + len(result)
                    text = text[:start_idx] + result + text[url_end_idx:]
                elif start_idx >= anchor_allowed_pos:
                    safe_link = self._safe_protocols.match(url) or url.startswith('#')
                    if self.safe_mode and not safe_link:
                        result_head = '<a href="#"%s>' % (title_str)
                    else:
-                        result_head = '<a href="%s"%s>' % (
-                            _html_escape_url(url, safe_mode=self.safe_mode), title_str)
+                        result_head = '<a href="%s"%s>' % (self._protect_url(url), title_str)

@@ -1562,7 +1622,7 @@ class Markdown(object):
-                is_img = start_idx > 0 and text[start_idx
+                is_img = start_idx > 0 and text[start_idx-1] == "!"

@@ -1573,7 +1633,7 @@ class Markdown(object):
                    url = url.replace('*', self._escape_table['*']) \
-
+                             .replace('_', self._escape_table['_'])

@@ -1585,11 +1645,11 @@ class Markdown(object):
                        result = '<img src="%s" alt="%s"%s%s%s' \
-
-
-
-
-
+                            % (self._protect_url(url),
+                               _xml_escape_attr(link_text),
+                               title_str,
+                               img_class_str,
+                               self.empty_element_suffix)

@@ -1598,8 +1658,7 @@ class Markdown(object):
-                        result_head = '<a href="%s"%s>' % (
-                            _html_escape_url(url, safe_mode=self.safe_mode), title_str)
+                        result_head = '<a href="%s"%s>' % (self._protect_url(url), title_str)

@@ -1687,7 +1746,7 @@ class Markdown(object):
            header_id = self.header_id_from_text(header_group,
-
+                                                 self.extras["header-ids"], n)

@@ -1721,12 +1780,21 @@ class Markdown(object):
    def _list_sub(self, match):
        lst = match.group(1)
-        lst_type = match.group(
+        lst_type = match.group(4) in self._marker_ul_chars and "ul" or "ol"
+
+        if lst_type == 'ol' and match.group(4) != '1.':
+            # if list doesn't start at 1 then set the ol start attribute
+            lst_opts = ' start="%s"' % match.group(4)[:-1]
+        else:
+            lst_opts = ''
+
+        lst_opts = lst_opts + self._html_class_str_from_tag(lst_type)
+
        result = self._process_list_items(lst)
        if self.list_level:
-            return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type)
+            return "<%s%s>\n%s</%s>\n" % (lst_type, lst_opts, result, lst_type)
        else:
-            return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type)
+            return "<%s%s>\n%s</%s>\n\n" % (lst_type, lst_opts, result, lst_type)
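With the updated _list_sub, an ordered list that does not begin at 1 now carries a start attribute. Expected effect, sketched with the vendored module:

    from pdoc.markdown2 import markdown

    print(markdown("3. third\n4. fourth\n"))
    # expected to contain: <ol start="3">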
@@ -1740,16 +1808,17 @@ class Markdown(object):
            less_than_tab = self.tab_width - 1
+            other_marker_pat = self._marker_ul if marker_pat == self._marker_ol else self._marker_ol
            whole_list = r'''
            (                   # \1 = whole list
              (                 # \2
-                [ ]{0,%d}
-                (%s)            # \
+                ([ ]{0,%d})     # \3 = the indentation level of the list item marker
+                (%s)            # \4 = first list item marker
                [ \t]+
-                (?!\ *\
+                (?!\ *\4\ )     # '- - - ...' isn't a list. See 'not_quite_a_list' test case.
              )
              (?:.+?)
-              (                 # \
+              (                 # \5
                  \Z
                |
                  \n{2,}

@@ -1758,13 +1827,19 @@ class Markdown(object):
                  [ \t]*
                  %s[ \t]+
                )
+              |
+                \n+
+                (?=
+                  \3            # lookahead for a different style of list item marker
+                  %s[ \t]+
+                )
              )
            )
-        ''' % (less_than_tab, marker_pat, marker_pat)
+        ''' % (less_than_tab, marker_pat, marker_pat, other_marker_pat)
        if self.list_level:  # sub-list
-            list_re = re.compile("^"
+            list_re = re.compile("^"+whole_list, re.X | re.M | re.S)
        else:
-            list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"
+            list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list,
                                 re.X | re.M | re.S)

@@ -1788,7 +1863,7 @@ class Markdown(object):
        ''' % (_marker_any, _marker_any),
-
+        re.M | re.X | re.S)

@@ -1800,13 +1875,12 @@ class Markdown(object):
    def _task_list_item_sub(self, match):
        marker = match.group(1)
        item_text = match.group(2)
-        if marker in ['[x]',
-
+        if marker in ['[x]','[X]']:
+            return self._task_list_warpper_str % ('checked ', item_text)
        elif marker == '[ ]':
-
+            return self._task_list_warpper_str % ('', item_text)

    _last_li_endswith_two_eols = False
-
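The task-list change above makes the checked marker case-insensitive, accepting both [x] and [X]. Sketch of the expected output shape:

    from pdoc.markdown2 import markdown

    print(markdown("- [X] done\n- [ ] todo\n", extras=["task_list"]))
    # expected: the first <li> contains a checked checkbox input, the second an unchecked one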
@@ -1910,19 +1984,13 @@ class Markdown(object):
        codeblock = codeblock.lstrip('\n')  # trim leading newlines
-        codeblock = codeblock.rstrip()
-
-        # Note: "code-color" extra is DEPRECATED.
-        if "code-color" in self.extras and codeblock.startswith(":::"):
-            lexer_name, rest = codeblock.split('\n', 1)
-            lexer_name = lexer_name[3:].strip()
-            codeblock = rest.lstrip("\n")   # Remove lexer declaration line.
+        codeblock = codeblock.rstrip()  # trim trailing whitespace

        # Use pygments only if not using the highlightjs-lang extra
        if lexer_name and "highlightjs-lang" not in self.extras:
            lexer = self._get_pygments_lexer(lexer_name)
            if lexer:
-                leading_indent = ' '
+                leading_indent = ' '*(len(match.group(1)) - len(match.group(1).lstrip()))

@@ -1935,10 +2003,16 @@ class Markdown(object):
            leading_indent = ' ' * (len(match.group(1)) - len(match.group(1).lstrip()))
-
+            if codeblock:
+                # only run the codeblock through the outdenter if not empty
+                leading_indent, codeblock = self._uniform_outdent(codeblock, max_outdent=leading_indent)

            codeblock = self._encode_code(codeblock)

+            if lexer_name == 'mermaid' and 'mermaid' in self.extras:
+                return '\n%s<pre class="mermaid-pre"><div class="mermaid">%s\n</div></pre>\n' % (
+                    leading_indent, codeblock)
+
            return "\n%s<pre%s><code%s>%s\n</code></pre>\n" % (
                leading_indent, pre_class_str, code_class_str, codeblock)
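The new branch above is what pdoc's Mermaid support hooks into: a fenced block tagged mermaid is emitted as a mermaid-pre wrapper instead of an ordinary code block when the mermaid extra is active. Illustrative call against the vendored module:

    from pdoc.markdown2 import markdown

    doc = "```mermaid\ngraph LR\n    A --> B\n```\n"
    print(markdown(doc, extras=["fenced-code-blocks", "mermaid"]))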
@@ -1951,7 +2025,7 @@ class Markdown(object):
        else:
-            formatter_opts =
+            formatter_opts = {}

@@ -1964,9 +2038,8 @@ class Markdown(object):
            return codeblock
-
        # remove leading indent from code block
-
+        _, codeblock = self._uniform_outdent(codeblock, max_outdent=leading_indent)

@@ -2006,7 +2079,7 @@ class Markdown(object):
        ''' % (self.tab_width, self.tab_width),
-
+        re.M | re.X)

@@ -2090,6 +2163,42 @@ class Markdown(object):
+    def _wavedrom_block_sub(self, match):
+        # if this isn't a wavedrom diagram block, exit now
+        if match.group(2) != 'wavedrom':
+            return match.string[match.start():match.end()]
+
+        # dedent the block for processing
+        lead_indent, waves = self._uniform_outdent(match.group(3))
+        # default tags to wrap the wavedrom block in
+        open_tag, close_tag = '<script type="WaveDrom">\n', '</script>'
+
+        # check if the user would prefer to have the SVG embedded directly
+        if not isinstance(self.extras['wavedrom'], dict):
+            embed_svg = True
+        else:
+            # default behaviour is to embed SVGs
+            embed_svg = self.extras['wavedrom'].get('prefer_embed_svg', True)
+
+        if embed_svg:
+            try:
+                import wavedrom
+                waves = wavedrom.render(waves).tostring()
+                open_tag, close_tag = '<div>', '\n</div>'
+            except ImportError:
+                pass
+
+        # hash SVG to prevent <> chars being messed with
+        self._escape_table[waves] = _hash_text(waves)
+
+        return self._uniform_indent(
+            '\n%s%s%s\n' % (open_tag, self._escape_table[waves], close_tag),
+            lead_indent, include_empty_lines=True
+        )
+
+    def _do_wavedrom_blocks(self, text):
+        return self._fenced_code_block_re.sub(self._wavedrom_block_sub, text)
+
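_do_wavedrom_blocks reuses the fenced-code-block regex: a block tagged wavedrom is either rendered to inline SVG (when the optional wavedrom package is importable) or wrapped in a WaveDrom script tag for client-side rendering. A hedged sketch:

    from pdoc.markdown2 import markdown

    doc = '```wavedrom\n{ "signal": [{ "name": "clk", "wave": "p...." }] }\n```\n'
    print(markdown(doc, extras=["fenced-code-blocks", "wavedrom"]))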
@@ -2097,8 +2206,8 @@ class Markdown(object):
        ''' % _admonitions,
-
-
+        re.IGNORECASE | re.MULTILINE | re.VERBOSE
+    )

@@ -2129,22 +2238,24 @@ class Markdown(object):
    _strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)
-
    def _do_strike(self, text):
        text = self._strike_re.sub(r"<s>\1</s>", text)
        return text

    _underline_re = re.compile(r"(?<!<!)--(?!>)(?=\S)(.+?)(?<=\S)(?<!<!)--(?!>)", re.S)
-
    def _do_underline(self, text):
        text = self._underline_re.sub(r"<u>\1</u>", text)
        return text

+    _tg_spoiler_re = re.compile(r"\|\|\s?(.+?)\s?\|\|", re.S)
+    def _do_tg_spoiler(self, text):
+        text = self._tg_spoiler_re.sub(r"<tg-spoiler>\1</tg-spoiler>", text)
+        return text
+
    _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S)
    _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S)
    _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
    _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
-
    def _do_italics_and_bold(self, text):
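The new tg-spoiler extra converts ||text|| into Telegram-style spoiler tags:

    from pdoc.markdown2 import markdown

    print(markdown("this is ||a secret||", extras=["tg-spoiler"]))
    # expected to contain: <tg-spoiler>a secret</tg-spoiler>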
@@ -2161,14 +2272,13 @@ class Markdown(object):
    _contractions = ["tis", "twas", "twer", "neath", "o", "n",
-
-
+                     "round", "bout", "twixt", "nuff", "fraid", "sup"]
    def _do_smart_contractions(self, text):
        text = self._apostrophe_year_re.sub(r"’\1", text)
        for c in self._contractions:
            text = text.replace("'%s" % c, "’%s" % c)
            text = text.replace("'%s" % c.capitalize(),
-
+                                "’%s" % c.capitalize())

@@ -2176,7 +2286,6 @@ class Markdown(object):
    _closing_double_quote_re = re.compile(r'(?<=\S)"(?=(\s|,|;|\.|\?|!|$))')
-
    def _do_smart_punctuation(self, text):

@@ -2225,7 +2334,6 @@ class Markdown(object):
    _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S)
-
    def _dedent_two_spaces_sub(self, match):

@@ -2239,7 +2347,7 @@ class Markdown(object):
-        bq = self._run_block_gamut(bq)
+        bq = self._run_block_gamut(bq)  # recurse

@@ -2280,15 +2388,15 @@ class Markdown(object):
            if (li and len(li.group(2)) <= 3
-
-
-
-
-
+                    and (
+                        (li.group("next_marker") and li.group("marker")[-1] == li.group("next_marker")[-1])
+                        or
+                        li.group("next_marker") is None
+                    )
               ):
                start = li.start()
                cuddled_list = self._do_lists(graf[start:]).rstrip("\n")
-                assert
+                assert re.match(r'^<(?:ul|ol).*?>', cuddled_list)

@@ -2320,21 +2428,21 @@ class Markdown(object):
                backlink = ('<a href="#fnref-%s" ' +
-
-
-
-
+                            'class="footnoteBackLink" ' +
+                            'title="' + self.footnote_title + '">' +
+                            self.footnote_return_symbol +
+                            '</a>') % (id, i+1)
            except TypeError:
                log.debug("Footnote error. `footnote_title` "
                          "must include parameter. Using defaults.")
                backlink = ('<a href="#fnref-%s" '
-
-
-
+                            'class="footnoteBackLink" '
+                            'title="Jump back to footnote %d in the text.">'
+                            '↩</a>' % (id, i+1))

            if footer[-1].endswith("</p>"):
                footer[-1] = footer[-1][:-len("</p>")] \
-
+                             + ' ' + backlink + "</p>"

@@ -2370,18 +2478,20 @@ class Markdown(object):
-
+        def incomplete_tags_sub(match):
+            return match.group().replace('<', '&lt;')
+
+        return self._incomplete_tags_re.sub(incomplete_tags_sub, text)

    def _encode_backslash_escapes(self, text):
        for ch, escape in list(self._escape_table.items()):
-            text = text.replace("\\"
+            text = text.replace("\\"+ch, escape)
        return text

    _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I)
-
    def _auto_link_sub(self, match):
        g1 = match.group(1)
-        return '<a href="%s">%s</a>' % (g1, g1)
+        return '<a href="%s">%s</a>' % (self._protect_url(g1), g1)

@@ -2393,7 +2503,6 @@ class Markdown(object):
    """, re.I | re.X | re.U)
-
    def _auto_email_link_sub(self, match):

@@ -2424,12 +2533,14 @@ class Markdown(object):
    _basic_link_re = re.compile(r'!?\[.*?\]\(.*?\)')
-
    def _do_link_patterns(self, text):
        link_from_hash = {}
        for regex, repl in self.link_patterns:
            replacements = []
            for match in regex.finditer(text):
+                if any(self._match_overlaps_substr(text, match, h) for h in link_from_hash):
+                    continue
+
                if hasattr(repl, "__call__"):

@@ -2468,9 +2579,9 @@ class Markdown(object):
            escaped_href = (
                href.replace('"', '&quot;')  # b/c of attr quote
-
-
-
+                    # To avoid markdown <em> and <strong>:
+                    .replace('*', self._escape_table['*'])
+                    .replace('_', self._escape_table['_']))

@@ -2481,56 +2592,57 @@ class Markdown(object):
    def _unescape_special_chars(self, text):
        # Swap back in all the special characters we've hidden.
-
-
+        while True:
+            orig_text = text
+            for ch, hash in list(self._escape_table.items()) + list(self._code_table.items()):
+                text = text.replace(hash, ch)
+            if text == orig_text:
+                break
        return text

    def _outdent(self, text):
        # Remove one level of line-leading tabs or spaces
        return self._outdent_re.sub('', text)

-    def _uniform_outdent(self, text, min_outdent=None):
-        # Removes the smallest common leading indentation from each
-        # of `text` and returns said indent along with the outdented text.
-        # The `min_outdent` kwarg
-        #
-
-        #
-
-        #
-
-
-
-
-        if not
+    def _uniform_outdent(self, text, min_outdent=None, max_outdent=None):
+        # Removes the smallest common leading indentation from each (non empty)
+        # line of `text` and returns said indent along with the outdented text.
+        # The `min_outdent` kwarg makes sure the smallest common whitespace
+        # must be at least this size
+        # The `max_outdent` sets the maximum amount a line can be
+        # outdented by
+
+        # find the leading whitespace for every line
+        whitespace = [
+            re.findall(r'^[ \t]*', line)[0] if line else None
+            for line in text.splitlines()
+        ]
+        whitespace_not_empty = [i for i in whitespace if i is not None]
+
+        # if no whitespace detected (ie: no lines in code block, issue #505)
+        if not whitespace_not_empty:
            return '', text
-        # Get smallest common leading indent
-        ws = sorted(ws)[0]
-        # Dedent every line by smallest common indent
-        return ws, ''.join(
-            (line.replace(ws, '', 1) if line.startswith(ws) else line)
-            for line in text.splitlines(True)
-        )

-
-
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # get minimum common whitespace
+        outdent = min(whitespace_not_empty)
+        # adjust min common ws to be within bounds
+        if min_outdent is not None:
+            outdent = min([i for i in whitespace_not_empty if i >= min_outdent] or [min_outdent])
+        if max_outdent is not None:
+            outdent = min(outdent, max_outdent)
+
+        outdented = []
+        for line_ws, line in zip(whitespace, text.splitlines(True)):
+            if line.startswith(outdent):
+                # if line starts with smallest common ws, dedent it
+                outdented.append(line.replace(outdent, '', 1))
+            elif line_ws is not None and line_ws < outdent:
+                # if less indented than min common whitespace then outdent as much as possible
+                outdented.append(line.replace(line_ws, '', 1))
+            else:
+                outdented.append(line)
+
+        return outdent, ''.join(outdented)
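The rewritten _uniform_outdent computes the leading whitespace of every non-empty line, takes the smallest one (clamped by min_outdent and max_outdent) and strips it uniformly. Calling the private helper directly is purely illustrative:

    from pdoc.markdown2 import Markdown

    md = Markdown()
    print(md._uniform_outdent("  a\n    b\n"))               # expected: ('  ', 'a\n  b\n')
    print(md._uniform_outdent("    a\n", max_outdent="  "))  # expected: ('  ', '  a\n')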
@@ -2538,12 +2650,25 @@ class Markdown(object):
+    @staticmethod
+    def _match_overlaps_substr(text, match, substr):
+        '''
+        Checks if a regex match overlaps with a substring in the given text.
+        '''
+        for instance in re.finditer(re.escape(substr), text):
+            start, end = instance.span()
+            if start <= match.start() <= end:
+                return True
+            if start <= match.end() <= end:
+                return True
+        return False
+

class MarkdownWithExtras(Markdown):
    """A markdowner class that enables most extras:

    - footnotes
-    - code-
+    - fenced-code-blocks (only highlights code if 'pygments' Python module on path)
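_match_overlaps_substr is used by _do_link_patterns to skip pattern matches that overlap a link that has already been substituted, preventing double-wrapped links. Basic usage of the extra it guards (pattern and URL are made up):

    import re

    from pdoc.markdown2 import markdown

    patterns = [(re.compile(r"issue (\d+)"), r"https://example.com/issues/\1")]
    print(markdown("See issue 123.", extras=["link-patterns"], link_patterns=patterns))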
@@ -2551,7 +2676,7 @@ class MarkdownWithExtras(Markdown):
-    extras = ["footnotes", "code-
+    extras = ["footnotes", "fenced-code-blocks"]

@@ -2567,9 +2692,8 @@ def calculate_toc_html(toc):
        return ' ' * (len(h_stack) - 1)
-
    lines = []
-    h_stack = [0]
+    h_stack = [0]  # stack of header-level numbers

@@ -2600,12 +2724,9 @@ class UnicodeWithAttrs(str):
    toc_html = None

-
## {{{ http://code.activestate.com/recipes/577257/ (r1)
_slugify_strip_re = re.compile(r'[^\w\s-]')
_slugify_hyphenate_re = re.compile(r'[-\s]+')
-
-
def _slugify(value):

@@ -2617,20 +2738,16 @@ def _slugify(value):
    return _slugify_hyphenate_re.sub('-', value)
-
-
## end of http://code.activestate.com/recipes/577257/ }}}


# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
def _curry(*args, **kwargs):
    function, args = args[0], args[1:]
-
    def result(*rest, **kwrest):
        combined = kwargs.copy()
        combined.update(kwrest)
        return function(*args + rest, **combined)
-
    return result

@@ -2643,7 +2760,7 @@ def _regex_from_encoded_pattern(s):
-        _, flags_str = s[1:idx], s[idx
+        _, flags_str = s[1:idx], s[idx+1:]

@@ -2679,7 +2796,7 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
    if DEBUG:
-        print("dedent: dedent(..., tabsize=%d, skip_first_line=%r)"
+        print("dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
            % (tabsize, skip_first_line))

@@ -2721,13 +2838,13 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
            if DEBUG:
-                print("dedent: %r: %r -> removed %d/%d"
+                print("dedent: %r: %r -> removed %d/%d"\
                    % (line, ch, removed, margin))
            if removed == margin:
-                lines[i] = lines[i][j
+                lines[i] = lines[i][j+1:]
                break
            elif removed > margin:
-                lines[i] = ' '
+                lines[i] = ' '*(removed-margin) + lines[i][j+1:]
                break

@@ -2758,7 +2875,6 @@ class _memoized(object):
    """
-
    def __init__(self, func):

@@ -2798,8 +2914,6 @@ def _xml_oneliner_re_from_tab_width(tab_width):
    """ % (tab_width - 1), re.X)
-
-
_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)

@@ -2820,8 +2934,6 @@ def _hr_tag_re_from_tab_width(tab_width):
    """ % (tab_width - 1), re.X)
-
-
_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width)

@@ -2834,9 +2946,9 @@ def _xml_escape_attr(attr, skip_single_quote=True):
    escaped = (attr
-
-
-
+        .replace('"', '&quot;')
+        .replace('<', '&lt;')
+        .replace('>', '&gt;'))

@@ -2859,9 +2971,9 @@ def _xml_encode_email_char_at_random(ch):
    escaped = (attr
-
-
-
+        .replace('"', '&quot;')
+        .replace('<', '&lt;')
+        .replace('>', '&gt;'))

@@ -2870,9 +2982,8 @@ def _html_escape_url(attr, safe_mode=False):
-class _NoReflowFormatter(
-    """An
-
+class _NoReflowFormatter(argparse.RawDescriptionHelpFormatter):
+    """An argparse formatter that does NOT reflow the description."""
    def format_description(self, description):
        return description or ""

@@ -2888,38 +2999,45 @@ def main(argv=None):
-
-
-
-
-
-
+    parser = argparse.ArgumentParser(
+        prog="markdown2", description=cmdln_desc, usage='%(prog)s [PATHS...]',
+        formatter_class=_NoReflowFormatter
+    )
+    parser.add_argument('--version', action='version',
+                        version='%(prog)s {version}'.format(version=__version__))
+    parser.add_argument('paths', nargs='*',
+                        help=(
+                            'optional list of files to convert.'
+                            'If none are given, stdin will be used'
+                        ))
+    parser.add_argument("-v", "--verbose", dest="log_level",
                        action="store_const", const=logging.DEBUG,
                        help="more verbose output")
-    parser.
+    parser.add_argument("--encoding",
                        help="specify encoding of text content")
-    parser.
+    parser.add_argument("--html4tags", action="store_true", default=False,
                        help="use HTML 4 style for empty element tags")
-    parser.
+    parser.add_argument("-s", "--safe", metavar="MODE", dest="safe_mode",
                        help="sanitize literal HTML: 'escape' escapes "
                             "HTML meta chars, 'replace' replaces with an "
                             "[HTML_REMOVED] note")
-    parser.
+    parser.add_argument("-x", "--extras", action="append",
                        help="Turn on specific extra features (not part of "
                             "the core Markdown spec). See above.")
-    parser.
+    parser.add_argument("--use-file-vars",
                        help="Look for and use Emacs-style 'markdown-extras' "
                             "file var to turn on extras. See "
                             "<https://github.com/trentm/python-markdown2/wiki/Extras>")
-    parser.
+    parser.add_argument("--link-patterns-file",
                        help="path to a link pattern file")
-    parser.
+    parser.add_argument("--self-test", action="store_true",
                        help="run internal self-tests (some doctests)")
-    parser.
+    parser.add_argument("--compare", action="store_true",
                        help="run against Markdown.pl as well (for testing)")
    parser.set_defaults(log_level=logging.INFO, compare=False,
                        encoding="utf-8", safe_mode=None, use_file_vars=False)
-    opts
+    opts = parser.parse_args()
+    paths = opts.paths
    log.setLevel(opts.log_level)

    if opts.self_test:
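The command-line front end was ported from optparse to argparse, keeping the existing flags and gaining --version. The vendored copy can still be driven in-process; a hedged sketch:

    import sys

    from pdoc import markdown2

    sys.argv = ["markdown2", "--version"]
    try:
        markdown2.main()
    except SystemExit:
        pass   # argparse prints "markdown2 2.4.9" and exits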
@@ -2953,7 +3071,7 @@ def main(argv=None):
                    raise MarkdownError("%s:%d: invalid link pattern line: %r"
-                                        % (opts.link_patterns_file, i
+                                        % (opts.link_patterns_file, i+1, line))

@@ -2961,7 +3079,7 @@ def main(argv=None):
-    from os.path import
+    from os.path import abspath, dirname, exists, join

@@ -2974,7 +3092,7 @@ def main(argv=None):
        if opts.compare:
-            from subprocess import
+            from subprocess import PIPE, Popen

@@ -2983,15 +3101,15 @@ def main(argv=None):
        html = markdown(text,
-
-
-
-
-
+                        html4tags=opts.html4tags,
+                        safe_mode=opts.safe_mode,
+                        extras=extras, link_patterns=link_patterns,
+                        use_file_vars=opts.use_file_vars,
+                        cli=True)
        sys.stdout.write(html)
        if extras and "toc" in extras:
            log.debug("toc_html: " +
-
+                str(html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')))
        if opts.compare:
            test_dir = join(dirname(dirname(abspath(__file__))), "test")
            if exists(join(test_dir, "test_markdown2.py")):