pdoc 13.0.1-py3-none-any.whl → 13.1.1-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
@@ -1,7 +1,7 @@
1
1
  # fmt: off
2
2
  # flake8: noqa
3
3
  # type: ignore
4
- # Taken from here: https://github.com/trentm/python-markdown2/blob/ac5e7b956e9b8bc952039bfecb158ef1ddd7d422
4
+ # Taken from here: https://github.com/trentm/python-markdown2/blob/bce3f18ed86a19b418c8114a712bb6fee790c4c2/lib/markdown2.py
5
5
 
6
6
  #!/usr/bin/env python
7
7
  # Copyright (c) 2012 Trent Mick.
@@ -61,8 +61,8 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
61
61
  highlighting when using fenced-code-blocks and highlightjs.
62
62
  * html-classes: Takes a dict mapping html tag names (lowercase) to a
63
63
  string to use for a "class" tag attribute. Currently only supports "img",
64
- "table", "pre" and "code" tags. Add an issue if you require this for other
65
- tags.
64
+ "table", "thead", "pre", "code", "ul" and "ol" tags. Add an issue if you require
65
+ this for other tags.
66
66
  * link-patterns: Auto-link given regex patterns in text (e.g. bug number
67
67
  references, revision number references).
68
68
  * markdown-in-html: Allow the use of `markdown="1"` in a block HTML tag to
@@ -95,6 +95,7 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
95
95
  on Extras.
96
96
  * wiki-tables: Google Code Wiki-style tables. See
97
97
  <http://code.google.com/p/support/wiki/WikiSyntax#Tables>.
98
+ * wavedrom: Support for generating Wavedrom digital timing diagrams
98
99
  * xml: Passes one-liner processing instructions and namespaced XML tags.
99
100
  """
100
101
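
The two docstring hunks above extend the "html-classes" extra to the "thead", "ul" and "ol" tags and announce the new "wavedrom" extra. A minimal usage sketch of the expanded html-classes support (class names are illustrative, not defaults; `import markdown2` stands in for pdoc's vendored copy of this module):

    import markdown2  # module name as published upstream; pdoc ships it vendored

    doc = """
    | a | b |
    |---|---|
    | 1 | 2 |

    1. first
    2. second
    """

    html = markdown2.markdown(doc, extras={
        "tables": None,
        "html-classes": {
            "table": "table",
            "thead": "table-head",  # newly supported tag
            "ol": "numbered",       # newly supported tag
            "ul": "bulleted",       # newly supported tag
        },
    })
    print(html)  # the generated <table>, <thead> and <ol> carry the classes given above
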
 
@@ -103,18 +104,18 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
103
104
  # not yet sure if there implications with this. Compare 'pydoc sre'
104
105
  # and 'perldoc perlre'.
105
106
 
106
- __version_info__ = (2, 4, 4)
107
+ __version_info__ = (2, 4, 9)
107
108
  __version__ = '.'.join(map(str, __version_info__))
108
109
  __author__ = "Trent Mick"
109
110
 
110
- import sys
111
- import re
112
- import logging
113
- from hashlib import sha256
114
- import optparse
115
- from random import random, randint
111
+ import argparse
116
112
  import codecs
113
+ import logging
114
+ import re
115
+ import sys
117
116
  from collections import defaultdict
117
+ from hashlib import sha256
118
+ from random import randint, random
118
119
 
119
120
  # ---- globals
120
121
 
@@ -123,18 +124,16 @@ log = logging.getLogger("markdown")
123
124
 
124
125
  DEFAULT_TAB_WIDTH = 4
125
126
 
126
- SECRET_SALT = bytes(randint(0, 1000000))
127
-
128
127
 
128
+ SECRET_SALT = bytes(randint(0, 1000000))
129
129
  # MD5 function was previously used for this; the "md5" prefix was kept for
130
130
  # backwards compatibility.
131
131
  def _hash_text(s):
132
132
  return 'md5-' + sha256(SECRET_SALT + s.encode("utf-8")).hexdigest()[32:]
133
133
 
134
-
135
134
  # Table of hash values for escaped characters:
136
135
  g_escape_table = dict([(ch, _hash_text(ch))
137
- for ch in '\\`*_{}[]()>#+-.!'])
136
+ for ch in '\\`*_{}[]()>#+-.!'])
138
137
 
139
138
  # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
140
139
  # http://bumppo.net/projects/amputator/
@@ -232,7 +231,7 @@ class Markdown(object):
232
231
 
233
232
  if "toc" in self.extras:
234
233
  if "header-ids" not in self.extras:
235
- self.extras["header-ids"] = None # "toc" implies "header-ids"
234
+ self.extras["header-ids"] = None # "toc" implies "header-ids"
236
235
 
237
236
  if self.extras["toc"] is None:
238
237
  self._toc_depth = 6
@@ -293,8 +292,8 @@ class Markdown(object):
293
292
  [^#'"] # We don't want to match href values that start with # (like footnotes)
294
293
  )
295
294
  """,
296
- re.IGNORECASE | re.VERBOSE
297
- )
295
+ re.IGNORECASE | re.VERBOSE
296
+ )
298
297
 
299
298
  def convert(self, text):
300
299
  """Convert the given text."""
@@ -354,6 +353,9 @@ class Markdown(object):
354
353
 
355
354
  text = self.preprocess(text)
356
355
 
356
+ if 'wavedrom' in self.extras:
357
+ text = self._do_wavedrom_blocks(text)
358
+
357
359
  if "fenced-code-blocks" in self.extras and not self.safe_mode:
358
360
  text = self._do_fenced_code_blocks(text)
359
361
 
@@ -452,18 +454,18 @@ class Markdown(object):
452
454
  #
453
455
  # # header
454
456
  _meta_data_pattern = re.compile(r'''
455
- ^(?:---[\ \t]*\n)?( # optional opening fence
457
+ ^{0}( # optional opening fence
456
458
  (?:
457
- [\S \t]*\w[\S \t]*\s*:(?:\n+[ \t]+.*)+ # indented lists
459
+ {1}:(?:\n+[ \t]+.*)+ # indented lists
458
460
  )|(?:
459
- (?:[\S \t]*\w[\S \t]*\s*:\s+>(?:\n\s+.*)+?) # multiline long descriptions
460
- (?=\n[\S \t]*\w[\S \t]*\s*:\s*.*\n|\s*\Z) # match up until the start of the next key:value definition or the end of the input text
461
+ (?:{1}:\s+>(?:\n\s+.*)+?) # multiline long descriptions
462
+ (?=\n{1}:\s*.*\n|\s*\Z) # match up until the start of the next key:value definition or the end of the input text
461
463
  )|(?:
462
- [\S \t]*\w[\S \t]*\s*:(?! >).*\n? # simple key:value pair, leading spaces allowed
464
+ {1}:(?! >).*\n? # simple key:value pair, leading spaces allowed
463
465
  )
464
- )(?:---[\ \t]*\n)? # optional closing fence
465
- ''', re.MULTILINE | re.VERBOSE
466
- )
466
+ ){0} # optional closing fence
467
+ '''.format(r'(?:---[\ \t]*\n)?', r'[\S \t]*\w[\S \t]*\s*'), re.MULTILINE | re.VERBOSE
468
+ )
467
469
 
468
470
  _key_val_list_pat = re.compile(
469
471
  r"^-(?:[ \t]*([^\n]*)(?:[ \t]*[:-][ \t]*(\S+))?)(?:\n((?:[ \t]+[^\n]+\n?)+))?",
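
The hunk above only refactors `_meta_data_pattern` so its fence and key sub-expressions are injected via `str.format`; the behaviour it drives, the "metadata" extra, is unchanged. A hedged sketch of that extra (keys and values are illustrative; the parsed mapping is exposed on the result's `.metadata` attribute):

    import markdown2

    doc = """\
    ---
    title: Release notes
    tags: packaging
    ---

    # Body
    """

    html = markdown2.markdown(doc, extras=["metadata"])
    print(html.metadata)  # e.g. {'title': 'Release notes', 'tags': 'packaging'}
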
@@ -546,8 +548,7 @@ class Markdown(object):
546
548
 
547
549
  return tail
548
550
 
549
- _emacs_oneliner_vars_pat = re.compile(r"((?:<!--)?\s*-\*-)\s*(?:(\S[^\r\n]*?)([\r\n]\s*)?)?(-\*-\s*(?:-->)?)",
550
- re.UNICODE)
551
+ _emacs_oneliner_vars_pat = re.compile(r"((?:<!--)?\s*-\*-)\s*(?:(\S[^\r\n]*?)([\r\n]\s*)?)?(-\*-\s*(?:-->)?)", re.UNICODE)
551
552
  # This regular expression is intended to match blocks like this:
552
553
  # PREFIX Local Variables: SUFFIX
553
554
  # PREFIX mode: Tcl SUFFIX
@@ -629,7 +630,7 @@ class Markdown(object):
629
630
  return {}
630
631
  # Don't validate suffix on last line. Emacs doesn't care,
631
632
  # neither should we.
632
- if i != len(lines) - 1 and not line.endswith(suffix):
633
+ if i != len(lines)-1 and not line.endswith(suffix):
633
634
  log.debug("emacs variables error: line '%s' "
634
635
  "does not use proper suffix '%s'"
635
636
  % (line, suffix))
@@ -668,7 +669,7 @@ class Markdown(object):
668
669
  # Unquote values.
669
670
  for var, val in list(emacs_vars.items()):
670
671
  if len(val) > 1 and (val.startswith('"') and val.endswith('"')
671
- or val.startswith('"') and val.endswith('"')):
672
+ or val.startswith('"') and val.endswith('"')):
672
673
  emacs_vars[var] = val[1:-1]
673
674
 
674
675
  return emacs_vars
@@ -724,7 +725,7 @@ class Markdown(object):
724
725
  (?=\n+|\Z) # followed by a newline or end of document
725
726
  )
726
727
  """ % _block_tags_a,
727
- re.X | re.M)
728
+ re.X | re.M)
728
729
 
729
730
  _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math'
730
731
  _block_tags_b += _html5tags
@@ -740,13 +741,16 @@ class Markdown(object):
740
741
  (?=\n+|\Z) # followed by a newline or end of document
741
742
  )
742
743
  """ % _block_tags_b,
743
- re.X | re.M)
744
+ re.X | re.M)
744
745
 
745
746
  _html_markdown_attr_re = re.compile(
746
747
  r'''\s+markdown=("1"|'1')''')
747
-
748
748
  def _hash_html_block_sub(self, match, raw=False):
749
- html = match.group(1)
749
+ if isinstance(match, str):
750
+ html = match
751
+ else:
752
+ html = match.group(1)
753
+
750
754
  if raw and self.safe_mode:
751
755
  html = self._sanitize_html(html)
752
756
  elif 'markdown-in-html' in self.extras and 'markdown=' in html:
@@ -762,8 +766,8 @@ class Markdown(object):
762
766
  l_key = _hash_text(last_line)
763
767
  self.html_blocks[l_key] = last_line
764
768
  return ''.join(["\n\n", f_key,
765
- "\n\n", middle, "\n\n",
766
- l_key, "\n\n"])
769
+ "\n\n", middle, "\n\n",
770
+ l_key, "\n\n"])
767
771
  key = _hash_text(html)
768
772
  self.html_blocks[key] = html
769
773
  return "\n\n" + key + "\n\n"
@@ -797,7 +801,7 @@ class Markdown(object):
797
801
  # the inner nested divs must be indented.
798
802
  # We need to do this before the next, more liberal match, because the next
799
803
  # match will start at the first `<div>` and stop at the first `</div>`.
800
- text = self._strict_tag_block_re.sub(hash_html_block_sub, text)
804
+ text = self._strict_tag_block_sub(text, self._block_tags_a, hash_html_block_sub)
801
805
 
802
806
  # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
803
807
  text = self._liberal_tag_block_re.sub(hash_html_block_sub, text)
@@ -840,7 +844,7 @@ class Markdown(object):
840
844
  pass
841
845
  elif start_idx == 1 and text[0] == '\n':
842
846
  start_idx = 0 # to match minute detail of Markdown.pl regex
843
- elif text[start_idx - 2:start_idx] == '\n\n':
847
+ elif text[start_idx-2:start_idx] == '\n\n':
844
848
  pass
845
849
  else:
846
850
  break
@@ -852,7 +856,7 @@ class Markdown(object):
852
856
  break
853
857
  end_idx += 1
854
858
  # - Must be following by 2 newlines or hit end of text.
855
- if text[end_idx:end_idx + 2] not in ('', '\n', '\n\n'):
859
+ if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'):
856
860
  continue
857
861
 
858
862
  # Escape and hash (must match `_hash_html_block_sub`).
@@ -876,6 +880,39 @@ class Markdown(object):
876
880
 
877
881
  return text
878
882
 
883
+ def _strict_tag_block_sub(self, text, html_tags_re, callback):
884
+ tag_count = 0
885
+ current_tag = html_tags_re
886
+ block = ''
887
+ result = ''
888
+
889
+ for chunk in text.splitlines(True):
890
+ is_markup = re.match(r'^(?:</code>(?=</pre>))?(</?(%s)\b>?)' % current_tag, chunk)
891
+ block += chunk
892
+
893
+ if is_markup:
894
+ if chunk.startswith('</'):
895
+ tag_count -= 1
896
+ else:
897
+ # if close tag is in same line
898
+ if '</%s>' % is_markup.group(2) in chunk[is_markup.end():]:
899
+ # we must ignore these
900
+ is_markup = None
901
+ else:
902
+ tag_count += 1
903
+ current_tag = is_markup.group(2)
904
+
905
+ if tag_count == 0:
906
+ if is_markup:
907
+ block = callback(block.rstrip('\n')) # remove trailing newline
908
+ current_tag = html_tags_re
909
+ result += block
910
+ block = ''
911
+
912
+ result += block
913
+
914
+ return result
915
+
879
916
  def _strip_link_definitions(self, text):
880
917
  # Strips link definitions from text, stores the URLs and titles in
881
918
  # hash references.
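
The new `_strict_tag_block_sub` above replaces the purely regex-based strict matcher with a line-by-line tag counter, so nested block tags of the same name no longer need to be indented to be hashed as one raw HTML block. A rough illustration (output shape approximate; `import markdown2` stands in for the vendored module):

    import markdown2

    doc = """\
    <div>
    <div>
    *left alone: raw HTML content is not run through Markdown*
    </div>
    </div>
    """

    print(markdown2.markdown(doc))  # the whole block comes back verbatim, asterisks intact
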
@@ -905,7 +942,7 @@ class Markdown(object):
905
942
 
906
943
  def _extract_link_def_sub(self, match):
907
944
  id, url, title = match.groups()
908
- key = id.lower() # Link IDs are case-insensitive
945
+ key = id.lower() # Link IDs are case-insensitive
909
946
  self.urls[key] = self._encode_amps_and_angles(url)
910
947
  if title:
911
948
  self.titles[key] = title
@@ -1008,7 +1045,7 @@ class Markdown(object):
1008
1045
  # Lookahead for non-space at line-start, or end of doc.
1009
1046
  (?:(?=^[ ]{0,%d}\S)|\Z)
1010
1047
  ''' % (less_than_tab, self.tab_width, self.tab_width),
1011
- re.X | re.M)
1048
+ re.X | re.M)
1012
1049
  return footnote_def_re.sub(self._extract_footnote_def_sub, text)
1013
1050
 
1014
1051
  _hr_re = re.compile(r'^[ ]{0,3}([-_*])[ ]{0,2}(\1[ ]{0,2}){2,}$', re.M)
@@ -1020,6 +1057,9 @@ class Markdown(object):
1020
1057
  if 'admonitions' in self.extras:
1021
1058
  text = self._do_admonitions(text)
1022
1059
 
1060
+ if 'wavedrom' in self.extras:
1061
+ text = self._do_wavedrom_blocks(text)
1062
+
1023
1063
  if "fenced-code-blocks" in self.extras:
1024
1064
  text = self._do_fenced_code_blocks(text)
1025
1065
 
@@ -1030,7 +1070,7 @@ class Markdown(object):
1030
1070
  # you wish, you may use spaces between the hyphens or asterisks."
1031
1071
  # Markdown.pl 1.0.1's hr regexes limit the number of spaces between the
1032
1072
  # hr chars to one or two. We'll reproduce that limit here.
1033
- hr = "\n<hr" + self.empty_element_suffix + "\n"
1073
+ hr = "\n<hr"+self.empty_element_suffix+"\n"
1034
1074
  text = re.sub(self._hr_re, hr, text)
1035
1075
 
1036
1076
  text = self._do_lists(text)
@@ -1064,7 +1104,7 @@ class Markdown(object):
1064
1104
  _dedentlines(lines)
1065
1105
  indent = ' ' * self.tab_width
1066
1106
  s = ('\n' # separate from possible cuddled paragraph
1067
- + indent + ('\n' + indent).join(lines)
1107
+ + indent + ('\n'+indent).join(lines)
1068
1108
  + '\n')
1069
1109
  return s
1070
1110
 
@@ -1093,8 +1133,7 @@ class Markdown(object):
1093
1133
  head, underline, body = match.groups()
1094
1134
 
1095
1135
  # Determine aligns for columns.
1096
- cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in
1097
- re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", underline)))]
1136
+ cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", underline)))]
1098
1137
  align_from_col_idx = {}
1099
1138
  for col_idx, col in enumerate(cols):
1100
1139
  if col[0] == ':' and col[-1] == ':':
@@ -1105,9 +1144,8 @@ class Markdown(object):
1105
1144
  align_from_col_idx[col_idx] = ' style="text-align:right;"'
1106
1145
 
1107
1146
  # thead
1108
- hlines = ['<table%s>' % self._html_class_str_from_tag('table'), '<thead>', '<tr>']
1109
- cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in
1110
- re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))]
1147
+ hlines = ['<table%s>' % self._html_class_str_from_tag('table'), '<thead%s>' % self._html_class_str_from_tag('thead'), '<tr>']
1148
+ cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))]
1111
1149
  for col_idx, col in enumerate(cols):
1112
1150
  hlines.append(' <th%s>%s</th>' % (
1113
1151
  align_from_col_idx.get(col_idx, ''),
@@ -1120,8 +1158,7 @@ class Markdown(object):
1120
1158
  hlines.append('<tbody>')
1121
1159
  for line in body.strip('\n').split('\n'):
1122
1160
  hlines.append('<tr>')
1123
- cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in
1124
- re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)))]
1161
+ cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)))]
1125
1162
  for col_idx, col in enumerate(cols):
1126
1163
  hlines.append(' <td%s>%s</td>' % (
1127
1164
  align_from_col_idx.get(col_idx, ''),
@@ -1183,7 +1220,7 @@ class Markdown(object):
1183
1220
  add_hline('<table%s>' % self._html_class_str_from_tag('table'))
1184
1221
  # Check if first cell of first row is a header cell. If so, assume the whole row is a header row.
1185
1222
  if rows and rows[0] and re.match(r"^\s*~", rows[0][0]):
1186
- add_hline('<thead>', 1)
1223
+ add_hline('<thead%s>' % self._html_class_str_from_tag('thead'), 1)
1187
1224
  add_hline('<tr>', 2)
1188
1225
  for cell in rows[0]:
1189
1226
  add_hline("<th>{}</th>".format(format_cell(cell)), 3)
@@ -1245,6 +1282,9 @@ class Markdown(object):
1245
1282
 
1246
1283
  text = self._do_italics_and_bold(text)
1247
1284
 
1285
+ if "tg-spoiler" in self.extras:
1286
+ text = self._do_tg_spoiler(text)
1287
+
1248
1288
  if "smarty-pants" in self.extras:
1249
1289
  text = self._do_smart_punctuation(text)
1250
1290
 
@@ -1259,18 +1299,21 @@ class Markdown(object):
1259
1299
  # "Sorta" because auto-links are identified as "tag" tokens.
1260
1300
  _sorta_html_tokenize_re = re.compile(r"""
1261
1301
  (
1262
- # tag
1263
- </?
1264
- (?:\w+) # tag name
1265
- (?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))* # attributes
1266
- \s*/?>
1267
- |
1268
- # auto-link (e.g., <http://www.activestate.com/>)
1269
- <[\w~:/?#\[\]@!$&'\(\)*+,;%=\.\\-]+>
1270
- |
1271
- <!--.*?--> # comment
1272
- |
1273
- <\?.*?\?> # processing instruction
1302
+ \\* # escapes
1303
+ (?:
1304
+ # tag
1305
+ </?
1306
+ (?:\w+) # tag name
1307
+ (?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))* # attributes
1308
+ \s*/?>
1309
+ |
1310
+ # auto-link (e.g., <http://www.activestate.com/>)
1311
+ <[\w~:/?#\[\]@!$&'\(\)*+,;%=\.\\-]+>
1312
+ |
1313
+ <!--.*?--> # comment
1314
+ |
1315
+ <\?.*?\?> # processing instruction
1316
+ )
1274
1317
  )
1275
1318
  """, re.X)
1276
1319
 
@@ -1281,20 +1324,27 @@ class Markdown(object):
1281
1324
  # it isn't susceptible to unmatched '<' and '>' in HTML tags).
1282
1325
  # Note, however, that '>' is not allowed in an auto-link URL
1283
1326
  # here.
1327
+ lead_escape_re = re.compile(r'^((?:\\\\)*(?!\\))')
1284
1328
  escaped = []
1285
1329
  is_html_markup = False
1286
1330
  for token in self._sorta_html_tokenize_re.split(text):
1287
- if is_html_markup:
1331
+ # check token is preceded by 0 or more PAIRS of escapes, because escape pairs
1332
+ # escape themselves and don't affect the token
1333
+ if is_html_markup and lead_escape_re.match(token):
1288
1334
  # Within tags/HTML-comments/auto-links, encode * and _
1289
1335
  # so they don't conflict with their use in Markdown for
1290
1336
  # italics and strong. We're replacing each such
1291
1337
  # character with its corresponding MD5 checksum value;
1292
1338
  # this is likely overkill, but it should prevent us from
1293
1339
  # colliding with the escape values by accident.
1294
- escaped.append(token.replace('*', self._escape_table['*'])
1295
- .replace('_', self._escape_table['_']))
1340
+ escape_seq, token = lead_escape_re.split(token)[1:] or ('', token)
1341
+ escaped.append(
1342
+ escape_seq.replace('\\\\', self._escape_table['\\'])
1343
+ + token.replace('*', self._escape_table['*'])
1344
+ .replace('_', self._escape_table['_'])
1345
+ )
1296
1346
  else:
1297
- escaped.append(self._encode_backslash_escapes(token))
1347
+ escaped.append(self._encode_backslash_escapes(token.replace('\\<', '&lt;')))
1298
1348
  is_html_markup = not is_html_markup
1299
1349
  return ''.join(escaped)
1300
1350
 
@@ -1404,13 +1454,13 @@ class Markdown(object):
1404
1454
  def _extract_url_and_title(self, text, start):
1405
1455
  """Extracts the url and (optional) title from the tail of a link"""
1406
1456
  # text[start] equals the opening parenthesis
1407
- idx = self._find_non_whitespace(text, start + 1)
1457
+ idx = self._find_non_whitespace(text, start+1)
1408
1458
  if idx == len(text):
1409
1459
  return None, None, None
1410
1460
  end_idx = idx
1411
1461
  has_anglebrackets = text[idx] == "<"
1412
1462
  if has_anglebrackets:
1413
- end_idx = self._find_balanced(text, end_idx + 1, "<", ">")
1463
+ end_idx = self._find_balanced(text, end_idx+1, "<", ">")
1414
1464
  end_idx = self._find_balanced(text, end_idx, "(", ")")
1415
1465
  match = self._inline_link_title.search(text, idx, end_idx)
1416
1466
  if not match:
@@ -1420,8 +1470,18 @@ class Markdown(object):
1420
1470
  url = self._strip_anglebrackets.sub(r'\1', url)
1421
1471
  return url, title, end_idx
1422
1472
 
1423
- _safe_protocols = re.compile(r'(https?|ftp):', re.I)
1473
+ def _protect_url(self, url):
1474
+ '''
1475
+ Function that passes a URL through `_html_escape_url` to remove any nasty characters,
1476
+ and then hashes the now "safe" URL to prevent other safety mechanisms from tampering
1477
+ with it (eg: escaping "&" in URL parameters)
1478
+ '''
1479
+ url = _html_escape_url(url, safe_mode=self.safe_mode)
1480
+ key = _hash_text(url)
1481
+ self._escape_table[url] = key
1482
+ return key
1424
1483
 
1484
+ _safe_protocols = re.compile(r'(https?|ftp):', re.I)
1425
1485
  def _do_links(self, text):
1426
1486
  """Turn Markdown link shortcuts into XHTML <a> and <img> tags.
1427
1487
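
A rough sketch of what the new `_protect_url` buys (per its docstring): the escaped href is swapped for a hash so later passes cannot tamper with characters such as "&" inside query strings. The URL below is an example value and the output shape is approximate:

    import markdown2

    print(markdown2.markdown("[search](https://example.com/?q=a&b=c)"))
    # the '&' in the query string survives as written instead of being
    # re-escaped by later processing stages
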
 
@@ -1467,8 +1527,8 @@ class Markdown(object):
1467
1527
  # matching brackets in img alt text -- we'll differ in that
1468
1528
  # regard.
1469
1529
  bracket_depth = 0
1470
- for p in range(start_idx + 1, min(start_idx + MAX_LINK_TEXT_SENTINEL,
1471
- text_length)):
1530
+ for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL,
1531
+ text_length)):
1472
1532
  ch = text[p]
1473
1533
  if ch == ']':
1474
1534
  bracket_depth -= 1
@@ -1481,7 +1541,7 @@ class Markdown(object):
1481
1541
  # This isn't markup.
1482
1542
  curr_pos = start_idx + 1
1483
1543
  continue
1484
- link_text = text[start_idx + 1:p]
1544
+ link_text = text[start_idx+1:p]
1485
1545
 
1486
1546
  # Fix for issue 341 - Injecting XSS into link text
1487
1547
  if self.safe_mode:
@@ -1496,10 +1556,10 @@ class Markdown(object):
1496
1556
  result = '<sup class="footnote-ref" id="fnref-%s">' \
1497
1557
  '<a href="#fn-%s">%s</a></sup>' \
1498
1558
  % (normed_id, normed_id, len(self.footnote_ids))
1499
- text = text[:start_idx] + result + text[p + 1:]
1559
+ text = text[:start_idx] + result + text[p+1:]
1500
1560
  else:
1501
1561
  # This id isn't defined, leave the markup alone.
1502
- curr_pos = p + 1
1562
+ curr_pos = p+1
1503
1563
  continue
1504
1564
 
1505
1565
  # Now determine what this is by the remainder.
@@ -1510,40 +1570,40 @@ class Markdown(object):
1510
1570
  url, title, url_end_idx = self._extract_url_and_title(text, p)
1511
1571
  if url is not None:
1512
1572
  # Handle an inline anchor or img.
1513
- is_img = start_idx > 0 and text[start_idx - 1] == "!"
1573
+ is_img = start_idx > 0 and text[start_idx-1] == "!"
1514
1574
  if is_img:
1515
1575
  start_idx -= 1
1516
1576
 
1517
1577
  # We've got to encode these to avoid conflicting
1518
1578
  # with italics/bold.
1519
1579
  url = url.replace('*', self._escape_table['*']) \
1520
- .replace('_', self._escape_table['_'])
1580
+ .replace('_', self._escape_table['_'])
1521
1581
  if title:
1522
1582
  title_str = ' title="%s"' % (
1523
1583
  _xml_escape_attr(title)
1524
- .replace('*', self._escape_table['*'])
1525
- .replace('_', self._escape_table['_']))
1584
+ .replace('*', self._escape_table['*'])
1585
+ .replace('_', self._escape_table['_']))
1526
1586
  else:
1527
1587
  title_str = ''
1528
1588
  if is_img:
1529
1589
  img_class_str = self._html_class_str_from_tag("img")
1530
1590
  result = '<img src="%s" alt="%s"%s%s%s' \
1531
- % (_html_escape_url(url, safe_mode=self.safe_mode),
1532
- _xml_escape_attr(link_text),
1533
- title_str,
1534
- img_class_str,
1535
- self.empty_element_suffix)
1591
+ % (self._protect_url(url),
1592
+ _xml_escape_attr(link_text),
1593
+ title_str,
1594
+ img_class_str,
1595
+ self.empty_element_suffix)
1536
1596
  if "smarty-pants" in self.extras:
1537
1597
  result = result.replace('"', self._escape_table['"'])
1538
1598
  curr_pos = start_idx + len(result)
1599
+ anchor_allowed_pos = start_idx + len(result)
1539
1600
  text = text[:start_idx] + result + text[url_end_idx:]
1540
1601
  elif start_idx >= anchor_allowed_pos:
1541
1602
  safe_link = self._safe_protocols.match(url) or url.startswith('#')
1542
1603
  if self.safe_mode and not safe_link:
1543
1604
  result_head = '<a href="#"%s>' % (title_str)
1544
1605
  else:
1545
- result_head = '<a href="%s"%s>' % (
1546
- _html_escape_url(url, safe_mode=self.safe_mode), title_str)
1606
+ result_head = '<a href="%s"%s>' % (self._protect_url(url), title_str)
1547
1607
  result = '%s%s</a>' % (result_head, link_text)
1548
1608
  if "smarty-pants" in self.extras:
1549
1609
  result = result.replace('"', self._escape_table['"'])
@@ -1562,7 +1622,7 @@ class Markdown(object):
1562
1622
  match = self._tail_of_reference_link_re.match(text, p)
1563
1623
  if match:
1564
1624
  # Handle a reference-style anchor or img.
1565
- is_img = start_idx > 0 and text[start_idx - 1] == "!"
1625
+ is_img = start_idx > 0 and text[start_idx-1] == "!"
1566
1626
  if is_img:
1567
1627
  start_idx -= 1
1568
1628
  link_id = match.group("id").lower()
@@ -1573,7 +1633,7 @@ class Markdown(object):
1573
1633
  # We've got to encode these to avoid conflicting
1574
1634
  # with italics/bold.
1575
1635
  url = url.replace('*', self._escape_table['*']) \
1576
- .replace('_', self._escape_table['_'])
1636
+ .replace('_', self._escape_table['_'])
1577
1637
  title = self.titles.get(link_id)
1578
1638
  if title:
1579
1639
  title = _xml_escape_attr(title) \
@@ -1585,11 +1645,11 @@ class Markdown(object):
1585
1645
  if is_img:
1586
1646
  img_class_str = self._html_class_str_from_tag("img")
1587
1647
  result = '<img src="%s" alt="%s"%s%s%s' \
1588
- % (_html_escape_url(url, safe_mode=self.safe_mode),
1589
- _xml_escape_attr(link_text),
1590
- title_str,
1591
- img_class_str,
1592
- self.empty_element_suffix)
1648
+ % (self._protect_url(url),
1649
+ _xml_escape_attr(link_text),
1650
+ title_str,
1651
+ img_class_str,
1652
+ self.empty_element_suffix)
1593
1653
  if "smarty-pants" in self.extras:
1594
1654
  result = result.replace('"', self._escape_table['"'])
1595
1655
  curr_pos = start_idx + len(result)
@@ -1598,8 +1658,7 @@ class Markdown(object):
1598
1658
  if self.safe_mode and not self._safe_protocols.match(url):
1599
1659
  result_head = '<a href="#"%s>' % (title_str)
1600
1660
  else:
1601
- result_head = '<a href="%s"%s>' % (
1602
- _html_escape_url(url, safe_mode=self.safe_mode), title_str)
1661
+ result_head = '<a href="%s"%s>' % (self._protect_url(url), title_str)
1603
1662
  result = '%s%s</a>' % (result_head, link_text)
1604
1663
  if "smarty-pants" in self.extras:
1605
1664
  result = result.replace('"', self._escape_table['"'])
@@ -1687,7 +1746,7 @@ class Markdown(object):
1687
1746
  header_id_attr = ""
1688
1747
  if "header-ids" in self.extras:
1689
1748
  header_id = self.header_id_from_text(header_group,
1690
- self.extras["header-ids"], n)
1749
+ self.extras["header-ids"], n)
1691
1750
  if header_id:
1692
1751
  header_id_attr = ' id="%s"' % header_id
1693
1752
  html = self._run_span_gamut(header_group)
@@ -1721,12 +1780,21 @@ class Markdown(object):
1721
1780
 
1722
1781
  def _list_sub(self, match):
1723
1782
  lst = match.group(1)
1724
- lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol"
1783
+ lst_type = match.group(4) in self._marker_ul_chars and "ul" or "ol"
1784
+
1785
+ if lst_type == 'ol' and match.group(4) != '1.':
1786
+ # if list doesn't start at 1 then set the ol start attribute
1787
+ lst_opts = ' start="%s"' % match.group(4)[:-1]
1788
+ else:
1789
+ lst_opts = ''
1790
+
1791
+ lst_opts = lst_opts + self._html_class_str_from_tag(lst_type)
1792
+
1725
1793
  result = self._process_list_items(lst)
1726
1794
  if self.list_level:
1727
- return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type)
1795
+ return "<%s%s>\n%s</%s>\n" % (lst_type, lst_opts, result, lst_type)
1728
1796
  else:
1729
- return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type)
1797
+ return "<%s%s>\n%s</%s>\n\n" % (lst_type, lst_opts, result, lst_type)
1730
1798
 
1731
1799
  def _do_lists(self, text):
1732
1800
  # Form HTML ordered (numbered) and unordered (bulleted) lists.
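
With the `_list_sub` change above, an ordered list that does not begin at 1 now carries a `start` attribute, and both list types pick up any matching "html-classes" entry. A minimal sketch:

    import markdown2

    doc = """\
    3. third
    4. fourth
    """

    print(markdown2.markdown(doc))  # the opening tag is now e.g. <ol start="3">
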
@@ -1740,16 +1808,17 @@ class Markdown(object):
1740
1808
  hits = []
1741
1809
  for marker_pat in (self._marker_ul, self._marker_ol):
1742
1810
  less_than_tab = self.tab_width - 1
1811
+ other_marker_pat = self._marker_ul if marker_pat == self._marker_ol else self._marker_ol
1743
1812
  whole_list = r'''
1744
1813
  ( # \1 = whole list
1745
1814
  ( # \2
1746
- [ ]{0,%d}
1747
- (%s) # \3 = first list item marker
1815
+ ([ ]{0,%d}) # \3 = the indentation level of the list item marker
1816
+ (%s) # \4 = first list item marker
1748
1817
  [ \t]+
1749
- (?!\ *\3\ ) # '- - - ...' isn't a list. See 'not_quite_a_list' test case.
1818
+ (?!\ *\4\ ) # '- - - ...' isn't a list. See 'not_quite_a_list' test case.
1750
1819
  )
1751
1820
  (?:.+?)
1752
- ( # \4
1821
+ ( # \5
1753
1822
  \Z
1754
1823
  |
1755
1824
  \n{2,}
@@ -1758,13 +1827,19 @@ class Markdown(object):
1758
1827
  [ \t]*
1759
1828
  %s[ \t]+
1760
1829
  )
1830
+ |
1831
+ \n+
1832
+ (?=
1833
+ \3 # lookahead for a different style of list item marker
1834
+ %s[ \t]+
1835
+ )
1761
1836
  )
1762
1837
  )
1763
- ''' % (less_than_tab, marker_pat, marker_pat)
1838
+ ''' % (less_than_tab, marker_pat, marker_pat, other_marker_pat)
1764
1839
  if self.list_level: # sub-list
1765
- list_re = re.compile("^" + whole_list, re.X | re.M | re.S)
1840
+ list_re = re.compile("^"+whole_list, re.X | re.M | re.S)
1766
1841
  else:
1767
- list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)" + whole_list,
1842
+ list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list,
1768
1843
  re.X | re.M | re.S)
1769
1844
  match = list_re.search(text, pos)
1770
1845
  if match:
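
The extra lookahead on the other marker style (indentation group `\3` plus `other_marker_pat`) lets a change of marker terminate the current list. An illustrative sketch of the effect:

    import markdown2

    doc = """\
    - apples
    - pears
    1. wash
    2. slice
    """

    print(markdown2.markdown(doc))
    # the bulleted and numbered items now come back as a separate <ul> and <ol>
    # instead of being folded into a single list
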
@@ -1788,7 +1863,7 @@ class Markdown(object):
1788
1863
  (\n{1,2})) # eols = \5
1789
1864
  (?= \n* (\Z | \2 (?P<next_marker>%s) [ \t]+))
1790
1865
  ''' % (_marker_any, _marker_any),
1791
- re.M | re.X | re.S)
1866
+ re.M | re.X | re.S)
1792
1867
 
1793
1868
  _task_list_item_re = re.compile(r'''
1794
1869
  (\[[\ xX]\])[ \t]+ # tasklist marker = \1
@@ -1800,13 +1875,12 @@ class Markdown(object):
1800
1875
  def _task_list_item_sub(self, match):
1801
1876
  marker = match.group(1)
1802
1877
  item_text = match.group(2)
1803
- if marker in ['[x]', '[X]']:
1804
- return self._task_list_warpper_str % ('checked ', item_text)
1878
+ if marker in ['[x]','[X]']:
1879
+ return self._task_list_warpper_str % ('checked ', item_text)
1805
1880
  elif marker == '[ ]':
1806
- return self._task_list_warpper_str % ('', item_text)
1881
+ return self._task_list_warpper_str % ('', item_text)
1807
1882
 
1808
1883
  _last_li_endswith_two_eols = False
1809
-
1810
1884
  def _list_item_sub(self, match):
1811
1885
  item = match.group(4)
1812
1886
  leading_line = match.group(1)
@@ -1910,19 +1984,13 @@ class Markdown(object):
1910
1984
  codeblock = self._outdent(codeblock)
1911
1985
  codeblock = self._detab(codeblock)
1912
1986
  codeblock = codeblock.lstrip('\n') # trim leading newlines
1913
- codeblock = codeblock.rstrip() # trim trailing whitespace
1914
-
1915
- # Note: "code-color" extra is DEPRECATED.
1916
- if "code-color" in self.extras and codeblock.startswith(":::"):
1917
- lexer_name, rest = codeblock.split('\n', 1)
1918
- lexer_name = lexer_name[3:].strip()
1919
- codeblock = rest.lstrip("\n") # Remove lexer declaration line.
1987
+ codeblock = codeblock.rstrip() # trim trailing whitespace
1920
1988
 
1921
1989
  # Use pygments only if not using the highlightjs-lang extra
1922
1990
  if lexer_name and "highlightjs-lang" not in self.extras:
1923
1991
  lexer = self._get_pygments_lexer(lexer_name)
1924
1992
  if lexer:
1925
- leading_indent = ' ' * (len(match.group(1)) - len(match.group(1).lstrip()))
1993
+ leading_indent = ' '*(len(match.group(1)) - len(match.group(1).lstrip()))
1926
1994
  return self._code_block_with_lexer_sub(codeblock, leading_indent, lexer, is_fenced_code_block)
1927
1995
 
1928
1996
  pre_class_str = self._html_class_str_from_tag("pre")
@@ -1935,10 +2003,16 @@ class Markdown(object):
1935
2003
  if is_fenced_code_block:
1936
2004
  # Fenced code blocks need to be outdented before encoding, and then reapplied
1937
2005
  leading_indent = ' ' * (len(match.group(1)) - len(match.group(1).lstrip()))
1938
- leading_indent, codeblock = self._uniform_outdent_limit(codeblock, leading_indent)
2006
+ if codeblock:
2007
+ # only run the codeblock through the outdenter if not empty
2008
+ leading_indent, codeblock = self._uniform_outdent(codeblock, max_outdent=leading_indent)
1939
2009
 
1940
2010
  codeblock = self._encode_code(codeblock)
1941
2011
 
2012
+ if lexer_name == 'mermaid' and 'mermaid' in self.extras:
2013
+ return '\n%s<pre class="mermaid-pre"><div class="mermaid">%s\n</div></pre>\n' % (
2014
+ leading_indent, codeblock)
2015
+
1942
2016
  return "\n%s<pre%s><code%s>%s\n</code></pre>\n" % (
1943
2017
  leading_indent, pre_class_str, code_class_str, codeblock)
1944
2018
  else:
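
The new branch above gives fenced blocks tagged `mermaid` their own wrapper when the "mermaid" extra is enabled, so a client-side mermaid.js can render them. A hedged sketch (diagram source is illustrative):

    import markdown2

    doc = """\
    ```mermaid
    graph TD;
        A-->B;
    ```
    """

    html = markdown2.markdown(doc, extras=["fenced-code-blocks", "mermaid"])
    print(html)  # the block is wrapped in <pre class="mermaid-pre"><div class="mermaid">
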
@@ -1951,7 +2025,7 @@ class Markdown(object):
1951
2025
  if is_fenced_code_block:
1952
2026
  formatter_opts = self.extras['fenced-code-blocks'] or {}
1953
2027
  else:
1954
- formatter_opts = self.extras['code-color'] or {}
2028
+ formatter_opts = {}
1955
2029
 
1956
2030
  def unhash_code(codeblock):
1957
2031
  for key, sanitized in list(self.html_spans.items()):
@@ -1964,9 +2038,8 @@ class Markdown(object):
1964
2038
  for old, new in replacements:
1965
2039
  codeblock = codeblock.replace(old, new)
1966
2040
  return codeblock
1967
-
1968
2041
  # remove leading indent from code block
1969
- leading_indent, codeblock = self._uniform_outdent(codeblock)
2042
+ _, codeblock = self._uniform_outdent(codeblock, max_outdent=leading_indent)
1970
2043
 
1971
2044
  codeblock = unhash_code(codeblock)
1972
2045
  colored = self._color_with_pygments(codeblock, lexer,
@@ -2006,7 +2079,7 @@ class Markdown(object):
2006
2079
  # Needed when syntax highlighting is being used.
2007
2080
  (?!([^<]|<(/?)span)*\</code\>)
2008
2081
  ''' % (self.tab_width, self.tab_width),
2009
- re.M | re.X)
2082
+ re.M | re.X)
2010
2083
  return code_block_re.sub(self._code_block_sub, text)
2011
2084
 
2012
2085
  _fenced_code_block_re = re.compile(r'''
@@ -2090,6 +2163,42 @@ class Markdown(object):
2090
2163
  self._code_table[text] = hashed
2091
2164
  return hashed
2092
2165
 
2166
+ def _wavedrom_block_sub(self, match):
2167
+ # if this isn't a wavedrom diagram block, exit now
2168
+ if match.group(2) != 'wavedrom':
2169
+ return match.string[match.start():match.end()]
2170
+
2171
+ # dedent the block for processing
2172
+ lead_indent, waves = self._uniform_outdent(match.group(3))
2173
+ # default tags to wrap the wavedrom block in
2174
+ open_tag, close_tag = '<script type="WaveDrom">\n', '</script>'
2175
+
2176
+ # check if the user would prefer to have the SVG embedded directly
2177
+ if not isinstance(self.extras['wavedrom'], dict):
2178
+ embed_svg = True
2179
+ else:
2180
+ # default behaviour is to embed SVGs
2181
+ embed_svg = self.extras['wavedrom'].get('prefer_embed_svg', True)
2182
+
2183
+ if embed_svg:
2184
+ try:
2185
+ import wavedrom
2186
+ waves = wavedrom.render(waves).tostring()
2187
+ open_tag, close_tag = '<div>', '\n</div>'
2188
+ except ImportError:
2189
+ pass
2190
+
2191
+ # hash SVG to prevent <> chars being messed with
2192
+ self._escape_table[waves] = _hash_text(waves)
2193
+
2194
+ return self._uniform_indent(
2195
+ '\n%s%s%s\n' % (open_tag, self._escape_table[waves], close_tag),
2196
+ lead_indent, include_empty_lines=True
2197
+ )
2198
+
2199
+ def _do_wavedrom_blocks(self, text):
2200
+ return self._fenced_code_block_re.sub(self._wavedrom_block_sub, text)
2201
+
2093
2202
  _admonitions = r'admonition|attention|caution|danger|error|hint|important|note|tip|warning'
2094
2203
  _admonitions_re = re.compile(r'''
2095
2204
  ^(\ *)\.\.\ (%s)::\ * # $1 leading indent, $2 the admonition
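
The `_wavedrom_block_sub` / `_do_wavedrom_blocks` pair above implements the "wavedrom" extra announced in the module docstring: a fenced block tagged `wavedrom` is rendered to inline SVG when the optional `wavedrom` package is importable, otherwise (or when `prefer_embed_svg` is false) it is wrapped in a `<script type="WaveDrom">` block for client-side rendering. A hedged usage sketch:

    import markdown2

    doc = """\
    ```wavedrom
    { "signal": [{ "name": "clk", "wave": "p......" }] }
    ```
    """

    html = markdown2.markdown(doc, extras={"wavedrom": {"prefer_embed_svg": False}})
    print(html)  # expect a <script type="WaveDrom"> wrapper around the diagram source
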
@@ -2097,8 +2206,8 @@ class Markdown(object):
2097
2206
  ((?:\s*\n\1\ {3,}.*)+?) # $4 admonition body (required)
2098
2207
  (?=\s*(?:\Z|\n{4,}|\n\1?\ {0,2}\S)) # until EOF, 3 blank lines or something less indented
2099
2208
  ''' % _admonitions,
2100
- re.IGNORECASE | re.MULTILINE | re.VERBOSE
2101
- )
2209
+ re.IGNORECASE | re.MULTILINE | re.VERBOSE
2210
+ )
2102
2211
 
2103
2212
  def _do_admonitions_sub(self, match):
2104
2213
  lead_indent, admonition_name, title, body = match.groups()
@@ -2129,22 +2238,24 @@ class Markdown(object):
2129
2238
  return self._admonitions_re.sub(self._do_admonitions_sub, text)
2130
2239
 
2131
2240
  _strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)
2132
-
2133
2241
  def _do_strike(self, text):
2134
2242
  text = self._strike_re.sub(r"<s>\1</s>", text)
2135
2243
  return text
2136
2244
 
2137
2245
  _underline_re = re.compile(r"(?<!<!)--(?!>)(?=\S)(.+?)(?<=\S)(?<!<!)--(?!>)", re.S)
2138
-
2139
2246
  def _do_underline(self, text):
2140
2247
  text = self._underline_re.sub(r"<u>\1</u>", text)
2141
2248
  return text
2142
2249
 
2250
+ _tg_spoiler_re = re.compile(r"\|\|\s?(.+?)\s?\|\|", re.S)
2251
+ def _do_tg_spoiler(self, text):
2252
+ text = self._tg_spoiler_re.sub(r"<tg-spoiler>\1</tg-spoiler>", text)
2253
+ return text
2254
+
2143
2255
  _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S)
2144
2256
  _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S)
2145
2257
  _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
2146
2258
  _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
2147
-
2148
2259
  def _do_italics_and_bold(self, text):
2149
2260
  # <strong> must go first:
2150
2261
  if "code-friendly" in self.extras:
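
The `_tg_spoiler_re` added above backs the new "tg-spoiler" extra (Telegram-style spoiler markup). A minimal sketch, output shape approximate:

    import markdown2

    print(markdown2.markdown("The butler || did it ||", extras=["tg-spoiler"]))
    # roughly: <p>The butler <tg-spoiler>did it</tg-spoiler></p>
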
@@ -2161,14 +2272,13 @@ class Markdown(object):
2161
2272
  # using scare quotes (single quotation marks) is rare.
2162
2273
  _apostrophe_year_re = re.compile(r"'(\d\d)(?=(\s|,|;|\.|\?|!|$))")
2163
2274
  _contractions = ["tis", "twas", "twer", "neath", "o", "n",
2164
- "round", "bout", "twixt", "nuff", "fraid", "sup"]
2165
-
2275
+ "round", "bout", "twixt", "nuff", "fraid", "sup"]
2166
2276
  def _do_smart_contractions(self, text):
2167
2277
  text = self._apostrophe_year_re.sub(r"&#8217;\1", text)
2168
2278
  for c in self._contractions:
2169
2279
  text = text.replace("'%s" % c, "&#8217;%s" % c)
2170
2280
  text = text.replace("'%s" % c.capitalize(),
2171
- "&#8217;%s" % c.capitalize())
2281
+ "&#8217;%s" % c.capitalize())
2172
2282
  return text
2173
2283
 
2174
2284
  # Substitute double-quotes before single-quotes.
@@ -2176,7 +2286,6 @@ class Markdown(object):
2176
2286
  _opening_double_quote_re = re.compile(r'(?<!\S)"(?=\S)')
2177
2287
  _closing_single_quote_re = re.compile(r"(?<=\S)'")
2178
2288
  _closing_double_quote_re = re.compile(r'(?<=\S)"(?=(\s|,|;|\.|\?|!|$))')
2179
-
2180
2289
  def _do_smart_punctuation(self, text):
2181
2290
  """Fancifies 'single quotes', "double quotes", and apostrophes.
2182
2291
  Converts --, ---, and ... into en dashes, em dashes, and ellipses.
@@ -2225,7 +2334,6 @@ class Markdown(object):
2225
2334
  _bq_one_level_re_spoiler = re.compile('^[ \t]*>[ \t]*?![ \t]?', re.M)
2226
2335
  _bq_all_lines_spoilers = re.compile(r'\A(?:^[ \t]*>[ \t]*?!.*[\n\r]*)+\Z', re.M)
2227
2336
  _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S)
2228
-
2229
2337
  def _dedent_two_spaces_sub(self, match):
2230
2338
  return re.sub(r'(?m)^ ', '', match.group(1))
2231
2339
 
@@ -2239,7 +2347,7 @@ class Markdown(object):
2239
2347
  bq = self._bq_one_level_re.sub('', bq)
2240
2348
  # trim whitespace-only lines
2241
2349
  bq = self._ws_only_line_re.sub('', bq)
2242
- bq = self._run_block_gamut(bq) # recurse
2350
+ bq = self._run_block_gamut(bq) # recurse
2243
2351
 
2244
2352
  bq = re.sub('(?m)^', ' ', bq)
2245
2353
  # These leading spaces screw with <pre> content, so we need to fix that:
@@ -2280,15 +2388,15 @@ class Markdown(object):
2280
2388
  # consider numeric bullets (e.g. "1." and "2.") to be
2281
2389
  # equal.
2282
2390
  if (li and len(li.group(2)) <= 3
2283
- and (
2284
- (li.group("next_marker") and li.group("marker")[-1] == li.group("next_marker")[-1])
2285
- or
2286
- li.group("next_marker") is None
2287
- )
2391
+ and (
2392
+ (li.group("next_marker") and li.group("marker")[-1] == li.group("next_marker")[-1])
2393
+ or
2394
+ li.group("next_marker") is None
2395
+ )
2288
2396
  ):
2289
2397
  start = li.start()
2290
2398
  cuddled_list = self._do_lists(graf[start:]).rstrip("\n")
2291
- assert cuddled_list.startswith("<ul>") or cuddled_list.startswith("<ol>")
2399
+ assert re.match(r'^<(?:ul|ol).*?>', cuddled_list)
2292
2400
  graf = graf[:start]
2293
2401
 
2294
2402
  # Wrap <p> tags.
@@ -2320,21 +2428,21 @@ class Markdown(object):
2320
2428
  footer.append(self._run_block_gamut(self.footnotes[id]))
2321
2429
  try:
2322
2430
  backlink = ('<a href="#fnref-%s" ' +
2323
- 'class="footnoteBackLink" ' +
2324
- 'title="' + self.footnote_title + '">' +
2325
- self.footnote_return_symbol +
2326
- '</a>') % (id, i + 1)
2431
+ 'class="footnoteBackLink" ' +
2432
+ 'title="' + self.footnote_title + '">' +
2433
+ self.footnote_return_symbol +
2434
+ '</a>') % (id, i+1)
2327
2435
  except TypeError:
2328
2436
  log.debug("Footnote error. `footnote_title` "
2329
2437
  "must include parameter. Using defaults.")
2330
2438
  backlink = ('<a href="#fnref-%s" '
2331
- 'class="footnoteBackLink" '
2332
- 'title="Jump back to footnote %d in the text.">'
2333
- '&#8617;</a>' % (id, i + 1))
2439
+ 'class="footnoteBackLink" '
2440
+ 'title="Jump back to footnote %d in the text.">'
2441
+ '&#8617;</a>' % (id, i+1))
2334
2442
 
2335
2443
  if footer[-1].endswith("</p>"):
2336
2444
  footer[-1] = footer[-1][:-len("</p>")] \
2337
- + '&#160;' + backlink + "</p>"
2445
+ + '&#160;' + backlink + "</p>"
2338
2446
  else:
2339
2447
  footer.append("\n<p>%s</p>" % backlink)
2340
2448
  footer.append('</li>')
@@ -2370,18 +2478,20 @@ class Markdown(object):
2370
2478
  if text.endswith(">"):
2371
2479
  return text # this is not an incomplete tag, this is a link in the form <http://x.y.z>
2372
2480
 
2373
- return self._incomplete_tags_re.sub("&lt;\\1", text)
2481
+ def incomplete_tags_sub(match):
2482
+ return match.group().replace('<', '&lt;')
2483
+
2484
+ return self._incomplete_tags_re.sub(incomplete_tags_sub, text)
2374
2485
 
2375
2486
  def _encode_backslash_escapes(self, text):
2376
2487
  for ch, escape in list(self._escape_table.items()):
2377
- text = text.replace("\\" + ch, escape)
2488
+ text = text.replace("\\"+ch, escape)
2378
2489
  return text
2379
2490
 
2380
2491
  _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I)
2381
-
2382
2492
  def _auto_link_sub(self, match):
2383
2493
  g1 = match.group(1)
2384
- return '<a href="%s">%s</a>' % (g1, g1)
2494
+ return '<a href="%s">%s</a>' % (self._protect_url(g1), g1)
2385
2495
 
2386
2496
  _auto_email_link_re = re.compile(r"""
2387
2497
  <
@@ -2393,7 +2503,6 @@ class Markdown(object):
2393
2503
  )
2394
2504
  >
2395
2505
  """, re.I | re.X | re.U)
2396
-
2397
2506
  def _auto_email_link_sub(self, match):
2398
2507
  return self._encode_email_address(
2399
2508
  self._unescape_special_chars(match.group(1)))
@@ -2424,12 +2533,14 @@ class Markdown(object):
2424
2533
  return addr
2425
2534
 
2426
2535
  _basic_link_re = re.compile(r'!?\[.*?\]\(.*?\)')
2427
-
2428
2536
  def _do_link_patterns(self, text):
2429
2537
  link_from_hash = {}
2430
2538
  for regex, repl in self.link_patterns:
2431
2539
  replacements = []
2432
2540
  for match in regex.finditer(text):
2541
+ if any(self._match_overlaps_substr(text, match, h) for h in link_from_hash):
2542
+ continue
2543
+
2433
2544
  if hasattr(repl, "__call__"):
2434
2545
  href = repl(match)
2435
2546
  else:
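
The new overlap check above keeps a later link pattern from matching inside the placeholder hash inserted for an earlier one, which could previously corrupt the generated link. For context, a minimal sketch of the "link-patterns" extra this loop serves (pattern and target URL are illustrative; issue #505 is the one referenced in a comment later in this diff):

    import re
    import markdown2

    link_patterns = [
        (re.compile(r"issue #(\d+)", re.I),
         r"https://github.com/trentm/python-markdown2/issues/\1"),
    ]

    doc = "Empty fenced code blocks were fixed in issue #505."
    print(markdown2.markdown(doc, extras=["link-patterns"],
                             link_patterns=link_patterns))
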
@@ -2468,9 +2579,9 @@ class Markdown(object):
2468
2579
 
2469
2580
  escaped_href = (
2470
2581
  href.replace('"', '&quot;') # b/c of attr quote
2471
- # To avoid markdown <em> and <strong>:
2472
- .replace('*', self._escape_table['*'])
2473
- .replace('_', self._escape_table['_']))
2582
+ # To avoid markdown <em> and <strong>:
2583
+ .replace('*', self._escape_table['*'])
2584
+ .replace('_', self._escape_table['_']))
2474
2585
  link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
2475
2586
  hash = _hash_text(link)
2476
2587
  link_from_hash[hash] = link
@@ -2481,56 +2592,57 @@ class Markdown(object):
2481
2592
 
2482
2593
  def _unescape_special_chars(self, text):
2483
2594
  # Swap back in all the special characters we've hidden.
2484
- for ch, hash in list(self._escape_table.items()) + list(self._code_table.items()):
2485
- text = text.replace(hash, ch)
2595
+ while True:
2596
+ orig_text = text
2597
+ for ch, hash in list(self._escape_table.items()) + list(self._code_table.items()):
2598
+ text = text.replace(hash, ch)
2599
+ if text == orig_text:
2600
+ break
2486
2601
  return text
2487
2602
 
2488
2603
  def _outdent(self, text):
2489
2604
  # Remove one level of line-leading tabs or spaces
2490
2605
  return self._outdent_re.sub('', text)
2491
2606
 
2492
- def _uniform_outdent(self, text, min_outdent=None):
2493
- # Removes the smallest common leading indentation from each line
2494
- # of `text` and returns said indent along with the outdented text.
2495
- # The `min_outdent` kwarg only outdents lines that start with at
2496
- # least this level of indentation or more.
2497
-
2498
- # Find leading indentation of each line
2499
- ws = re.findall(r'(^[ \t]*)(?:[^ \t\n])', text, re.MULTILINE)
2500
- # Sort the indents within bounds
2501
- if min_outdent:
2502
- # dont use "is not None" here so we avoid iterating over ws
2503
- # if min_outdent == '', which would do nothing
2504
- ws = [i for i in ws if len(min_outdent) <= len(i)]
2505
- if not ws:
2607
+ def _uniform_outdent(self, text, min_outdent=None, max_outdent=None):
2608
+ # Removes the smallest common leading indentation from each (non empty)
2609
+ # line of `text` and returns said indent along with the outdented text.
2610
+ # The `min_outdent` kwarg makes sure the smallest common whitespace
2611
+ # must be at least this size
2612
+ # The `max_outdent` sets the maximum amount a line can be
2613
+ # outdented by
2614
+
2615
+ # find the leading whitespace for every line
2616
+ whitespace = [
2617
+ re.findall(r'^[ \t]*', line)[0] if line else None
2618
+ for line in text.splitlines()
2619
+ ]
2620
+ whitespace_not_empty = [i for i in whitespace if i is not None]
2621
+
2622
+ # if no whitespace detected (ie: no lines in code block, issue #505)
2623
+ if not whitespace_not_empty:
2506
2624
  return '', text
2507
- # Get smallest common leading indent
2508
- ws = sorted(ws)[0]
2509
- # Dedent every line by smallest common indent
2510
- return ws, ''.join(
2511
- (line.replace(ws, '', 1) if line.startswith(ws) else line)
2512
- for line in text.splitlines(True)
2513
- )
2514
2625
 
2515
- def _uniform_outdent_limit(self, text, outdent):
2516
- # Outdents up to `outdent`. Similar to `_uniform_outdent`, but
2517
- # will leave some indentation on the line with the smallest common
2518
- # leading indentation depending on the amount specified.
2519
- # If the smallest leading indentation is less than `outdent`, it will
2520
- # perform identical to `_uniform_outdent`
2521
-
2522
- # Find leading indentation of each line
2523
- ws = re.findall(r'(^[ \t]*)(?:[^ \t\n])', text, re.MULTILINE)
2524
- if not ws:
2525
- return outdent, text
2526
- # Get smallest common leading indent
2527
- ws = sorted(ws)[0]
2528
- if len(outdent) > len(ws):
2529
- outdent = ws
2530
- return outdent, ''.join(
2531
- (line.replace(outdent, '', 1) if line.startswith(outdent) else line)
2532
- for line in text.splitlines(True)
2533
- )
2626
+ # get minimum common whitespace
2627
+ outdent = min(whitespace_not_empty)
2628
+ # adjust min common ws to be within bounds
2629
+ if min_outdent is not None:
2630
+ outdent = min([i for i in whitespace_not_empty if i >= min_outdent] or [min_outdent])
2631
+ if max_outdent is not None:
2632
+ outdent = min(outdent, max_outdent)
2633
+
2634
+ outdented = []
2635
+ for line_ws, line in zip(whitespace, text.splitlines(True)):
2636
+ if line.startswith(outdent):
2637
+ # if line starts with smallest common ws, dedent it
2638
+ outdented.append(line.replace(outdent, '', 1))
2639
+ elif line_ws is not None and line_ws < outdent:
2640
+ # if less indented than min common whitespace then outdent as much as possible
2641
+ outdented.append(line.replace(line_ws, '', 1))
2642
+ else:
2643
+ outdented.append(line)
2644
+
2645
+ return outdent, ''.join(outdented)
2534
2646
 
2535
2647
  def _uniform_indent(self, text, indent, include_empty_lines=False):
2536
2648
  return ''.join(
@@ -2538,12 +2650,25 @@ class Markdown(object):
2538
2650
  for line in text.splitlines(True)
2539
2651
  )
2540
2652
 
2653
+ @staticmethod
2654
+ def _match_overlaps_substr(text, match, substr):
2655
+ '''
2656
+ Checks if a regex match overlaps with a substring in the given text.
2657
+ '''
2658
+ for instance in re.finditer(re.escape(substr), text):
2659
+ start, end = instance.span()
2660
+ if start <= match.start() <= end:
2661
+ return True
2662
+ if start <= match.end() <= end:
2663
+ return True
2664
+ return False
2665
+
2541
2666
 
2542
2667
  class MarkdownWithExtras(Markdown):
2543
2668
  """A markdowner class that enables most extras:
2544
2669
 
2545
2670
  - footnotes
2546
- - code-color (only has effect if 'pygments' Python module on path)
2671
+ - fenced-code-blocks (only highlights code if 'pygments' Python module on path)
2547
2672
 
2548
2673
  These are not included:
2549
2674
  - pyshell (specific to Python-related documenting)
@@ -2551,7 +2676,7 @@ class MarkdownWithExtras(Markdown):
2551
2676
  - link-patterns (because you need to specify some actual
2552
2677
  link-patterns anyway)
2553
2678
  """
2554
- extras = ["footnotes", "code-color"]
2679
+ extras = ["footnotes", "fenced-code-blocks"]
2555
2680
 
2556
2681
 
2557
2682
  # ---- internal support functions
@@ -2567,9 +2692,8 @@ def calculate_toc_html(toc):
2567
2692
 
2568
2693
  def indent():
2569
2694
  return ' ' * (len(h_stack) - 1)
2570
-
2571
2695
  lines = []
2572
- h_stack = [0] # stack of header-level numbers
2696
+ h_stack = [0] # stack of header-level numbers
2573
2697
  for level, id, name in toc:
2574
2698
  if level > h_stack[-1]:
2575
2699
  lines.append("%s<ul>" % indent())
@@ -2600,12 +2724,9 @@ class UnicodeWithAttrs(str):
2600
2724
  metadata = None
2601
2725
  toc_html = None
2602
2726
 
2603
-
2604
2727
  ## {{{ http://code.activestate.com/recipes/577257/ (r1)
2605
2728
  _slugify_strip_re = re.compile(r'[^\w\s-]')
2606
2729
  _slugify_hyphenate_re = re.compile(r'[-\s]+')
2607
-
2608
-
2609
2730
  def _slugify(value):
2610
2731
  """
2611
2732
  Normalizes string, converts to lowercase, removes non-alpha characters,
@@ -2617,20 +2738,16 @@ def _slugify(value):
2617
2738
  value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode()
2618
2739
  value = _slugify_strip_re.sub('', value).strip().lower()
2619
2740
  return _slugify_hyphenate_re.sub('-', value)
2620
-
2621
-
2622
2741
  ## end of http://code.activestate.com/recipes/577257/ }}}
2623
2742
 
2624
2743
 
2625
2744
  # From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
2626
2745
  def _curry(*args, **kwargs):
2627
2746
  function, args = args[0], args[1:]
2628
-
2629
2747
  def result(*rest, **kwrest):
2630
2748
  combined = kwargs.copy()
2631
2749
  combined.update(kwrest)
2632
2750
  return function(*args + rest, **combined)
2633
-
2634
2751
  return result
2635
2752
 
2636
2753
 
@@ -2643,7 +2760,7 @@ def _regex_from_encoded_pattern(s):
2643
2760
  if s.startswith('/') and s.rfind('/') != 0:
2644
2761
  # Parse it: /PATTERN/FLAGS
2645
2762
  idx = s.rfind('/')
2646
- _, flags_str = s[1:idx], s[idx + 1:]
2763
+ _, flags_str = s[1:idx], s[idx+1:]
2647
2764
  flag_from_char = {
2648
2765
  "i": re.IGNORECASE,
2649
2766
  "l": re.LOCALE,
@@ -2679,7 +2796,7 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
2679
2796
  """
2680
2797
  DEBUG = False
2681
2798
  if DEBUG:
2682
- print("dedent: dedent(..., tabsize=%d, skip_first_line=%r)" \
2799
+ print("dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
2683
2800
  % (tabsize, skip_first_line))
2684
2801
  margin = None
2685
2802
  for i, line in enumerate(lines):
@@ -2721,13 +2838,13 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
2721
2838
  "line %r while removing %d-space margin"
2722
2839
  % (ch, line, margin))
2723
2840
  if DEBUG:
2724
- print("dedent: %r: %r -> removed %d/%d" \
2841
+ print("dedent: %r: %r -> removed %d/%d"\
2725
2842
  % (line, ch, removed, margin))
2726
2843
  if removed == margin:
2727
- lines[i] = lines[i][j + 1:]
2844
+ lines[i] = lines[i][j+1:]
2728
2845
  break
2729
2846
  elif removed > margin:
2730
- lines[i] = ' ' * (removed - margin) + lines[i][j + 1:]
2847
+ lines[i] = ' '*(removed-margin) + lines[i][j+1:]
2731
2848
  break
2732
2849
  else:
2733
2850
  if removed:
@@ -2758,7 +2875,6 @@ class _memoized(object):
2758
2875
 
2759
2876
  http://wiki.python.org/moin/PythonDecoratorLibrary
2760
2877
  """
2761
-
2762
2878
  def __init__(self, func):
2763
2879
  self.func = func
2764
2880
  self.cache = {}
@@ -2798,8 +2914,6 @@ def _xml_oneliner_re_from_tab_width(tab_width):
2798
2914
  (?=\n{2,}|\Z) # followed by a blank line or end of document
2799
2915
  )
2800
2916
  """ % (tab_width - 1), re.X)
2801
-
2802
-
2803
2917
  _xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)
2804
2918
 
2805
2919
 
@@ -2820,8 +2934,6 @@ def _hr_tag_re_from_tab_width(tab_width):
2820
2934
  (?=\n{2,}|\Z) # followed by a blank line or end of document
2821
2935
  )
2822
2936
  """ % (tab_width - 1), re.X)
2823
-
2824
-
2825
2937
  _hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width)
2826
2938
 
2827
2939
 
@@ -2834,9 +2946,9 @@ def _xml_escape_attr(attr, skip_single_quote=True):
2834
2946
  escaped = _AMPERSAND_RE.sub('&amp;', attr)
2835
2947
 
2836
2948
  escaped = (attr
2837
- .replace('"', '&quot;')
2838
- .replace('<', '&lt;')
2839
- .replace('>', '&gt;'))
2949
+ .replace('"', '&quot;')
2950
+ .replace('<', '&lt;')
2951
+ .replace('>', '&gt;'))
2840
2952
  if not skip_single_quote:
2841
2953
  escaped = escaped.replace("'", "&#39;")
2842
2954
  return escaped
@@ -2859,9 +2971,9 @@ def _xml_encode_email_char_at_random(ch):
2859
2971
  def _html_escape_url(attr, safe_mode=False):
2860
2972
  """Replace special characters that are potentially malicious in url string."""
2861
2973
  escaped = (attr
2862
- .replace('"', '&quot;')
2863
- .replace('<', '&lt;')
2864
- .replace('>', '&gt;'))
2974
+ .replace('"', '&quot;')
2975
+ .replace('<', '&lt;')
2976
+ .replace('>', '&gt;'))
2865
2977
  if safe_mode:
2866
2978
  escaped = escaped.replace('+', ' ')
2867
2979
  escaped = escaped.replace("'", "&#39;")
@@ -2870,9 +2982,8 @@ def _html_escape_url(attr, safe_mode=False):
2870
2982
 
2871
2983
  # ---- mainline
2872
2984
 
2873
- class _NoReflowFormatter(optparse.IndentedHelpFormatter):
2874
- """An optparse formatter that does NOT reflow the description."""
2875
-
2985
+ class _NoReflowFormatter(argparse.RawDescriptionHelpFormatter):
2986
+ """An argparse formatter that does NOT reflow the description."""
2876
2987
  def format_description(self, description):
2877
2988
  return description or ""
2878
2989
 
@@ -2888,38 +2999,45 @@ def main(argv=None):
2888
2999
  if not logging.root.handlers:
2889
3000
  logging.basicConfig()
2890
3001
 
2891
- usage = "usage: %prog [PATHS...]"
2892
- version = "%prog " + __version__
2893
- parser = optparse.OptionParser(prog="markdown2", usage=usage,
2894
- version=version, description=cmdln_desc,
2895
- formatter=_NoReflowFormatter())
2896
- parser.add_option("-v", "--verbose", dest="log_level",
3002
+ parser = argparse.ArgumentParser(
3003
+ prog="markdown2", description=cmdln_desc, usage='%(prog)s [PATHS...]',
3004
+ formatter_class=_NoReflowFormatter
3005
+ )
3006
+ parser.add_argument('--version', action='version',
3007
+ version='%(prog)s {version}'.format(version=__version__))
3008
+ parser.add_argument('paths', nargs='*',
3009
+ help=(
3010
+ 'optional list of files to convert.'
3011
+ 'If none are given, stdin will be used'
3012
+ ))
3013
+ parser.add_argument("-v", "--verbose", dest="log_level",
2897
3014
  action="store_const", const=logging.DEBUG,
2898
3015
  help="more verbose output")
2899
- parser.add_option("--encoding",
3016
+ parser.add_argument("--encoding",
2900
3017
  help="specify encoding of text content")
2901
- parser.add_option("--html4tags", action="store_true", default=False,
3018
+ parser.add_argument("--html4tags", action="store_true", default=False,
2902
3019
  help="use HTML 4 style for empty element tags")
2903
- parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode",
3020
+ parser.add_argument("-s", "--safe", metavar="MODE", dest="safe_mode",
2904
3021
  help="sanitize literal HTML: 'escape' escapes "
2905
3022
  "HTML meta chars, 'replace' replaces with an "
2906
3023
  "[HTML_REMOVED] note")
2907
- parser.add_option("-x", "--extras", action="append",
3024
+ parser.add_argument("-x", "--extras", action="append",
2908
3025
  help="Turn on specific extra features (not part of "
2909
3026
  "the core Markdown spec). See above.")
2910
- parser.add_option("--use-file-vars",
3027
+ parser.add_argument("--use-file-vars",
2911
3028
  help="Look for and use Emacs-style 'markdown-extras' "
2912
3029
  "file var to turn on extras. See "
2913
3030
  "<https://github.com/trentm/python-markdown2/wiki/Extras>")
2914
- parser.add_option("--link-patterns-file",
3031
+ parser.add_argument("--link-patterns-file",
2915
3032
  help="path to a link pattern file")
2916
- parser.add_option("--self-test", action="store_true",
3033
+ parser.add_argument("--self-test", action="store_true",
2917
3034
  help="run internal self-tests (some doctests)")
2918
- parser.add_option("--compare", action="store_true",
3035
+ parser.add_argument("--compare", action="store_true",
2919
3036
  help="run against Markdown.pl as well (for testing)")
2920
3037
  parser.set_defaults(log_level=logging.INFO, compare=False,
2921
3038
  encoding="utf-8", safe_mode=None, use_file_vars=False)
2922
- opts, paths = parser.parse_args()
3039
+ opts = parser.parse_args()
3040
+ paths = opts.paths
2923
3041
  log.setLevel(opts.log_level)
2924
3042
 
2925
3043
  if opts.self_test:
@@ -2953,7 +3071,7 @@ def main(argv=None):
2953
3071
  pat, href = line.rstrip().rsplit(None, 1)
2954
3072
  except ValueError:
2955
3073
  raise MarkdownError("%s:%d: invalid link pattern line: %r"
2956
- % (opts.link_patterns_file, i + 1, line))
3074
+ % (opts.link_patterns_file, i+1, line))
2957
3075
  link_patterns.append(
2958
3076
  (_regex_from_encoded_pattern(pat), href))
2959
3077
  finally:
@@ -2961,7 +3079,7 @@ def main(argv=None):
2961
3079
  else:
2962
3080
  link_patterns = None
2963
3081
 
2964
- from os.path import join, dirname, abspath, exists
3082
+ from os.path import abspath, dirname, exists, join
2965
3083
  markdown_pl = join(dirname(dirname(abspath(__file__))), "test",
2966
3084
  "Markdown.pl")
2967
3085
  if not paths:
@@ -2974,7 +3092,7 @@ def main(argv=None):
2974
3092
  text = fp.read()
2975
3093
  fp.close()
2976
3094
  if opts.compare:
2977
- from subprocess import Popen, PIPE
3095
+ from subprocess import PIPE, Popen
2978
3096
  print("==== Markdown.pl ====")
2979
3097
  p = Popen('perl %s' % markdown_pl, shell=True, stdin=PIPE, stdout=PIPE, close_fds=True)
2980
3098
  p.stdin.write(text.encode('utf-8'))
@@ -2983,15 +3101,15 @@ def main(argv=None):
2983
3101
  sys.stdout.write(perl_html)
2984
3102
  print("==== markdown2.py ====")
2985
3103
  html = markdown(text,
2986
- html4tags=opts.html4tags,
2987
- safe_mode=opts.safe_mode,
2988
- extras=extras, link_patterns=link_patterns,
2989
- use_file_vars=opts.use_file_vars,
2990
- cli=True)
3104
+ html4tags=opts.html4tags,
3105
+ safe_mode=opts.safe_mode,
3106
+ extras=extras, link_patterns=link_patterns,
3107
+ use_file_vars=opts.use_file_vars,
3108
+ cli=True)
2991
3109
  sys.stdout.write(html)
2992
3110
  if extras and "toc" in extras:
2993
3111
  log.debug("toc_html: " +
2994
- str(html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')))
3112
+ str(html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')))
2995
3113
  if opts.compare:
2996
3114
  test_dir = join(dirname(dirname(abspath(__file__))), "test")
2997
3115
  if exists(join(test_dir, "test_markdown2.py")):