red_quilt 0.7.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/README.md +38 -0
- data/docs/api.md +11 -0
- data/lib/red_quilt/arena.rb +96 -0
- data/lib/red_quilt/block_parser.rb +22 -384
- data/lib/red_quilt/cli.rb +8 -2
- data/lib/red_quilt/code_block.rb +139 -0
- data/lib/red_quilt/document.rb +18 -4
- data/lib/red_quilt/footnote_anchors.rb +24 -0
- data/lib/red_quilt/footnote_pass.rb +6 -2
- data/lib/red_quilt/frontmatter.rb +54 -0
- data/lib/red_quilt/html_block.rb +161 -0
- data/lib/red_quilt/indentation.rb +35 -0
- data/lib/red_quilt/inline/builder.rb +9 -186
- data/lib/red_quilt/inline/emphasis_resolver.rb +184 -0
- data/lib/red_quilt/inline/url_sanitizer.rb +64 -0
- data/lib/red_quilt/line.rb +6 -1
- data/lib/red_quilt/lint_pass.rb +2 -2
- data/lib/red_quilt/node_ref.rb +20 -11
- data/lib/red_quilt/renderer/html.rb +32 -20
- data/lib/red_quilt/renderer/mdast.rb +11 -11
- data/lib/red_quilt/table.rb +97 -0
- data/lib/red_quilt/version.rb +1 -1
- data/lib/red_quilt.rb +19 -4
- data/sig/red_quilt.rbs +18 -0
- metadata +9 -2
data/lib/red_quilt/document.rb
CHANGED
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
module RedQuilt
|
|
4
4
|
class Document
|
|
5
|
-
attr_reader :source, :arena, :root_id, :references, :footnotes
|
|
5
|
+
attr_reader :source, :arena, :root_id, :references, :footnotes, :frontmatter
|
|
6
6
|
|
|
7
|
-
def initialize(source, arena, root_id, allow_html: false, disallow_raw_html: false, references: {}, footnotes: nil)
|
|
7
|
+
def initialize(source, arena, root_id, allow_html: false, disallow_raw_html: false, references: {}, footnotes: nil, frontmatter: nil)
|
|
8
8
|
@source = source
|
|
9
9
|
@arena = arena
|
|
10
10
|
@root_id = root_id
|
|
@@ -12,6 +12,7 @@ module RedQuilt
|
|
|
12
12
|
@disallow_raw_html = disallow_raw_html
|
|
13
13
|
@references = references
|
|
14
14
|
@footnotes = footnotes
|
|
15
|
+
@frontmatter = frontmatter
|
|
15
16
|
end
|
|
16
17
|
|
|
17
18
|
def allow_html?
|
|
@@ -51,11 +52,18 @@ module RedQuilt
|
|
|
51
52
|
# `<pre class="mermaid">` containers instead of `<pre><code>`. In
|
|
52
53
|
# standalone mode the mermaid.js runtime is also loaded from a CDN so
|
|
53
54
|
# the diagrams render in the browser without further setup.
|
|
54
|
-
|
|
55
|
+
#
|
|
56
|
+
# When standalone and the document was parsed with `frontmatter: true`,
|
|
57
|
+
# the frontmatter's `title` / `lang` keys fill in the corresponding
|
|
58
|
+
# `<title>` / `<html lang>` if no explicit argument was given
|
|
59
|
+
# (explicit argument > frontmatter > default).
|
|
60
|
+
def to_html(standalone: false, title: nil, lang: nil, css: nil, theme: :none, heading_ids: false, mermaid: false)
  # Render the document body first; fragment mode returns it untouched.
  renderer = Renderer::HTML.new(self, heading_ids: heading_ids, mermaid: mermaid)
  fragment = renderer.render
  return fragment unless standalone

  # Precedence: explicit argument, then the frontmatter key, then the
  # default (no default title; "en" for the language).
  page_title = title || frontmatter_value("title")
  page_lang = lang || frontmatter_value("lang") || "en"

  # Frontmatter values need not be Strings, so both are stringified here.
  wrap_standalone_html(
    fragment,
    title: page_title.to_s,
    lang: page_lang.to_s,
    css: css,
    theme: Theme.css(theme),
    mermaid: mermaid,
  )
end
|
|
60
68
|
|
|
61
69
|
def to_ast
|
|
@@ -91,6 +99,12 @@ module RedQuilt
|
|
|
91
99
|
|
|
92
100
|
private
|
|
93
101
|
|
|
102
|
+
def frontmatter_value(key)
  # Frontmatter may be absent (nil) or a non-Hash value; only a Hash has
  # keys to look up, anything else yields nil.
  @frontmatter.is_a?(Hash) ? @frontmatter[key] : nil
end
|
|
107
|
+
|
|
94
108
|
# Self-contained assets embedded in standalone output when mermaid
|
|
95
109
|
# support is enabled. Loads the mermaid.js runtime from a CDN as an ES
|
|
96
110
|
# module, renders every `<pre class="mermaid">` container, then makes
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
  # Central definition of the HTML element ids that connect footnote
  # references to their definitions and back again. The reference
  # (`<sup>`), the definition (`<li>`) and the back-reference links all
  # have to use the same strings, so the ids are built here instead of
  # being re-assembled wherever they are needed.
  module FootnoteAnchors
    module_function

    # Id carried by the definition `<li>`; a reference link points at it.
    def definition_id(number)
      "fn-#{number}"
    end

    # Id carried by a reference `<sup>`; a back-reference link points at
    # it. The first occurrence uses the bare id, every later occurrence
    # (occurrence > 1) appends `-N` so each back-reference target is unique.
    def reference_id(number, occurrence)
      return "fnref-#{number}" unless occurrence > 1

      "fnref-#{number}-#{occurrence}"
    end
  end
end
|
|
@@ -29,10 +29,14 @@ module RedQuilt
|
|
|
29
29
|
|
|
30
30
|
# Re-append referenced definitions in first-reference order; detaching
|
|
31
31
|
# all current children first means unreferenced definitions are left
|
|
32
|
-
# orphaned (and so never rendered).
|
|
32
|
+
# orphaned (and so never rendered). The number and reference count are
|
|
33
|
+
# materialized onto each definition node so the renderer reads them off
|
|
34
|
+
# the arena rather than consulting the registry.
|
|
33
35
|
@arena.child_ids(section_id).to_a.each { |child| @arena.detach(child) }
|
|
34
36
|
@registry.referenced_labels.each do |label|
|
|
35
|
-
|
|
37
|
+
def_id = @registry.definition_node(label)
|
|
38
|
+
@arena.resolve_footnote_definition(def_id, @registry.number(label), @registry.occurrences(label))
|
|
39
|
+
@arena.append_child(section_id, def_id)
|
|
36
40
|
end
|
|
37
41
|
end
|
|
38
42
|
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "psych"
|
|
4
|
+
require "date"
|
|
5
|
+
|
|
6
|
+
module RedQuilt
  # Extracts a leading YAML frontmatter block from a Markdown source.
  module Frontmatter
    # Matches a frontmatter block at the very start of the document: an
    # opening `---` line, the YAML body (capture 1), and a closing `---` or
    # `...` line that may carry trailing spaces/tabs and ends at a newline
    # or at end of input.
    PATTERN = /\A---\n(.*?)\n(?:---|\.\.\.)[ \t]*(?:\n|\z)/m
    private_constant :PATTERN

    # Everything the restricted loader can raise for frontmatter it will
    # not load: malformed YAML, a tag/class outside the permitted list
    # (this includes plain symbols such as `:draft`), and alias use while
    # aliases are disabled.
    LOAD_ERRORS = [Psych::SyntaxError, Psych::DisallowedClass, Psych::BadAlias].freeze
    private_constant :LOAD_ERRORS

    module_function

    # Extracts frontmatter from +source+, returning a two-element array:
    # [data, body]. +data+ is the parsed YAML value (normally a Hash), or
    # nil when there is no frontmatter or it could not be loaded. +body+ is
    # the source with the frontmatter region blanked out; when there is no
    # frontmatter, +source+ itself is returned unchanged.
    #
    # +diagnostics+ is an optional array; when the YAML cannot be loaded a
    # warning Diagnostic is appended and +data+ is returned as nil.
    def extract(source, diagnostics: nil)
      match = PATTERN.match(source)
      return [nil, source] unless match

      data = parse_yaml(match[1], diagnostics: diagnostics)
      body = blank_out(source, match.end(0))
      [data, body]
    end

    # Parses the YAML body with a restricted loader (no arbitrary object
    # instantiation; Date / Time permitted for common frontmatter dates;
    # aliases disabled).
    #
    # Returns the parsed value, or nil when the YAML is malformed or uses a
    # construct the restricted loader rejects. In the failure case a
    # warning Diagnostic is pushed onto +diagnostics+ (when given) instead
    # of letting the Psych exception escape to the caller.
    def parse_yaml(yaml, diagnostics: nil)
      Psych.safe_load(yaml, permitted_classes: [Date, Time], aliases: false)
    rescue *LOAD_ERRORS => e
      # `&.` short-circuits the argument too, so no Diagnostic is built
      # when the caller did not ask for diagnostics.
      diagnostics&.push(
        Diagnostic.new(
          severity: :warning,
          rule: :frontmatter,
          message: "invalid YAML frontmatter: #{e.message}",
        ),
      )
      nil
    end

    # Replaces everything before +offset+ with one bare newline per
    # consumed source line, so the line numbers of the remaining text stay
    # the same as in the original source.
    def blank_out(source, offset)
      consumed = source[0, offset]
      ("\n" * consumed.count("\n")) + source[offset..]
    end
  end
end
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
  # CommonMark HTML-block classification (spec 4.6). Pure functions over a
  # line's text: given the raw line they decide whether it opens an HTML
  # block and of which of the seven types. No arena or parser state is
  # involved in the classification, so it lives apart from BlockParser's
  # node construction; the nested Parser class does the node building.
  module HtmlBlock
    module_function

    # True when `text` opens an HTML block (any of the 7 types). A line
    # indented by four or more spaces is indented code and never an HTML
    # block; 0-3 spaces of indent are allowed.
    def start?(text)
      return false if text.start_with?("    ")

      !type(text).nil?
    end

    # The HTML block type (1..7) opened by `text`, or nil if it opens none.
    def type(text)
      # Fast reject before any allocation: skip the 0-3 spaces of indent
      # CommonMark allows (a fourth space leaves a space at the cursor and
      # is rejected below), then require `<` as the next byte.
      i = 0
      while i < 3 && i < text.length && text.getbyte(i) == 0x20
        i += 1
      end
      return nil unless i < text.length && text.getbyte(i) == 0x3C

      stripped = i.zero? ? text : text[i..]

      # Type 1: <script|pre|style|textarea (case-insensitive) followed by
      # space/tab/end-of-line or `>`. CommonMark restricts the separator
      # to space, tab, or a line ending (not any whitespace class).
      return 1 if stripped.match?(%r{\A<(script|pre|style|textarea)(?:[ \t]|>|$)}i)

      # Type 2: <!--
      return 2 if stripped.start_with?("<!--")

      # Type 3: <?
      return 3 if stripped.start_with?("<?")

      # Type 4: <! followed by an ASCII letter of either case (the spec
      # allows lowercase declarations such as `<!doctype html>`).
      return 4 if stripped.match?(%r{\A<![A-Za-z]})

      # Type 5: <![CDATA[
      return 5 if stripped.start_with?("<![CDATA[")

      # Type 6: line opens with one of the listed block-level tags.
      return 6 if stripped.match?(TYPE_6_RE)

      # Type 7: a complete open or closing tag spanning the line.
      return 7 if valid_tag?(stripped)

      nil
    end

    TYPE_6_NAMES = %w[
      address article aside base basefont blockquote body caption center
      col colgroup dd details dialog dir div dl dt fieldset figcaption
      figure footer form frame frameset h1 h2 h3 h4 h5 h6 head header
      hr html iframe legend li link main menu menuitem nav noframes ol
      optgroup option p param search section summary table tbody td
      tfoot th thead title tr track ul
    ].freeze
    TYPE_6_RE = %r{\A</?(?:#{TYPE_6_NAMES.join('|')})(?:[ \t]|>|/>|\z)}i
    private_constant :TYPE_6_NAMES, :TYPE_6_RE

    # Type 7: a complete open or closing tag on its own line, optionally
    # followed by spaces/tabs up to the end of the line.
    # Closing tags must not have attributes.
    #
    # HTML tag separators per CommonMark 6.6 are space, tab, or up to one
    # line ending -- not the broader \s class (which would include form
    # feed and vertical tab).
    TYPE_7_OPEN_TAG_RE = %r{
      \A
      <[A-Za-z][A-Za-z0-9-]*
      (?:[ \t\r\n]+[A-Za-z_:][A-Za-z0-9_.:-]*(?:[ \t\r\n]*=[ \t\r\n]*(?:"[^"\n]*"|'[^'\n]*'|[^ \t\r\n"'=<>`]+))?)*
      [ \t\r\n]*/?>
      [ \t]*
      \z
    }x
    TYPE_7_CLOSING_TAG_RE = %r{\A</[A-Za-z][A-Za-z0-9-]*[ \t\r\n]*>[ \t]*\z}
    private_constant :TYPE_7_OPEN_TAG_RE, :TYPE_7_CLOSING_TAG_RE

    # True when `text` (indent already removed) is exactly one complete
    # open or closing tag, ignoring trailing spaces and tabs.
    def valid_tag?(text)
      # Fast reject: every type-7 tag must begin with `<`.
      return false unless text.start_with?("<")

      TYPE_7_OPEN_TAG_RE.match?(text) || TYPE_7_CLOSING_TAG_RE.match?(text)
    end

    # End condition for type 1: a line containing any of the four end
    # tags, case-insensitively. Per the spec the end tag does not have to
    # match the tag that opened the block.
    TYPE_1_END_RE = %r{</(?:script|pre|style|textarea)>}i
    private_constant :TYPE_1_END_RE

    # Closing-condition strings for HTML block types 2-5 (type 1 uses
    # TYPE_1_END_RE; types 6 and 7 end at a blank line).
    FIXED_TERMINATORS = { 2 => "-->", 3 => "?>", 4 => ">", 5 => "]]>" }.freeze
    private_constant :FIXED_TERMINATORS

    # Builds HTML_BLOCK nodes for BlockParser. It holds only the arena it
    # takes from the block parser; everything specific to one call is kept
    # in method locals.
    class Parser
      def initialize(block_parser)
        @arena = block_parser.arena
      end

      # Parses the HTML block starting at lines[index] (its type already
      # confirmed by HtmlBlock.start?), appends one HTML_BLOCK node holding
      # the block's lines (joined with "\n") under parent_id, and returns
      # the index of the first line past the block.
      def parse(parent_id, lines, index)
        start_index = index
        type = HtmlBlock.type(lines[index].content)
        end_index = locate_end(lines, index, type)

        start_byte = lines[start_index].start_byte
        end_byte = lines[end_index].end_byte
        html_lines = (start_index..end_index).map { |i| lines[i].content }
        html_id = @arena.add_node(NodeType::HTML_BLOCK,
                                  source_start: start_byte,
                                  source_len: end_byte - start_byte,
                                  str1: html_lines.join("\n"))
        @arena.append_child(parent_id, html_id)
        end_index + 1
      end

      private

      # Index of the last line belonging to the block that starts at
      # `index`.
      def locate_end(lines, index, type)
        terminator = terminator_for(type)

        if terminator
          # Types 1-5: the first line (the opening line included) that
          # satisfies the end condition closes the block; without one the
          # block runs to the end of the given lines.
          while index < lines.length
            return index if terminated?(lines[index].content, terminator)

            index += 1
          end
          lines.length - 1
        else
          # Types 6 & 7: terminated by blank line (or end of input)
          index += 1 while index < lines.length && !lines[index].blank
          index - 1
        end
      end

      # End condition for `type`: a Regexp for type 1, a literal String
      # for types 2-5, nil for blank-line-terminated types.
      def terminator_for(type)
        case type
        when 1
          TYPE_1_END_RE
        when 2..5
          FIXED_TERMINATORS[type]
        end
      end

      # True when `line` satisfies `terminator` (Regexp match or literal
      # substring).
      def terminated?(line, terminator)
        terminator.is_a?(Regexp) ? terminator.match?(line) : line.include?(terminator)
      end
    end
  end
end
|
|
@@ -57,6 +57,41 @@ module RedQuilt
|
|
|
57
57
|
end
|
|
58
58
|
end
|
|
59
59
|
|
|
60
|
+
# Strips up to `max` leading 0x20 (space) bytes from `text`, returning
|
|
61
|
+
# the rest. Unlike #strip_columns this is a plain byte strip (tabs are
|
|
62
|
+
# not expanded); used where the spec counts literal spaces, e.g. a
|
|
63
|
+
# fenced code block stripping its own opening indent. No-alloc return
|
|
64
|
+
# when `text` already starts at a non-space byte.
|
|
65
|
+
def strip_leading_spaces(text, max)
  return text unless max.positive?

  # Never look further than `max` bytes or past the end of the string.
  cap = [max, text.bytesize].min
  skip = 0
  skip += 1 while skip < cap && text.getbyte(skip) == 0x20

  # Hand back the receiver itself when nothing was stripped.
  skip.zero? ? text : text.byteslice(skip..)
end
|
|
77
|
+
|
|
78
|
+
# Strips all leading 0x20 / 0x09 bytes from `text` (spaces and tabs,
|
|
79
|
+
# no column cap). Same no-alloc return as #strip_leading_spaces when
|
|
80
|
+
# `text` already starts at a non-whitespace byte.
|
|
81
|
+
def strip_leading_whitespace(text)
  offset = 0
  loop do
    # getbyte returns nil past the end of the string, which also stops
    # the scan.
    case text.getbyte(offset)
    when 0x20, 0x09 then offset += 1
    else break
    end
  end

  # Hand back the receiver itself when nothing was stripped.
  offset.zero? ? text : text.byteslice(offset..)
end
|
|
94
|
+
|
|
60
95
|
# Bytes of literal leading 0x20 / 0x09 in `text`.
|
|
61
96
|
def leading_ws_bytes(text)
|
|
62
97
|
i = 0
|
|
@@ -9,22 +9,10 @@ module RedQuilt
|
|
|
9
9
|
# 1. linear_pass — code spans, brackets (link/image), autolinks,
|
|
10
10
|
# HTML, simple inlines. Emphasis delimiter runs are added as
|
|
11
11
|
# provisional TEXT nodes and pushed onto a delimiter stack.
|
|
12
|
-
# 2.
|
|
13
|
-
# delimiter stack entries into EMPHASIS / STRONG nodes
|
|
12
|
+
# 2. EmphasisResolver#resolve — CommonMark spec 6.2 algorithm pairs
|
|
13
|
+
# up delimiter stack entries into EMPHASIS / STRONG nodes
|
|
14
|
+
# (delegated to Inline::EmphasisResolver).
|
|
14
15
|
class Builder
|
|
15
|
-
SAFE_SCHEMES = %w[http https mailto ftp tel ssh].freeze
|
|
16
|
-
# Autolinks (`<scheme:...>`) are not run through the SAFE_SCHEMES
|
|
17
|
-
# allowlist: CommonMark permits arbitrary schemes there (e.g.
|
|
18
|
-
# `<made-up-scheme://x>`), and an allowlist would break that
|
|
19
|
-
# conformance. Only the schemes that execute script when the link
|
|
20
|
-
# is navigated are denied.
|
|
21
|
-
UNSAFE_AUTOLINK_SCHEMES = %w[javascript vbscript data].freeze
|
|
22
|
-
|
|
23
|
-
# `count` is the CommonMark delimiter-run length; a Delimiter is
|
|
24
|
-
# never enumerated, so shadowing Struct#count (from Enumerable) is
|
|
25
|
-
# intentional rather than a footgun.
|
|
26
|
-
Delimiter = Struct.new(:node_id, :char, :count, :can_open, :can_close) # rubocop:disable Lint/StructNewOverride
|
|
27
|
-
|
|
28
16
|
Bracket = Struct.new(:token_id, :node_id, :image, :active, :delim_stack_size)
|
|
29
17
|
|
|
30
18
|
# track_source: when true, arena nodes carry the byte ranges supplied
|
|
@@ -49,6 +37,7 @@ module RedQuilt
|
|
|
49
37
|
@diagnostics = diagnostics
|
|
50
38
|
@footnotes = footnotes
|
|
51
39
|
@link_scanner = LinkScanner.new(source)
|
|
40
|
+
@emphasis = EmphasisResolver.new(arena, track_source: track_source)
|
|
52
41
|
end
|
|
53
42
|
|
|
54
43
|
def build(parent_id, tokens)
|
|
@@ -58,7 +47,7 @@ module RedQuilt
|
|
|
58
47
|
@bracket_stack = []
|
|
59
48
|
@provisional_nodes = {}
|
|
60
49
|
linear_pass
|
|
61
|
-
|
|
50
|
+
@emphasis.resolve(@delimiter_stack, @provisional_nodes)
|
|
62
51
|
end
|
|
63
52
|
|
|
64
53
|
private
|
|
@@ -228,29 +217,12 @@ module RedQuilt
|
|
|
228
217
|
link_id = add_arena_node(
|
|
229
218
|
NodeType::LINK,
|
|
230
219
|
@tokens.start_byte(id), @tokens.end_byte(id),
|
|
231
|
-
str1: block_unsafe_autolink(@link_scanner.normalize_uri(destination)),
|
|
220
|
+
str1: UrlSanitizer.block_unsafe_autolink(@link_scanner.normalize_uri(destination), @diagnostics),
|
|
232
221
|
)
|
|
233
222
|
@arena.append_child(@parent_id, link_id)
|
|
234
223
|
@arena.append_child(link_id, @arena.add_node(NodeType::TEXT, str1: label))
|
|
235
224
|
end
|
|
236
225
|
|
|
237
|
-
# Returns "" (blocking the href) for autolink destinations whose
|
|
238
|
-
# scheme executes script on navigation; otherwise the destination
|
|
239
|
-
# is returned unchanged. Unlike sanitize_destination this is a
|
|
240
|
-
# denylist, to stay CommonMark-conformant for benign custom schemes.
|
|
241
|
-
def block_unsafe_autolink(destination)
|
|
242
|
-
scheme = destination[%r{\A([a-zA-Z][a-zA-Z0-9+\-.]*):}, 1]
|
|
243
|
-
return destination if scheme.nil?
|
|
244
|
-
return destination unless UNSAFE_AUTOLINK_SCHEMES.include?(scheme.downcase)
|
|
245
|
-
|
|
246
|
-
report_diagnostic(
|
|
247
|
-
severity: :warning,
|
|
248
|
-
rule: :unsafe_url,
|
|
249
|
-
message: "Unsafe URL scheme #{scheme.downcase.inspect} blocked",
|
|
250
|
-
)
|
|
251
|
-
""
|
|
252
|
-
end
|
|
253
|
-
|
|
254
226
|
# --------------------------- code spans -----------------------------
|
|
255
227
|
|
|
256
228
|
# Find the closing backtick run for a code span by scanning the
|
|
@@ -400,7 +372,7 @@ module RedQuilt
|
|
|
400
372
|
link_kind = opener.image ? NodeType::IMAGE : NodeType::LINK
|
|
401
373
|
link_id = add_arena_node(
|
|
402
374
|
link_kind, opener_start, match[:end_byte],
|
|
403
|
-
str1: sanitize_destination(match[:destination]),
|
|
375
|
+
str1: UrlSanitizer.sanitize_destination(match[:destination], @diagnostics),
|
|
404
376
|
str2: match[:title],
|
|
405
377
|
)
|
|
406
378
|
|
|
@@ -416,7 +388,7 @@ module RedQuilt
|
|
|
416
388
|
@arena.detach(opener.node_id)
|
|
417
389
|
|
|
418
390
|
inner_delims = @delimiter_stack.slice!(opener.delim_stack_size..) || []
|
|
419
|
-
|
|
391
|
+
@emphasis.resolve(inner_delims, @provisional_nodes)
|
|
420
392
|
|
|
421
393
|
@bracket_stack.delete_at(opener_index)
|
|
422
394
|
|
|
@@ -489,22 +461,6 @@ module RedQuilt
|
|
|
489
461
|
last
|
|
490
462
|
end
|
|
491
463
|
|
|
492
|
-
def sanitize_destination(destination)
|
|
493
|
-
return "" if destination.nil?
|
|
494
|
-
return destination if destination.start_with?("/", "#")
|
|
495
|
-
|
|
496
|
-
scheme = destination[%r{\A([a-zA-Z][a-zA-Z0-9+\-.]*):}, 1]
|
|
497
|
-
return destination if scheme.nil?
|
|
498
|
-
return destination if SAFE_SCHEMES.include?(scheme.downcase)
|
|
499
|
-
|
|
500
|
-
report_diagnostic(
|
|
501
|
-
severity: :warning,
|
|
502
|
-
rule: :unsafe_url,
|
|
503
|
-
message: "Unsafe URL scheme #{scheme.downcase.inspect} blocked",
|
|
504
|
-
)
|
|
505
|
-
""
|
|
506
|
-
end
|
|
507
|
-
|
|
508
464
|
def report_diagnostic(severity:, rule:, message:, source_span: nil)
|
|
509
465
|
return unless @diagnostics
|
|
510
466
|
|
|
@@ -530,145 +486,12 @@ module RedQuilt
|
|
|
530
486
|
@arena.append_child(@parent_id, node_id)
|
|
531
487
|
@provisional_nodes[node_id] = true
|
|
532
488
|
|
|
533
|
-
@delimiter_stack << Delimiter.new(
|
|
489
|
+
@delimiter_stack << EmphasisResolver::Delimiter.new(
|
|
534
490
|
node_id, char, count,
|
|
535
491
|
(flags & 0b10) != 0,
|
|
536
492
|
(flags & 0b01) != 0,
|
|
537
493
|
)
|
|
538
494
|
end
|
|
539
|
-
|
|
540
|
-
def process_emphasis(stack)
|
|
541
|
-
# NB: the CommonMark spec describes an `openers_bottom`
|
|
542
|
-
# optimization keyed by closer character / length / flanking
|
|
543
|
-
# flags. Implementing that correctly is subtle (a single
|
|
544
|
-
# per-character bottom blocks valid matches like
|
|
545
|
-
# `*foo**bar**baz*`), so the implementation here just walks
|
|
546
|
-
# back to the start of the stack for every closer. This is
|
|
547
|
-
# O(stack^2) in the worst case but stacks are tiny in practice.
|
|
548
|
-
closer_idx = 0
|
|
549
|
-
|
|
550
|
-
while closer_idx < stack.length
|
|
551
|
-
closer = stack[closer_idx]
|
|
552
|
-
unless closer.can_close
|
|
553
|
-
closer_idx += 1
|
|
554
|
-
next
|
|
555
|
-
end
|
|
556
|
-
|
|
557
|
-
opener_idx = closer_idx - 1
|
|
558
|
-
found = false
|
|
559
|
-
while opener_idx >= 0
|
|
560
|
-
opener = stack[opener_idx]
|
|
561
|
-
if opener.can_open && opener.char == closer.char
|
|
562
|
-
skip = false
|
|
563
|
-
if (opener.can_close || closer.can_open) &&
|
|
564
|
-
((opener.count + closer.count) % 3).zero? &&
|
|
565
|
-
!((opener.count % 3).zero? && (closer.count % 3).zero?)
|
|
566
|
-
skip = true
|
|
567
|
-
end
|
|
568
|
-
unless skip
|
|
569
|
-
found = true
|
|
570
|
-
break
|
|
571
|
-
end
|
|
572
|
-
end
|
|
573
|
-
opener_idx -= 1
|
|
574
|
-
end
|
|
575
|
-
|
|
576
|
-
unless found
|
|
577
|
-
unless closer.can_open
|
|
578
|
-
@provisional_nodes.delete(closer.node_id)
|
|
579
|
-
stack.delete_at(closer_idx)
|
|
580
|
-
end
|
|
581
|
-
closer_idx += 1
|
|
582
|
-
next
|
|
583
|
-
end
|
|
584
|
-
|
|
585
|
-
opener = stack[opener_idx]
|
|
586
|
-
strength = [opener.count, closer.count].min >= 2 ? 2 : 1
|
|
587
|
-
if closer.char == "~"
|
|
588
|
-
# GFM strikethrough only forms on `~~` runs. A single `~`
|
|
589
|
-
# leaves the delimiter as text; advance the cursor so future
|
|
590
|
-
# `~~` pairs can still match.
|
|
591
|
-
if strength < 2
|
|
592
|
-
closer_idx += 1
|
|
593
|
-
next
|
|
594
|
-
end
|
|
595
|
-
kind = NodeType::STRIKETHROUGH
|
|
596
|
-
else
|
|
597
|
-
kind = strength == 2 ? NodeType::STRONG : NodeType::EMPHASIS
|
|
598
|
-
end
|
|
599
|
-
|
|
600
|
-
# CommonMark spec: any delimiters strictly between this opener and
|
|
601
|
-
# closer can't open or close anything in this scope, so drop them
|
|
602
|
-
# from the stack before we rebuild the tree. Their arena nodes
|
|
603
|
-
# stay where they are (they'll be reparented into the new emphasis
|
|
604
|
-
# alongside the surrounding content), but they must no longer be
|
|
605
|
-
# candidates for future iterations. Without this, the next
|
|
606
|
-
# iteration would try to pair stranded delimiters that have
|
|
607
|
-
# already been moved into a different parent, which corrupts the
|
|
608
|
-
# sibling chain (Arena#reparent walks into @parent[-1]).
|
|
609
|
-
if closer_idx > opener_idx + 1
|
|
610
|
-
removed = stack.slice!((opener_idx + 1)...closer_idx)
|
|
611
|
-
removed.each { |e| @provisional_nodes.delete(e.node_id) }
|
|
612
|
-
closer_idx = opener_idx + 1
|
|
613
|
-
closer = stack[closer_idx]
|
|
614
|
-
end
|
|
615
|
-
|
|
616
|
-
opener_node = opener.node_id
|
|
617
|
-
closer_node = closer.node_id
|
|
618
|
-
|
|
619
|
-
if @track_source
|
|
620
|
-
opener_match_start = @arena.source_end(opener_node) - strength
|
|
621
|
-
closer_match_end = @arena.source_start(closer_node) + strength
|
|
622
|
-
else
|
|
623
|
-
opener_match_start = -1
|
|
624
|
-
closer_match_end = 0
|
|
625
|
-
end
|
|
626
|
-
emphasis_id = add_arena_node(kind, opener_match_start, closer_match_end)
|
|
627
|
-
|
|
628
|
-
first_inside = @arena.raw_next_sibling_id(opener_node)
|
|
629
|
-
last_inside = @arena.raw_prev_sibling_id(closer_node)
|
|
630
|
-
if first_inside != -1 && last_inside != -1 &&
|
|
631
|
-
first_inside != closer_node && last_inside != opener_node
|
|
632
|
-
@arena.reparent(emphasis_id, first_inside, last_inside)
|
|
633
|
-
end
|
|
634
|
-
|
|
635
|
-
parent_id = @arena.raw_parent_id(opener_node)
|
|
636
|
-
@arena.insert_before(parent_id, closer_node, emphasis_id)
|
|
637
|
-
|
|
638
|
-
if opener.count == strength
|
|
639
|
-
@provisional_nodes.delete(opener_node)
|
|
640
|
-
@arena.detach(opener_node)
|
|
641
|
-
stack.delete_at(opener_idx)
|
|
642
|
-
closer_idx -= 1
|
|
643
|
-
else
|
|
644
|
-
opener.count -= strength
|
|
645
|
-
str = @arena.str1(opener_node)
|
|
646
|
-
@arena.update_str1(opener_node, str[0...-strength])
|
|
647
|
-
if @track_source
|
|
648
|
-
new_end = @arena.source_end(opener_node) - strength
|
|
649
|
-
@arena.update_span(opener_node, @arena.source_start(opener_node), new_end)
|
|
650
|
-
end
|
|
651
|
-
end
|
|
652
|
-
|
|
653
|
-
if closer.count == strength
|
|
654
|
-
@provisional_nodes.delete(closer_node)
|
|
655
|
-
@arena.detach(closer_node)
|
|
656
|
-
stack.delete_at(closer_idx)
|
|
657
|
-
else
|
|
658
|
-
closer.count -= strength
|
|
659
|
-
str = @arena.str1(closer_node)
|
|
660
|
-
@arena.update_str1(closer_node, str[strength..])
|
|
661
|
-
if @track_source
|
|
662
|
-
new_start = @arena.source_start(closer_node) + strength
|
|
663
|
-
new_end = @arena.source_end(closer_node)
|
|
664
|
-
@arena.update_span(closer_node, new_start, new_end)
|
|
665
|
-
end
|
|
666
|
-
end
|
|
667
|
-
end
|
|
668
|
-
|
|
669
|
-
stack.each { |e| @provisional_nodes.delete(e.node_id) }
|
|
670
|
-
stack.clear
|
|
671
|
-
end
|
|
672
495
|
end
|
|
673
496
|
end
|
|
674
497
|
end
|