red_quilt 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +109 -0
- data/.rubocop_todo.yml +7 -0
- data/CHANGELOG.md +57 -0
- data/README.md +284 -0
- data/Rakefile +8 -0
- data/ast-spec.md +1227 -0
- data/docs/architecture.md +81 -0
- data/docs/arena-usage.md +363 -0
- data/docs/commonmark-conformance.md +241 -0
- data/exe/redquilt +7 -0
- data/lib/red_quilt/arena.rb +366 -0
- data/lib/red_quilt/block_parser.rb +724 -0
- data/lib/red_quilt/blockquote.rb +151 -0
- data/lib/red_quilt/cli.rb +182 -0
- data/lib/red_quilt/diagnostic.rb +47 -0
- data/lib/red_quilt/document.rb +126 -0
- data/lib/red_quilt/extended_autolink_pass.rb +185 -0
- data/lib/red_quilt/footnote_definition.rb +147 -0
- data/lib/red_quilt/footnote_pass.rb +39 -0
- data/lib/red_quilt/footnote_registry.rb +68 -0
- data/lib/red_quilt/indentation.rb +73 -0
- data/lib/red_quilt/inline/builder.rb +674 -0
- data/lib/red_quilt/inline/flanking.rb +120 -0
- data/lib/red_quilt/inline/html_entities.rb +2180 -0
- data/lib/red_quilt/inline/lexer.rb +280 -0
- data/lib/red_quilt/inline/link_scanner.rb +315 -0
- data/lib/red_quilt/inline/token_kind.rb +39 -0
- data/lib/red_quilt/inline/tokens.rb +73 -0
- data/lib/red_quilt/inline.rb +34 -0
- data/lib/red_quilt/inline_pass.rb +53 -0
- data/lib/red_quilt/line.rb +14 -0
- data/lib/red_quilt/lint_pass.rb +71 -0
- data/lib/red_quilt/list.rb +317 -0
- data/lib/red_quilt/node_ref.rb +114 -0
- data/lib/red_quilt/node_type.rb +66 -0
- data/lib/red_quilt/plain_text.rb +46 -0
- data/lib/red_quilt/reference_definition.rb +309 -0
- data/lib/red_quilt/renderer/html.rb +279 -0
- data/lib/red_quilt/renderer/mdast.rb +152 -0
- data/lib/red_quilt/source_map.rb +29 -0
- data/lib/red_quilt/source_span.rb +26 -0
- data/lib/red_quilt/theme.rb +28 -0
- data/lib/red_quilt/themes/default.css +87 -0
- data/lib/red_quilt/version.rb +5 -0
- data/lib/red_quilt.rb +86 -0
- data/mise.toml +2 -0
- data/sig/red_quilt.rbs +45 -0
- metadata +91 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
  # Namespace for inline parsing. The individual components (Lexer, Builder,
  # Tokens, Flanking, ...) are defined under lib/red_quilt/inline/; this
  # module itself carries the byte-level primitives several of them share.
  module Inline
    # Byte-indexed table (256 entries, frozen). An entry is `true` exactly
    # when the byte is CommonMark ASCII punctuation, i.e. lies in one of the
    # ranges 0x21-0x2F, 0x3A-0x40, 0x5B-0x60 or 0x7B-0x7E; every other entry
    # is `false`. Consumers (backslash-escape recognition, flanking-run
    # boundary detection) can answer the question with one array index.
    ASCII_PUNCT = Array.new(256) do |byte|
      case byte
      when 0x21..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7E then true
      else false
      end
    end.freeze

    # Byte-indexed table of frozen one-byte strings: BYTE_CHR[b] is equal to
    # `b.chr`. Sharing these objects lets hot paths obtain the string for a
    # byte without allocating a new one through Integer#chr each time.
    BYTE_CHR = (0...256).map { |byte| byte.chr.freeze }.freeze

    # Looks `byte` up in ASCII_PUNCT.
    #
    # @param byte [Integer] a byte value (0..255)
    # @return [Boolean] true when the byte is ASCII punctuation
    def ascii_punct_byte?(byte)
      ASCII_PUNCT[byte]
    end
    module_function :ascii_punct_byte?
  end
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
  # Tree pass that walks the document from its root and runs the inline
  # lexer + builder on every node whose type is listed in INLINE_TARGETS.
  # Nodes of other types are only traversed to reach their children.
  class InlinePass
    # Node types whose content is handed to the inline lexer/builder.
    INLINE_TARGETS = [
      NodeType::PARAGRAPH,
      NodeType::HEADING,
      NodeType::TABLE_CELL,
    ].freeze

    # @param document the parsed document; this pass reads its arena,
    #   source, references, diagnostics, footnotes and root_id
    def initialize(document)
      @document = document
      @arena = document.arena
      # Shared lexer/builder over the document source; reused for every
      # target node that has no materialized literal of its own.
      @lexer = Inline::Lexer.new(@document.source)
      @tokens = Inline::Tokens.new
      @builder = Inline::Builder.new(
        @arena, @document.source, @document.references,
        diagnostics: @document.diagnostics,
        footnotes: @document.footnotes
      )
    end

    # Runs the pass over the whole tree, starting at the document root.
    def apply
      visit(@document.root_id)
    end

    private

    # Processes `node_id` if it is an inline target; otherwise recurses into
    # its children. Target nodes are not descended into. Always returns nil.
    def visit(node_id)
      if INLINE_TARGETS.include?(@arena.type(node_id))
        build_inlines(node_id)
      else
        visit_children(node_id)
      end
      nil
    end

    # Lexes and builds the inline content of one target node. The shared
    # token buffer is cleared first so each node starts from an empty buffer.
    def build_inlines(node_id)
      @tokens.clear
      literal = @arena.str1(node_id)
      if literal
        build_from_literal(node_id, literal)
      else
        @lexer.lex_into(@tokens, @arena.source_start(node_id), @arena.source_end(node_id))
        @builder.build(node_id, @tokens)
      end
    end

    # The node carries a materialized literal (e.g. block-quote / list lines
    # with their continuation prefix stripped). Byte ranges produced by the
    # lexer are then relative to `literal` rather than to the document
    # source, so a dedicated lexer and a builder with span tracking turned
    # off are used for this node only.
    def build_from_literal(node_id, literal)
      Inline::Lexer.new(literal).lex_into(@tokens, 0, literal.bytesize)
      literal_builder = Inline::Builder.new(
        @arena, literal, @document.references,
        track_source: false,
        diagnostics: @document.diagnostics,
        footnotes: @document.footnotes
      )
      literal_builder.build(node_id, @tokens)
    end

    # Walks the sibling chain of `node_id`'s children; the arena signals the
    # end of the chain with -1.
    def visit_children(node_id)
      child_id = @arena.raw_first_child_id(node_id)
      while child_id != -1
        visit(child_id)
        child_id = @arena.raw_next_sibling_id(child_id)
      end
    end
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
  # One source line as handled by the block parser and its collaborators
  # (Blockquote, List).
  #
  # Members, in positional order:
  #   content           - line text with any container prefix already removed
  #   start_byte        - where the line begins in the original source
  #   end_byte          - where the line ends in the original source
  #   blank             - true for whitespace-only lines
  #   lazy_continuation - true for lines folded into an open paragraph
  #
  # The struct is deliberately positional rather than keyword_init: a Line
  # is created for every source line, and the positional constructor is
  # roughly 2.5x faster, which adds up on large documents.
  Line = Struct.new(*%i[content start_byte end_byte blank lazy_continuation])
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
  # Optional lint pass, run AFTER InlinePass when the caller passes
  # `lint: true` to RedQuilt.parse / .render_html. It makes a single walk
  # over the finished tree and appends warning / info diagnostics to
  # Document#diagnostics for issues the parser cannot reasonably report
  # while parsing: heading-level skips, links without a destination, images
  # without alt text.
  #
  # Every diagnostic carries a Symbol in Diagnostic#rule so callers can
  # filter or silence rules individually.
  class LintPass
    # @param document the parsed document; the pass reads its arena and
    #   root_id and appends to its diagnostics collection
    def initialize(document)
      @document = document
      @arena = document.arena
      @diagnostics = document.diagnostics
    end

    # Visits every node once (parent before children) and applies the rule
    # that matches the node's type, if any.
    def apply
      previous_level = 0
      walk(@document.root_id) do |id|
        case @arena.type(id)
        when NodeType::HEADING
          previous_level = check_heading_level(id, previous_level)
        when NodeType::LINK
          check_empty_link(id)
        when NodeType::IMAGE
          check_missing_alt(id)
        end
      end
    end

    private

    # Pre-order traversal: yields `node_id`, then recurses into each child.
    def walk(node_id, &block)
      yield node_id
      @arena.each_child(node_id) do |child_id|
        walk(child_id, &block)
      end
    end

    # Reports a heading that is more than one level deeper than the heading
    # seen before it. The first heading of a document (previous_level == 0)
    # is never reported.
    #
    # @return [Integer] this heading's level, to be remembered by the caller
    def check_heading_level(node_id, previous_level)
      level = @arena.int1(node_id)
      skipped = previous_level.positive? && level > previous_level + 1
      if skipped
        push(:info, :heading_level_skip,
             "Heading jumps from h#{previous_level} to h#{level}",
             @arena.source_span(node_id))
      end
      level
    end

    # Reports a link whose destination (str1) is nil or empty.
    def check_empty_link(node_id)
      destination = @arena.str1(node_id).to_s
      return unless destination.empty?

      push(:warning, :empty_link, "Link has no destination", @arena.source_span(node_id))
    end

    # Reports an image whose alt text is empty or whitespace-only.
    #
    # An IMAGE node keeps its destination URL in str1, so NodeRef#text would
    # hand back the URL as "alt text" for an image without children.
    # PlainText only walks descendants, which makes a childless image come
    # out as "".
    def check_missing_alt(node_id)
      alt_text = PlainText.from(@arena, node_id).strip
      return unless alt_text.empty?

      push(:info, :missing_alt, "Image has no alt text", @arena.source_span(node_id))
    end

    # Appends one Diagnostic to the document's diagnostics collection.
    def push(severity, rule, message, source_span)
      diagnostic = Diagnostic.new(severity: severity, rule: rule,
                                  message: message, source_span: source_span)
      @diagnostics << diagnostic
    end
  end
end
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
  # CommonMark spec 5.2 lists.
  #
  # The module-level functions are stateless predicates used by
  # BlockParser's dispatch and paragraph-interruption logic. `List::Parser`
  # keeps a cached reference to its owning BlockParser (for parse_lines
  # recursion and shared helpers) but no per-list state: one Parser instance
  # serves every list in the document, nested ones included, and everything
  # that varies per call lives in method locals, so reentrant calls are safe.
  module List
    # Bullet marker (`*`, `+`, `-`) followed either by whitespace plus the
    # rest of the line, or by nothing but optional whitespace up to the end.
    # Captures: 1 = marker, 2 = whitespace after the marker, 3 = body.
    BULLET_ITEM = /\A([*+-])(?:([ \t]+)(.*)|([ \t]*)\z)/

    # Ordered marker: 1-9 digits and a `.` or `)` delimiter, then the same
    # two tails as BULLET_ITEM.
    # Captures: 1 = digits, 2 = delimiter, 3 = whitespace, 4 = body.
    ORDERED_ITEM = /\A(\d{1,9})([.)])(?:([ \t]+)(.*)|([ \t]*)\z)/

    private_constant :BULLET_ITEM, :ORDERED_ITEM

    module_function

    # Recognises the start of a list item per CommonMark spec section 5.2.
    #
    # Returns nil when `text` is not a list-item start, otherwise a Hash:
    #
    #   ordered:        true (`1.` / `1)`) or false (`-` / `+` / `*`)
    #   start_number:   Integer (0 for unordered)
    #   marker:         String, the marker character (".", ")", "-", "+", "*")
    #   content:        String, the body of the line as it should appear
    #                   inside the item (keeps leading spaces when the marker
    #                   was followed by 5+ columns of whitespace -- the
    #                   indented-code form)
    #   content_start:  Integer, byte offset into `text` where `content`
    #                   begins; always leading + marker_width + 1
    #   content_indent: Integer, the spec's N -- the indent every
    #                   continuation line must reach to stay inside the item
    #
    # This runs on every line, so the leading-space scan and the marker-byte
    # dispatch below are plain byte reads: a non-list line is rejected
    # without touching the regex engine, and a candidate line is matched
    # against only the one pattern its first byte allows.
    def match(text)
      # At most 3 leading spaces may precede the marker.
      leading = 0
      leading += 1 while leading < 3 && text.getbyte(leading) == 0x20

      case text.getbyte(leading)
      when 0x2A, 0x2B, 0x2D # "*", "+", "-"
        found = BULLET_ITEM.match(text[leading..])
        return nil unless found

        marker = found[1]
        marker_width = 1
        padding = found[2]
        body = found[3]
        ordered = false
        start_number = 0
      when 0x30..0x39 # "0".."9"
        found = ORDERED_ITEM.match(text[leading..])
        return nil unless found

        digits = found[1]
        marker = found[2]
        marker_width = digits.length + 1
        padding = found[3]
        body = found[4]
        ordered = true
        start_number = digits.to_i
      else
        # End of string (nil), a fourth space, or any other byte.
        return nil
      end

      # `spaces_after` is a column width, not a byte length: a tab after the
      # marker counts as however many columns are needed to reach the next
      # tab stop (CommonMark Tabs section). With no whitespace capture the
      # marker ran straight into end of line, so the body is empty.
      spaces_after = padding ? column_width(padding, leading + marker_width) : 0
      build_match(leading, marker_width, marker, spaces_after, body || "",
                  ordered: ordered, start_number: start_number)
    end

    # Two item starts belong to the same list when both are ordered (or both
    # unordered) and use the same marker character.
    def same_group?(expected, actual)
      expected[:ordered] == actual[:ordered] && expected[:marker] == actual[:marker]
    end

    # CommonMark spec: a list item can only interrupt an open paragraph if it
    # has visible content, and (for ordered lists) only if the start number
    # is 1.
    def interrupts_paragraph?(li_match)
      return false if li_match[:content].empty?
      return false if li_match[:ordered] && li_match[:start_number] != 1

      true
    end

    # Column width of `whitespace` when it begins at absolute column
    # `start_col`, with tabs advancing to the next tab stop (multiples of 4).
    # `whitespace` must contain only 0x20/0x09 bytes; other bytes are ignored.
    def column_width(whitespace, start_col)
      col = start_col
      whitespace.each_byte do |byte|
        case byte
        when 0x20 then col += 1
        when 0x09 then col = ((col / 4) + 1) * 4
        end
      end
      col - start_col
    end

    # Assembles the Hash returned by #match.
    #
    # @param leading [Integer] number of spaces before the marker
    # @param marker_width [Integer] width of the marker (digits + delimiter)
    # @param marker [String] marker character
    # @param spaces_after [Integer] column width of whitespace after marker
    # @param body [String] text following that whitespace
    def build_match(leading, marker_width, marker, spaces_after, body, ordered:, start_number:)
      after_marker = leading + marker_width

      if body.empty?
        # Marker followed by EOL: empty item content.
        content_indent = after_marker + 1
        content = ""
      elsif spaces_after >= 5
        # Indented-code form: only one column belongs to the marker; the
        # remaining (spaces_after - 1) stay in the content so the item body
        # is recognised as an indented code block.
        content_indent = after_marker + 1
        content = (" " * (spaces_after - 1)) + body
      else
        content_indent = after_marker + spaces_after
        content = body
      end

      {
        ordered: ordered,
        start_number: start_number,
        marker: marker,
        content: content,
        content_start: after_marker + 1,
        content_indent: content_indent,
      }
    end

    # Cached collaborator for BlockParser. A single instance is created in
    # BlockParser#initialize and reused for every list (nested ones too);
    # per-call state lives in method locals, so reentrant `#parse` calls are
    # safe.
    class Parser
      # @param block_parser the owning BlockParser; supplies the arena plus
      #   thematic_break?, lazy_break? and parse_lines
      def initialize(block_parser)
        @block_parser = block_parser
        @arena = block_parser.arena
      end

      # Parses the list that starts at `lines[index]` and appends it to
      # `parent_id`.
      #
      # @param parent_id arena id of the node that receives the LIST node
      # @param lines [Array<Line>] lines of the enclosing container
      # @param index [Integer] index of the first list-item line
      # @return [Integer] index of the first line not consumed by the list
      def parse(parent_id, lines, index)
        opening_line = lines[index]
        first_match = List.match(opening_line.content)
        # int3 is the tight flag; it starts as 1 and is rewritten once the
        # whole list has been seen.
        list_id = @arena.add_node(NodeType::LIST,
                                  source_start: opening_line.start_byte,
                                  source_len: 0,
                                  int1: first_match[:ordered] ? 1 : 0,
                                  int2: first_match[:start_number],
                                  int3: 1,
                                  str1: first_match[:marker])
        @arena.append_child(parent_id, list_id)
        start_byte = opening_line.start_byte
        end_byte = opening_line.end_byte
        loose = false

        while index < lines.length
          # Thematic break beats list-item continuation per CommonMark: a
          # line like `* * *` ends the list and starts an <hr />.
          break if @block_parser.thematic_break?(lines[index].content)

          match = List.match(lines[index].content)
          break unless match && List.same_group?(first_match, match)

          item_lines, index = collect_item(lines, index, match)
          item_start = item_lines.first.start_byte
          end_byte = item_lines.last.end_byte
          item_id = @arena.add_node(NodeType::LIST_ITEM,
                                    source_start: item_start,
                                    source_len: end_byte - item_start)
          @arena.append_child(list_id, item_id)

          # CommonMark: an item is loose when "two block-level elements with
          # a blank line between them" appear at its top level. parse_lines
          # reports exactly that -- a blank line followed by ANY block-level
          # construct it processed at this scope -- which also covers cases
          # an arena walk would miss (a ref-def after a blank line consumes
          # a line but emits no arena child).
          #
          # NB: parse_lines must run for every item. Do NOT fold this into
          # `loose ||= parse_lines(...)`: once `loose` is true, `||=` would
          # skip the call and the item would never receive its children.
          loose = true if @block_parser.parse_lines(item_id, item_lines, transformed: true)

          blank_count = 0
          while index < lines.length && lines[index].blank
            blank_count += 1
            index += 1
          end
          next unless blank_count.positive?

          next_match = index < lines.length ? List.match(lines[index].content) : nil
          if next_match && List.same_group?(first_match, next_match)
            # Blank line(s) between two items of this list.
            loose = true
          else
            # Rewind so the caller's parse_lines sees the blank line. When
            # this parse was itself handling an item's continuation lines,
            # the caller needs the blank to detect "blank between
            # block-level elements" and mark its own item loose.
            index -= blank_count
            break
          end
        end

        @arena.update_span(list_id, start_byte, end_byte)
        @arena.update_int3(list_id, loose ? 0 : 1)
        index
      end

      private

      # Gathers the lines that belong to the item starting at `lines[index]`.
      #
      # @param match [Hash] result of List.match for the marker line
      # @return [Array(Array<Line>, Integer)] the item's lines (container
      #   prefix removed) and the index of the first line after the item
      def collect_item(lines, index, match)
        n = match[:content_indent]
        marker_line = lines[index]
        first_blank = match[:content].strip.empty?
        item_lines = [
          Line.new(match[:content],
                   marker_line.start_byte + match[:content_start],
                   marker_line.end_byte,
                   first_blank),
        ]
        index += 1

        # If the marker line itself was empty (`-` followed by EOL) and the
        # very next line is also blank, the item is empty and ends here
        # (CommonMark spec 5.2): an empty list item cannot grow by absorbing
        # arbitrary blank lines.
        return [item_lines, index] if first_blank && index < lines.length && lines[index].blank

        # Blank lines are held back until a following line proves they are
        # inside the item.
        pending_blanks = []

        while index < lines.length
          current = lines[index]

          if current.blank
            pending_blanks << Line.new("", current.start_byte, current.end_byte, true)
          elsif Indentation.leading_columns(current.content) >= n
            # CommonMark: continuation requires the line's leading
            # whitespace to span at least `n` columns, with tabs expanding
            # to multiples of 4.
            item_lines.concat(pending_blanks)
            pending_blanks = []
            item_lines << indented_line(current, n)
          elsif lazy_continuation?(lines, index, item_lines, pending_blanks)
            item_lines << lazy_line(current)
          else
            break
          end

          index += 1
        end

        # Held blanks were not absorbed (the item ended at a less-indented
        # line or at the end of input): step back over them so #parse sees
        # them and decides loose vs tight.
        [item_lines, index - pending_blanks.length]
      end

      # Builds the in-item Line for a continuation line indented by at least
      # `n` columns.
      #
      # When strip_columns synthesises spaces for a partially consumed tab,
      # the stripped text can be longer than the original bytes; treating
      # the difference as "bytes consumed" would give a negative offset. The
      # advance is therefore capped by the leading-whitespace byte count and
      # floored at 0, keeping downstream source-range arithmetic monotonic.
      def indented_line(current, n)
        stripped = Indentation.strip_columns(current.content, n)
        ws_bytes = Indentation.leading_ws_bytes(current.content)
        consumed = current.content.bytesize - stripped.bytesize
        start_advance = [[ws_bytes, consumed].min, 0].max
        Line.new(stripped, current.start_byte + start_advance, current.end_byte, false)
      end

      # Decides whether the less-indented, non-blank line at `lines[index]`
      # lazily continues the item's open paragraph. Requires:
      #   - the line is not a list-item start (a new item of any group ends
      #     this item regardless of paragraph state)
      #   - no pending blanks (a blank always ends the paragraph that could
      #     have been continued)
      #   - the previous in-item line is non-blank
      #   - the line is not itself a block-level interrupter
      def lazy_continuation?(lines, index, item_lines, pending_blanks)
        return false if List.match(lines[index].content)

        previous = item_lines.last
        pending_blanks.empty? &&
          previous && !previous.blank &&
          !@block_parser.lazy_break?(lines, index)
      end

      # Builds the in-item Line for a lazy continuation line. Its leading
      # indentation is dropped (CommonMark spec) and the lazy flag is set so
      # parse_paragraph absorbs the line even when the stripped text would
      # otherwise look like a fresh block start (e.g. ` - e` becoming `- e`).
      def lazy_line(current)
        stripped = current.content.sub(/\A[ \t]+/, "")
        # Only ASCII spaces/tabs were removed, so this is a byte count.
        strip_len = current.content.bytesize - stripped.bytesize
        Line.new(stripped, current.start_byte + strip_len, current.end_byte, false, true)
      end
    end
  end
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
  # Lightweight handle on one arena node: a (document, node_id) pair with
  # read-only accessors. Enumerable iteration visits the node itself and
  # then all of its descendants in pre-order.
  class NodeRef
    include Enumerable

    attr_reader :document, :node_id

    # @param document the owning document (provides the arena and source map)
    # @param node_id [Integer] arena id of the node this handle refers to
    def initialize(document, node_id)
      @document = document
      @arena = document.arena
      @node_id = node_id
    end

    # Enumerable hook; same traversal as #walk.
    def each(&block)
      walk(&block)
    end

    # @return the node's type name as reported by the arena
    def type
      @arena.type_name(@node_id)
    end

    # @return [Array<NodeRef>] handles for the direct children, in order
    def children
      @arena.child_ids(@node_id).map do |child_id|
        NodeRef.new(@document, child_id)
      end
    end

    # Pre-order traversal: yields this node, then walks each child subtree.
    # Without a block an Enumerator over the same sequence is returned.
    def walk(&block)
      return enum_for(:walk) unless block

      yield self
      @arena.child_ids(@node_id).each do |child_id|
        NodeRef.new(@document, child_id).walk(&block)
      end
    end

    # Text of the node. A node without children reports the arena's own text
    # for it; otherwise the children's texts are concatenated recursively
    # (nil fragments are skipped).
    def text
      return @arena.text(@node_id) if @arena.raw_first_child_id(@node_id) == -1

      @arena.child_ids(@node_id).each_with_object(+"") do |child_id, buffer|
        fragment = NodeRef.new(@document, child_id).text
        buffer << fragment.to_s unless fragment.nil?
      end
    end

    # @return the arena's source span for this node
    def source_span
      @arena.source_span(@node_id)
    end

    # All nodes of the given type in this subtree (self included).
    # NOTE: takes a type argument, unlike Enumerable#find_all, which it
    # replaces for this class.
    def find_all(type)
      walk.select { |node| node.type == type }
    end

    # Line/column position of the node's span, resolved through the
    # document's source map.
    #
    # @return [Hash, nil] start_line/start_column/end_line/end_column, or
    #   nil when the node has no source span
    def source_location
      span = source_span
      return nil unless span

      source_map = @document.source_map
      from = source_map.line_column(span.start_byte)
      to = source_map.line_column(span.end_byte)

      {
        start_line: from[:line],
        start_column: from[:column],
        end_line: to[:line],
        end_column: to[:column],
      }
    end

    # Recursive Hash form of the subtree: type, source_span, children and,
    # when the node type defines any, an :attributes entry.
    def to_h
      node = {
        type: type,
        source_span: source_span,
        children: children.map(&:to_h),
      }

      extras = ast_attributes
      extras.empty? ? node : node.merge(attributes: extras)
    end

    private

    # Type-specific attributes read from the arena's generic int/str slots.
    # Types without an entry here yield an empty Hash.
    def ast_attributes
      arena = @arena
      id = @node_id

      case arena.type(id)
      when NodeType::HEADING
        { level: arena.int1(id), text: text }
      when NodeType::LIST
        {
          ordered: arena.int1(id) == 1,
          start_number: arena.int2(id),
          tight: arena.int3(id) == 1,
          delimiter: arena.str1(id),
        }
      when NodeType::TABLE_ROW, NodeType::TABLE_CELL
        { header: arena.int1(id) == 1, text: text }
      when NodeType::TEXT, NodeType::CODE_SPAN, NodeType::HTML_BLOCK,
           NodeType::HTML_INLINE, NodeType::PARAGRAPH
        { text: text }
      when NodeType::CODE_BLOCK
        { text: arena.text(id), info: arena.str2(id) }
      when NodeType::LINK, NodeType::IMAGE
        { destination: arena.str1(id), title: arena.str2(id), text: text }
      when NodeType::FOOTNOTE_REFERENCE
        { label: arena.str1(id), number: arena.int1(id) }
      when NodeType::FOOTNOTE_DEFINITION
        { label: arena.str1(id) }
      else
        {}
      end
    end
  end
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
  # Integer tags identifying the kind of an AST node, plus the Symbol name
  # that belongs to each tag. Judging by the names, values 10-22 are
  # block-level kinds and values 100-112 are inline kinds; some numbers in
  # between are unassigned.
  module NodeType
    # -- document root --------------------------------------------------
    DOCUMENT            = 1

    # -- block-level kinds ------------------------------------------------
    PARAGRAPH           = 10
    HEADING             = 11
    THEMATIC_BREAK      = 12
    BLOCKQUOTE          = 13
    LIST                = 14
    LIST_ITEM           = 15
    CODE_BLOCK          = 16
    HTML_BLOCK          = 17
    TABLE               = 18
    TABLE_ROW           = 19
    TABLE_CELL          = 20
    FOOTNOTE_DEFINITION = 21
    FOOTNOTES_SECTION   = 22

    # -- inline kinds -----------------------------------------------------
    TEXT                = 100
    SOFTBREAK           = 101
    HARDBREAK           = 102
    EMPHASIS            = 103
    STRONG              = 104
    CODE_SPAN           = 105
    LINK                = 106
    IMAGE               = 107
    HTML_INLINE         = 109
    STRIKETHROUGH       = 111
    FOOTNOTE_REFERENCE  = 112

    # Tag => Symbol name, one entry per constant above.
    TYPE_NAMES = {
      DOCUMENT            => :document,
      PARAGRAPH           => :paragraph,
      HEADING             => :heading,
      THEMATIC_BREAK      => :thematic_break,
      BLOCKQUOTE          => :blockquote,
      LIST                => :list,
      LIST_ITEM           => :list_item,
      CODE_BLOCK          => :code_block,
      HTML_BLOCK          => :html_block,
      TABLE               => :table,
      TABLE_ROW           => :table_row,
      TABLE_CELL          => :table_cell,
      FOOTNOTE_DEFINITION => :footnote_definition,
      FOOTNOTES_SECTION   => :footnotes_section,
      TEXT                => :text,
      SOFTBREAK           => :softbreak,
      HARDBREAK           => :hardbreak,
      EMPHASIS            => :emphasis,
      STRONG              => :strong,
      CODE_SPAN           => :code_span,
      LINK                => :link,
      IMAGE               => :image,
      HTML_INLINE         => :html_inline,
      STRIKETHROUGH       => :strikethrough,
      FOOTNOTE_REFERENCE  => :footnote_reference,
    }.freeze

    # Symbol name for a type tag.
    #
    # @param type [Integer] one of the constants above
    # @return [Symbol]
    # @raise [KeyError] when `type` is not a known tag
    def name_for(type)
      TYPE_NAMES.fetch(type)
    end
    module_function :name_for
  end
end
|