red_quilt 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +109 -0
- data/.rubocop_todo.yml +7 -0
- data/CHANGELOG.md +57 -0
- data/README.md +284 -0
- data/Rakefile +8 -0
- data/ast-spec.md +1227 -0
- data/docs/architecture.md +81 -0
- data/docs/arena-usage.md +363 -0
- data/docs/commonmark-conformance.md +241 -0
- data/exe/redquilt +7 -0
- data/lib/red_quilt/arena.rb +366 -0
- data/lib/red_quilt/block_parser.rb +724 -0
- data/lib/red_quilt/blockquote.rb +151 -0
- data/lib/red_quilt/cli.rb +182 -0
- data/lib/red_quilt/diagnostic.rb +47 -0
- data/lib/red_quilt/document.rb +126 -0
- data/lib/red_quilt/extended_autolink_pass.rb +185 -0
- data/lib/red_quilt/footnote_definition.rb +147 -0
- data/lib/red_quilt/footnote_pass.rb +39 -0
- data/lib/red_quilt/footnote_registry.rb +68 -0
- data/lib/red_quilt/indentation.rb +73 -0
- data/lib/red_quilt/inline/builder.rb +674 -0
- data/lib/red_quilt/inline/flanking.rb +120 -0
- data/lib/red_quilt/inline/html_entities.rb +2180 -0
- data/lib/red_quilt/inline/lexer.rb +280 -0
- data/lib/red_quilt/inline/link_scanner.rb +315 -0
- data/lib/red_quilt/inline/token_kind.rb +39 -0
- data/lib/red_quilt/inline/tokens.rb +73 -0
- data/lib/red_quilt/inline.rb +34 -0
- data/lib/red_quilt/inline_pass.rb +53 -0
- data/lib/red_quilt/line.rb +14 -0
- data/lib/red_quilt/lint_pass.rb +71 -0
- data/lib/red_quilt/list.rb +317 -0
- data/lib/red_quilt/node_ref.rb +114 -0
- data/lib/red_quilt/node_type.rb +66 -0
- data/lib/red_quilt/plain_text.rb +46 -0
- data/lib/red_quilt/reference_definition.rb +309 -0
- data/lib/red_quilt/renderer/html.rb +279 -0
- data/lib/red_quilt/renderer/mdast.rb +152 -0
- data/lib/red_quilt/source_map.rb +29 -0
- data/lib/red_quilt/source_span.rb +26 -0
- data/lib/red_quilt/theme.rb +28 -0
- data/lib/red_quilt/themes/default.css +87 -0
- data/lib/red_quilt/version.rb +5 -0
- data/lib/red_quilt.rb +86 -0
- data/mise.toml +2 -0
- data/sig/red_quilt.rbs +45 -0
- metadata +91 -0
data/lib/red_quilt/arena.rb
ADDED
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
  # Parallel-array storage for AST nodes.
  #
  # Each node has a single integer id (its position in the columns).
  # All structural and payload fields are stored as columns keyed by id:
  #
  #   structural   parent, first_child, last_child, next_sibling, prev_sibling
  #   source span  source_start (byte offset, -1 = no span), source_len
  #   payload      int1..int3, str1, str2 (per-NodeType conventions)
  #
  # Invariants:
  #   - id starts at 0 and grows monotonically with #add_node. Ids are
  #     never reassigned and never reclaimed; a #detach'ed node keeps its
  #     row in the columns and stays addressable (its parent / siblings
  #     are reset to NO_NODE) but is no longer reachable from the tree.
  #     This means the arena's memory is monotone non-decreasing for the
  #     lifetime of a parse — a deliberate trade for allocation simplicity.
  #   - NO_NODE (= -1) is the sentinel value used for "no parent",
  #     "no sibling", and as a source_start to mean "this node has no
  #     span; its content is materialized in str1 instead".
  #   - @source is the original document string. It is treated as
  #     immutable: callers must not mutate it after constructing the
  #     arena, since byteslice positions stored in source_start/source_len
  #     refer to it directly.
  #
  # Because NO_NODE is -1 and Ruby arrays accept negative indices
  # (-1 addresses the last element), NO_NODE must never be used as a
  # column index: doing so would silently read or write the row of the
  # most recently added node. The mutating methods below guard against it.
  class Arena
    NO_NODE = -1

    # Raised by #check_integrity! when a structural invariant is violated.
    class IntegrityError < StandardError; end

    attr_reader :source

    # Creates an empty arena over `source`. One empty column is allocated
    # per field; rows are added by #add_node.
    def initialize(source)
      @source = source
      @type = []
      @parent = []
      @first_child = []
      @last_child = []
      @next_sibling = []
      @prev_sibling = []
      @source_start = []
      @source_len = []
      @int1 = []
      @int2 = []
      @int3 = []
      @str1 = []
      @str2 = []
    end

    # Appends a fresh node to the arena and returns its id.
    # The node starts detached (parent = first_child = ... = NO_NODE).
    # Payload and span columns take the given keyword values.
    def add_node(type, source_start: NO_NODE, source_len: 0, int1: 0, int2: 0, int3: 0, str1: nil, str2: nil)
      id = @type.length
      @type[id] = type
      @parent[id] = NO_NODE
      @first_child[id] = NO_NODE
      @last_child[id] = NO_NODE
      @next_sibling[id] = NO_NODE
      @prev_sibling[id] = NO_NODE
      @source_start[id] = source_start
      @source_len[id] = source_len
      @int1[id] = int1
      @int2[id] = int2
      @int3[id] = int3
      @str1[id] = str1
      @str2[id] = str2
      id
    end

    # Links child_id as the last child of parent_id and returns child_id.
    # The child's previous links are not unlinked first; it is expected to
    # be detached (freshly added or #detach'ed) when this is called.
    def append_child(parent_id, child_id)
      @parent[child_id] = parent_id
      if @first_child[parent_id] == NO_NODE
        @first_child[parent_id] = child_id
        @last_child[parent_id] = child_id
      else
        last = @last_child[parent_id]
        @next_sibling[last] = child_id
        @prev_sibling[child_id] = last
        @last_child[parent_id] = child_id
      end
      child_id
    end

    # Inserts new_id immediately before ref_id in parent_id's child list
    # and returns new_id. ref_id is expected to already be a child of
    # parent_id; last_child never changes because new_id goes before ref_id.
    def insert_before(parent_id, ref_id, new_id)
      @parent[new_id] = parent_id
      prev_ref = @prev_sibling[ref_id]
      @prev_sibling[new_id] = prev_ref
      @next_sibling[new_id] = ref_id
      @prev_sibling[ref_id] = new_id
      if prev_ref == NO_NODE
        @first_child[parent_id] = new_id
      else
        @next_sibling[prev_ref] = new_id
      end
      new_id
    end

    # Removes child_id from its current parent. The node's row stays in
    # the arena (its payload columns are untouched) but parent / siblings
    # are reset to NO_NODE, so the node is no longer reachable through
    # any tree walk. Detached rows are not reused by subsequent
    # #add_node calls.
    #
    # Calling this on a node that has no parent (a root, or a node that is
    # already detached) is a harmless no-op on the rest of the tree.
    # Returns child_id.
    def detach(child_id)
      parent_id = @parent[child_id]
      prev_id = @prev_sibling[child_id]
      next_id = @next_sibling[child_id]

      # Only touch the parent's columns when there is a parent: indexing a
      # column with NO_NODE (-1) would overwrite the arena's last row.
      if prev_id != NO_NODE
        @next_sibling[prev_id] = next_id
      elsif parent_id != NO_NODE
        @first_child[parent_id] = next_id
      end

      if next_id != NO_NODE
        @prev_sibling[next_id] = prev_id
      elsif parent_id != NO_NODE
        @last_child[parent_id] = prev_id
      end

      @parent[child_id] = NO_NODE
      @prev_sibling[child_id] = NO_NODE
      @next_sibling[child_id] = NO_NODE
      child_id
    end

    # Moves a contiguous sibling range [first_id .. last_id] (both
    # inclusive, walking #next_sibling from first to last) under
    # new_parent_id, replacing any existing children there. The previous
    # children of new_parent_id are not modified; they simply stop being
    # referenced by its first_child / last_child. The walk
    # assumes the range is well-formed; passing nodes from different
    # parents or a last_id not reachable from first_id is undefined
    # behavior.
    #
    # Does nothing when either end of the range is NO_NODE. A range that
    # currently has no parent is accepted and simply attached.
    def reparent(new_parent_id, first_id, last_id)
      return if first_id == NO_NODE || last_id == NO_NODE

      original_parent = @parent[first_id]
      prev_of_first = @prev_sibling[first_id]
      next_of_last = @next_sibling[last_id]

      # Splice the range out of its current sibling chain. The parent's
      # first/last pointers are only rewritten when a parent exists, since
      # indexing with NO_NODE (-1) would clobber the arena's last row.
      if prev_of_first != NO_NODE
        @next_sibling[prev_of_first] = next_of_last
      elsif original_parent != NO_NODE
        @first_child[original_parent] = next_of_last
      end

      if next_of_last != NO_NODE
        @prev_sibling[next_of_last] = prev_of_first
      elsif original_parent != NO_NODE
        @last_child[original_parent] = prev_of_first
      end

      @prev_sibling[first_id] = NO_NODE
      @next_sibling[last_id] = NO_NODE

      # Re-point every node in the range at its new parent.
      id = first_id
      loop do
        @parent[id] = new_parent_id
        break if id == last_id

        id = @next_sibling[id]
      end

      @first_child[new_parent_id] = first_id
      @last_child[new_parent_id] = last_id
    end

    # Raw value of the type column for the node.
    def type(id)
      @type[id]
    end

    # Name for the node's type, as resolved by NodeType.name_for.
    def type_name(id)
      NodeType.name_for(@type[id])
    end

    # Structural id accessors. The `raw_` prefix flags that these return
    # raw column values that may be the NO_NODE sentinel, and the
    # `_id` suffix flags that the returned integer is a node id
    # (suitable for feeding back into other Arena methods).
    def raw_parent_id(id)
      @parent[id]
    end

    def raw_first_child_id(id)
      @first_child[id]
    end

    def raw_last_child_id(id)
      @last_child[id]
    end

    def raw_next_sibling_id(id)
      @next_sibling[id]
    end

    def raw_prev_sibling_id(id)
      @prev_sibling[id]
    end

    # Byte offset of the node's span start, or a negative value (NO_NODE)
    # when the node has no span.
    def source_start(id)
      @source_start[id]
    end

    # Length of the node's span in bytes.
    def source_len(id)
      @source_len[id]
    end

    # Byte offset one past the node's source span (start + len).
    # No span check is performed: for a span-less node this is simply
    # NO_NODE + source_len, so use #source_span when that case matters.
    def source_end(id)
      @source_start[id] + @source_len[id]
    end

    # Payload accessors; the meaning of each slot depends on the node type.
    def int1(id)
      @int1[id]
    end

    def int2(id)
      @int2[id]
    end

    def int3(id)
      @int3[id]
    end

    def str1(id)
      @str1[id]
    end

    def str2(id)
      @str2[id]
    end

    # Returns a SourceSpan for the node, or nil when the node has no
    # span (source_start < 0, meaning the content is held in str1).
    # The span is built from the start offset and start + len.
    def source_span(id)
      start_byte = @source_start[id]
      return nil if start_byte.nil? || start_byte.negative?

      SourceSpan.new(start_byte, start_byte + @source_len[id])
    end

    # Returns the node's textual content. Prefers str1 (the literal
    # form, e.g. an entity decoded to its character, or a reassembled
    # blockquote line). Falls back to a byteslice of @source when only
    # a span is recorded. Returns nil if neither is available.
    def text(id)
      literal = @str1[id]
      return literal unless literal.nil?

      start_byte = @source_start[id]
      return nil if start_byte.nil? || start_byte.negative?

      @source.byteslice(start_byte, @source_len[id])
    end

    # Yields each child id of `id` in order. Block form is preferred
    # over #child_ids on hot paths (renderer, builder) because it
    # avoids the Enumerator allocation. Returns self.
    def each_child(id)
      child_id = @first_child[id]
      until child_id == NO_NODE
        yield child_id
        child_id = @next_sibling[child_id]
      end
      self
    end

    # Returns an Enumerator yielding each child id. Kept for the
    # external NodeRef API where Enumerator chaining (map, select, ...)
    # is convenient.
    def child_ids(id)
      Enumerator.new do |yielder|
        child_id = @first_child[id]
        until child_id == NO_NODE
          yielder << child_id
          child_id = @next_sibling[child_id]
        end
      end
    end

    # Overwrites the node's str1 payload.
    def update_str1(id, value)
      @str1[id] = value
    end

    # Overwrites the node's int3 payload.
    def update_int3(id, value)
      @int3[id] = value
    end

    # Sets the node's span from a start offset and an exclusive end
    # offset; the stored length is end_byte - start_byte.
    def update_span(id, start_byte, end_byte)
      @source_start[id] = start_byte
      @source_len[id] = end_byte - start_byte
    end

    # Verifies the structural invariants of the tree rooted at root_id.
    # Raises IntegrityError on the first violation, including the
    # offending node id(s) and a description of the broken rule.
    #
    # Checked invariants:
    #   - root_id addresses an existing row (0 <= root_id < node count)
    #   - root has parent = NO_NODE
    #   - for every reachable node `n` and its first_child / last_child
    #     `fc` / `lc`:
    #       * fc and lc are both NO_NODE, or both not NO_NODE
    #       * walking next_sibling from fc reaches lc and only lc
    #       * for each child `c`, @parent[c] == n
    #       * for each child `c`, @prev_sibling[c] equals the previously
    #         visited sibling (or NO_NODE for the first)
    #   - no node is reached twice (no shared subtrees, no cycles)
    #
    # Intended for development / debugging. Not called by the production
    # parse / render path. Returns self when every check passes.
    def check_integrity!(root_id)
      # A negative id (e.g. NO_NODE) would otherwise alias the last row
      # through Ruby's negative array indexing.
      if root_id.negative? || root_id >= @type.length
        raise IntegrityError, "root_id #{root_id} has no row"
      end
      if @parent[root_id] != NO_NODE
        raise IntegrityError, "root #{root_id} has non-NO_NODE parent #{@parent[root_id]}"
      end

      visited = {}
      walk_for_integrity(root_id, NO_NODE, visited)
      self
    end

    private

    # Recursive worker for #check_integrity!. Validates node `id` against
    # the parent it was reached from, then walks its child chain, recording
    # every visited id in `visited` to detect cycles and shared subtrees.
    def walk_for_integrity(id, expected_parent_id, visited)
      if visited[id]
        raise IntegrityError, "node #{id} reached twice (cycle or shared subtree)"
      end

      visited[id] = true

      actual_parent = @parent[id]
      if actual_parent != expected_parent_id
        raise IntegrityError,
              "node #{id} parent mismatch: expected #{expected_parent_id}, got #{actual_parent}"
      end

      fc = @first_child[id]
      lc = @last_child[id]
      if (fc == NO_NODE) != (lc == NO_NODE)
        raise IntegrityError,
              "node #{id} first_child=#{fc} but last_child=#{lc} (one is NO_NODE, the other isn't)"
      end
      return if fc == NO_NODE

      prev_seen = NO_NODE
      child_id = fc
      tail = NO_NODE
      until child_id == NO_NODE
        if @prev_sibling[child_id] != prev_seen
          raise IntegrityError,
                "node #{child_id} prev_sibling=#{@prev_sibling[child_id]} but previous in chain was #{prev_seen}"
        end
        walk_for_integrity(child_id, id, visited)
        prev_seen = child_id
        tail = child_id
        child_id = @next_sibling[child_id]
      end

      # The chain must terminate exactly at the recorded last child.
      if tail != lc
        raise IntegrityError,
              "node #{id} last_child=#{lc} but sibling chain from first_child=#{fc} ends at #{tail}"
      end
    end
  end
end
|