red_quilt 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +109 -0
  4. data/.rubocop_todo.yml +7 -0
  5. data/CHANGELOG.md +57 -0
  6. data/README.md +284 -0
  7. data/Rakefile +8 -0
  8. data/ast-spec.md +1227 -0
  9. data/docs/architecture.md +81 -0
  10. data/docs/arena-usage.md +363 -0
  11. data/docs/commonmark-conformance.md +241 -0
  12. data/exe/redquilt +7 -0
  13. data/lib/red_quilt/arena.rb +366 -0
  14. data/lib/red_quilt/block_parser.rb +724 -0
  15. data/lib/red_quilt/blockquote.rb +151 -0
  16. data/lib/red_quilt/cli.rb +182 -0
  17. data/lib/red_quilt/diagnostic.rb +47 -0
  18. data/lib/red_quilt/document.rb +126 -0
  19. data/lib/red_quilt/extended_autolink_pass.rb +185 -0
  20. data/lib/red_quilt/footnote_definition.rb +147 -0
  21. data/lib/red_quilt/footnote_pass.rb +39 -0
  22. data/lib/red_quilt/footnote_registry.rb +68 -0
  23. data/lib/red_quilt/indentation.rb +73 -0
  24. data/lib/red_quilt/inline/builder.rb +674 -0
  25. data/lib/red_quilt/inline/flanking.rb +120 -0
  26. data/lib/red_quilt/inline/html_entities.rb +2180 -0
  27. data/lib/red_quilt/inline/lexer.rb +280 -0
  28. data/lib/red_quilt/inline/link_scanner.rb +315 -0
  29. data/lib/red_quilt/inline/token_kind.rb +39 -0
  30. data/lib/red_quilt/inline/tokens.rb +73 -0
  31. data/lib/red_quilt/inline.rb +34 -0
  32. data/lib/red_quilt/inline_pass.rb +53 -0
  33. data/lib/red_quilt/line.rb +14 -0
  34. data/lib/red_quilt/lint_pass.rb +71 -0
  35. data/lib/red_quilt/list.rb +317 -0
  36. data/lib/red_quilt/node_ref.rb +114 -0
  37. data/lib/red_quilt/node_type.rb +66 -0
  38. data/lib/red_quilt/plain_text.rb +46 -0
  39. data/lib/red_quilt/reference_definition.rb +309 -0
  40. data/lib/red_quilt/renderer/html.rb +279 -0
  41. data/lib/red_quilt/renderer/mdast.rb +152 -0
  42. data/lib/red_quilt/source_map.rb +29 -0
  43. data/lib/red_quilt/source_span.rb +26 -0
  44. data/lib/red_quilt/theme.rb +28 -0
  45. data/lib/red_quilt/themes/default.css +87 -0
  46. data/lib/red_quilt/version.rb +5 -0
  47. data/lib/red_quilt.rb +86 -0
  48. data/mise.toml +2 -0
  49. data/sig/red_quilt.rbs +45 -0
  50. metadata +91 -0
data/exe/redquilt ADDED
@@ -0,0 +1,7 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Executable entry point: load the library and its CLI, hand the process
# arguments to RedQuilt::CLI.run, and pass whatever it returns to Kernel#exit.
require "red_quilt"
require "red_quilt/cli"

status = RedQuilt::CLI.run(ARGV)
exit status
data/lib/red_quilt/arena.rb ADDED
@@ -0,0 +1,366 @@
1
+ # frozen_string_literal: true
2
+
3
module RedQuilt
  # Parallel-array storage for AST nodes.
  #
  # Each node has a single integer id (its position in the columns).
  # All structural and payload fields are stored as columns keyed by id:
  #
  #   structural   parent, first_child, last_child, next_sibling, prev_sibling
  #   source span  source_start (byte offset, -1 = no span), source_len
  #   payload      int1..int3, str1, str2 (per-NodeType conventions)
  #
  # Invariants:
  #   - id starts at 0 and grows monotonically with #add_node. Ids are
  #     never reassigned and never reclaimed; a #detach'ed node keeps its
  #     row in the columns and stays addressable (its parent / siblings
  #     are reset to NO_NODE) but is no longer reachable from the tree.
  #     This means the arena's memory is monotone non-decreasing for the
  #     lifetime of a parse — a deliberate trade for allocation simplicity.
  #   - NO_NODE (= -1) is the sentinel value used for "no parent",
  #     "no sibling", and as a source_start to mean "this node has no
  #     span; its content is materialized in str1 instead".
  #   - NO_NODE must never be used as an index into a column: Ruby treats
  #     a negative Array index as "count from the end", so such a read or
  #     write would silently hit the most recently added row.
  #   - @source is the original document string. It is treated as
  #     immutable: callers must not mutate it after constructing the
  #     arena, since byteslice positions stored in source_start/source_len
  #     refer to it directly.
  class Arena
    NO_NODE = -1

    # Raised by #check_integrity! when a structural invariant is violated.
    class IntegrityError < StandardError; end

    attr_reader :source

    # Creates an empty arena over +source+. One Array per column is
    # allocated; rows are added by #add_node.
    def initialize(source)
      @source = source
      @type = []
      @parent = []
      @first_child = []
      @last_child = []
      @next_sibling = []
      @prev_sibling = []
      @source_start = []
      @source_len = []
      @int1 = []
      @int2 = []
      @int3 = []
      @str1 = []
      @str2 = []
    end

    # Appends a fresh node to the arena and returns its id.
    # The node starts detached (parent = first_child = ... = NO_NODE).
    def add_node(type, source_start: NO_NODE, source_len: 0, int1: 0, int2: 0, int3: 0, str1: nil, str2: nil)
      id = @type.length
      @type[id] = type
      @parent[id] = NO_NODE
      @first_child[id] = NO_NODE
      @last_child[id] = NO_NODE
      @next_sibling[id] = NO_NODE
      @prev_sibling[id] = NO_NODE
      @source_start[id] = source_start
      @source_len[id] = source_len
      @int1[id] = int1
      @int2[id] = int2
      @int3[id] = int3
      @str1[id] = str1
      @str2[id] = str2
      id
    end

    # Links child_id as the new last child of parent_id and returns
    # child_id. The child is not unlinked from any previous position
    # first; callers are expected to pass a detached node.
    def append_child(parent_id, child_id)
      @parent[child_id] = parent_id
      if @first_child[parent_id] == NO_NODE
        @first_child[parent_id] = child_id
        @last_child[parent_id] = child_id
      else
        last = @last_child[parent_id]
        @next_sibling[last] = child_id
        @prev_sibling[child_id] = last
        @last_child[parent_id] = child_id
      end
      child_id
    end

    # Inserts new_id immediately before ref_id in parent_id's child list
    # and returns new_id. last_child never changes here because the new
    # node always ends up in front of an existing child.
    def insert_before(parent_id, ref_id, new_id)
      @parent[new_id] = parent_id
      prev_ref = @prev_sibling[ref_id]
      @prev_sibling[new_id] = prev_ref
      @next_sibling[new_id] = ref_id
      @prev_sibling[ref_id] = new_id
      if prev_ref == NO_NODE
        @first_child[parent_id] = new_id
      else
        @next_sibling[prev_ref] = new_id
      end
      new_id
    end

    # Removes child_id from its current parent and returns child_id. The
    # node's row stays in the arena (its payload columns are untouched)
    # but parent / siblings are reset to NO_NODE, so the node is no longer
    # reachable through any tree walk. Detached rows are not reused by
    # subsequent #add_node calls.
    #
    # Calling this on a node that has no parent (already detached, or a
    # root) is a harmless no-op on the rest of the arena.
    def detach(child_id)
      parent_id = @parent[child_id]
      prev_id = @prev_sibling[child_id]
      next_id = @next_sibling[child_id]

      if prev_id != NO_NODE
        @next_sibling[prev_id] = next_id
      elsif parent_id != NO_NODE
        # Only touch the parent's row when there is one: indexing a column
        # with NO_NODE (-1) would overwrite the last row in the arena.
        @first_child[parent_id] = next_id
      end

      if next_id != NO_NODE
        @prev_sibling[next_id] = prev_id
      elsif parent_id != NO_NODE
        @last_child[parent_id] = prev_id
      end

      @parent[child_id] = NO_NODE
      @prev_sibling[child_id] = NO_NODE
      @next_sibling[child_id] = NO_NODE
      child_id
    end

    # Moves a contiguous sibling range [first_id .. last_id] (both
    # inclusive, walking #next_sibling from first to last) under
    # new_parent_id, replacing any existing children there. The replaced
    # children are simply dropped from new_parent_id's child list; their
    # own parent / sibling columns are not rewritten.
    #
    # The walk assumes the range is well-formed; passing nodes from
    # different parents or a last_id not reachable from first_id is
    # undefined behavior. A range that currently has no parent (e.g. a
    # freshly added node) is accepted and is simply linked in.
    #
    # Returns nil without doing anything when either end is NO_NODE.
    def reparent(new_parent_id, first_id, last_id)
      return if first_id == NO_NODE || last_id == NO_NODE

      original_parent = @parent[first_id]
      prev_of_first = @prev_sibling[first_id]
      next_of_last = @next_sibling[last_id]

      # Close the gap the range leaves behind in its old sibling chain.
      # The old parent's row is only written when the range had a parent;
      # indexing with NO_NODE (-1) would corrupt the arena's last row.
      if prev_of_first != NO_NODE
        @next_sibling[prev_of_first] = next_of_last
      elsif original_parent != NO_NODE
        @first_child[original_parent] = next_of_last
      end

      if next_of_last != NO_NODE
        @prev_sibling[next_of_last] = prev_of_first
      elsif original_parent != NO_NODE
        @last_child[original_parent] = prev_of_first
      end

      @prev_sibling[first_id] = NO_NODE
      @next_sibling[last_id] = NO_NODE

      id = first_id
      loop do
        @parent[id] = new_parent_id
        break if id == last_id

        id = @next_sibling[id]
      end

      @first_child[new_parent_id] = first_id
      @last_child[new_parent_id] = last_id
    end

    # Raw value stored in the type column for the node.
    def type(id)
      @type[id]
    end

    # Name of the node's type as reported by NodeType.name_for.
    def type_name(id)
      NodeType.name_for(@type[id])
    end

    # Structural id accessors. The `raw_` prefix flags that these return
    # raw column values that may be the NO_NODE sentinel, and the
    # `_id` suffix flags that the returned integer is a node id
    # (suitable for feeding back into other Arena methods).
    def raw_parent_id(id)
      @parent[id]
    end

    def raw_first_child_id(id)
      @first_child[id]
    end

    def raw_last_child_id(id)
      @last_child[id]
    end

    def raw_next_sibling_id(id)
      @next_sibling[id]
    end

    def raw_prev_sibling_id(id)
      @prev_sibling[id]
    end

    # Byte offset where the node's span starts, or NO_NODE when the node
    # has no span.
    def source_start(id)
      @source_start[id]
    end

    # Length of the node's span in bytes.
    def source_len(id)
      @source_len[id]
    end

    # Byte offset one past the node's source span (start + len). The sum
    # is computed from the raw columns, so it is only meaningful for nodes
    # that actually have a span (source_start >= 0).
    def source_end(id)
      @source_start[id] + @source_len[id]
    end

    # Payload accessors. The meaning of each slot follows per-NodeType
    # conventions defined outside this class.
    def int1(id)
      @int1[id]
    end

    def int2(id)
      @int2[id]
    end

    def int3(id)
      @int3[id]
    end

    def str1(id)
      @str1[id]
    end

    def str2(id)
      @str2[id]
    end

    # Returns a SourceSpan for the node, or nil when the node has no
    # span (source_start < 0, meaning the content is held in str1).
    def source_span(id)
      start_byte = @source_start[id]
      return nil if start_byte.nil? || start_byte.negative?

      SourceSpan.new(start_byte, start_byte + @source_len[id])
    end

    # Returns the node's textual content. Prefers str1 (the literal
    # form, e.g. an entity decoded to its character, or a reassembled
    # blockquote line). Falls back to a byteslice of @source when only
    # a span is recorded. Returns nil if neither is available.
    def text(id)
      literal = @str1[id]
      return literal unless literal.nil?

      start_byte = @source_start[id]
      return nil if start_byte.nil? || start_byte.negative?

      @source.byteslice(start_byte, @source_len[id])
    end

    # Yields each child id of `id` in order and returns self. Block form
    # is preferred over #child_ids on hot paths (renderer, builder)
    # because it avoids the Enumerator allocation.
    def each_child(id)
      child_id = @first_child[id]
      until child_id == NO_NODE
        yield child_id
        child_id = @next_sibling[child_id]
      end
      self
    end

    # Returns an Enumerator yielding each child id. Kept for the
    # external NodeRef API where Enumerator chaining (map, select, ...)
    # is convenient.
    def child_ids(id)
      Enumerator.new do |yielder|
        child_id = @first_child[id]
        until child_id == NO_NODE
          yielder << child_id
          child_id = @next_sibling[child_id]
        end
      end
    end

    # Overwrites the node's str1 payload.
    def update_str1(id, value)
      @str1[id] = value
    end

    # Overwrites the node's int3 payload.
    def update_int3(id, value)
      @int3[id] = value
    end

    # Replaces the node's span with [start_byte, end_byte); the length
    # column is derived as end_byte - start_byte.
    def update_span(id, start_byte, end_byte)
      @source_start[id] = start_byte
      @source_len[id] = end_byte - start_byte
    end

    # Verifies the structural invariants of the tree rooted at root_id.
    # Raises IntegrityError on the first violation, including the
    # offending node id(s) and a description of the broken rule.
    # Returns self when the tree is sound.
    #
    # Checked invariants:
    #   - root_id addresses an existing row (0 <= root_id < row count)
    #   - root has parent = NO_NODE
    #   - for every reachable node `n` and its first_child / last_child
    #     `fc` / `lc`:
    #       * fc and lc are both NO_NODE, or both not NO_NODE
    #       * walking next_sibling from fc reaches lc and only lc
    #       * for each child `c`, @parent[c] == n
    #       * for each child `c`, @prev_sibling[c] equals the previously
    #         visited sibling (or NO_NODE for the first)
    #   - no node is reached twice (no shared subtrees, no cycles)
    #
    # Intended for development / debugging. Not called by the production
    # parse / render path.
    def check_integrity!(root_id)
      # A negative id (NO_NODE included) must be rejected explicitly:
      # Array#[] would otherwise resolve it to a row counted from the end
      # and the wrong node would be validated.
      if root_id.negative? || root_id >= @type.length
        raise IntegrityError, "root_id #{root_id} has no row"
      end
      if @parent[root_id] != NO_NODE
        raise IntegrityError, "root #{root_id} has non-NO_NODE parent #{@parent[root_id]}"
      end

      visited = {}
      walk_for_integrity(root_id, NO_NODE, visited)
      self
    end

    private

    # Recursive worker for #check_integrity!. Validates node `id` against
    # the parent it was reached from, then validates its child chain and
    # recurses into every child. `visited` is a Hash used as a set of the
    # ids seen so far.
    def walk_for_integrity(id, expected_parent_id, visited)
      if visited[id]
        raise IntegrityError, "node #{id} reached twice (cycle or shared subtree)"
      end

      visited[id] = true

      actual_parent = @parent[id]
      if actual_parent != expected_parent_id
        raise IntegrityError,
              "node #{id} parent mismatch: expected #{expected_parent_id}, got #{actual_parent}"
      end

      fc = @first_child[id]
      lc = @last_child[id]
      if (fc == NO_NODE) != (lc == NO_NODE)
        raise IntegrityError,
              "node #{id} first_child=#{fc} but last_child=#{lc} (one is NO_NODE, the other isn't)"
      end
      return if fc == NO_NODE

      prev_seen = NO_NODE
      child_id = fc
      tail = NO_NODE
      until child_id == NO_NODE
        if @prev_sibling[child_id] != prev_seen
          raise IntegrityError,
                "node #{child_id} prev_sibling=#{@prev_sibling[child_id]} but previous in chain was #{prev_seen}"
        end
        walk_for_integrity(child_id, id, visited)
        prev_seen = child_id
        tail = child_id
        child_id = @next_sibling[child_id]
      end

      # The chain must terminate exactly at the recorded last child.
      if tail != lc
        raise IntegrityError,
              "node #{id} last_child=#{lc} but sibling chain from first_child=#{fc} ends at #{tail}"
      end
    end
  end
end