red_quilt 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +109 -0
  4. data/.rubocop_todo.yml +7 -0
  5. data/CHANGELOG.md +57 -0
  6. data/README.md +284 -0
  7. data/Rakefile +8 -0
  8. data/ast-spec.md +1227 -0
  9. data/docs/architecture.md +81 -0
  10. data/docs/arena-usage.md +363 -0
  11. data/docs/commonmark-conformance.md +241 -0
  12. data/exe/redquilt +7 -0
  13. data/lib/red_quilt/arena.rb +366 -0
  14. data/lib/red_quilt/block_parser.rb +724 -0
  15. data/lib/red_quilt/blockquote.rb +151 -0
  16. data/lib/red_quilt/cli.rb +182 -0
  17. data/lib/red_quilt/diagnostic.rb +47 -0
  18. data/lib/red_quilt/document.rb +126 -0
  19. data/lib/red_quilt/extended_autolink_pass.rb +185 -0
  20. data/lib/red_quilt/footnote_definition.rb +147 -0
  21. data/lib/red_quilt/footnote_pass.rb +39 -0
  22. data/lib/red_quilt/footnote_registry.rb +68 -0
  23. data/lib/red_quilt/indentation.rb +73 -0
  24. data/lib/red_quilt/inline/builder.rb +674 -0
  25. data/lib/red_quilt/inline/flanking.rb +120 -0
  26. data/lib/red_quilt/inline/html_entities.rb +2180 -0
  27. data/lib/red_quilt/inline/lexer.rb +280 -0
  28. data/lib/red_quilt/inline/link_scanner.rb +315 -0
  29. data/lib/red_quilt/inline/token_kind.rb +39 -0
  30. data/lib/red_quilt/inline/tokens.rb +73 -0
  31. data/lib/red_quilt/inline.rb +34 -0
  32. data/lib/red_quilt/inline_pass.rb +53 -0
  33. data/lib/red_quilt/line.rb +14 -0
  34. data/lib/red_quilt/lint_pass.rb +71 -0
  35. data/lib/red_quilt/list.rb +317 -0
  36. data/lib/red_quilt/node_ref.rb +114 -0
  37. data/lib/red_quilt/node_type.rb +66 -0
  38. data/lib/red_quilt/plain_text.rb +46 -0
  39. data/lib/red_quilt/reference_definition.rb +309 -0
  40. data/lib/red_quilt/renderer/html.rb +279 -0
  41. data/lib/red_quilt/renderer/mdast.rb +152 -0
  42. data/lib/red_quilt/source_map.rb +29 -0
  43. data/lib/red_quilt/source_span.rb +26 -0
  44. data/lib/red_quilt/theme.rb +28 -0
  45. data/lib/red_quilt/themes/default.css +87 -0
  46. data/lib/red_quilt/version.rb +5 -0
  47. data/lib/red_quilt.rb +86 -0
  48. data/mise.toml +2 -0
  49. data/sig/red_quilt.rbs +45 -0
  50. metadata +91 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: a53fcc10d442493f6de26bbced9811a9f54e5328fbfa3aae39e44491bcbdf99c
4
+ data.tar.gz: 6b0f0be3ccf229a341421f08a3b5eaa6b95bc99ed14a1d0e02a7c4675e9e100e
5
+ SHA512:
6
+ metadata.gz: f919427e21c232babe5c5672e125928557d09a5b3e80fe70a6902913cc0c1796d7478cc180edcbde2cb8b5c861c94d8b0d4fb4a5ece5f876d795b17c4a6ff891
7
+ data.tar.gz: da179b152732e9cfffb8c7580a550191f5d0798a5d6a62be8b2fab17bc4c33fd6725b1a2ebdcf001ad38bba8681030e7f27f7d036cfc68dab0ee96e61e43cb53
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,109 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
3
+ # The behavior of RuboCop can be controlled via the .rubocop.yml
4
+ # configuration file. It makes it possible to enable/disable
5
+ # certain cops (checks) and to alter their behavior if they accept
6
+ # any parameters. The file can be placed either in your home
7
+ # directory or in some project directory.
8
+ #
9
+ # RuboCop will start looking for the configuration file in the directory
10
+ # where the inspected file is and continue its way up to the root directory.
11
+ #
12
+ # See https://docs.rubocop.org/rubocop/configuration
13
+
14
+ AllCops:
15
+ NewCops: enable
16
+
17
+ Layout/FirstArrayElementIndentation:
18
+ EnforcedStyle: consistent
19
+
20
+ Layout/FirstHashElementIndentation:
21
+ EnforcedStyle: consistent
22
+
23
+ Lint/ConstantDefinitionInBlock:
24
+ Enabled: false
25
+
26
+ Metrics:
27
+ Enabled: false
28
+
29
+ Naming:
30
+ Enabled: false
31
+
32
+ Style/BlockDelimiters:
33
+ Enabled: false
34
+
35
+ Style/Documentation:
36
+ Enabled: false
37
+
38
+ Style/GuardClause:
39
+ Enabled: false
40
+
41
+ Style/IdenticalConditionalBranches:
42
+ Enabled: false
43
+
44
+ Style/IfUnlessModifier:
45
+ Enabled: false
46
+
47
+ Style/MultipleComparison:
48
+ Enabled: false
49
+
50
+ Style/MutableConstant:
51
+ Enabled: false
52
+
53
+ Style/NumericPredicate:
54
+ Enabled: false
55
+
56
+ Style/RegexpLiteral:
57
+ Enabled: false
58
+
59
+ Style/SafeNavigation:
60
+ Enabled: false
61
+
62
+ Style/StringConcatenation:
63
+ Enabled: false
64
+
65
+ Style/StringLiterals:
66
+ Enabled: false
67
+
68
+ Style/SymbolArray:
69
+ EnforcedStyle: percent
70
+ MinSize: 8
71
+
72
+ Style/TrailingCommaInArguments:
73
+ EnforcedStyleForMultiline: diff_comma
74
+
75
+ Style/TrailingCommaInArrayLiteral:
76
+ EnforcedStyleForMultiline: diff_comma
77
+
78
+ Style/TrailingCommaInHashLiteral:
79
+ EnforcedStyleForMultiline: diff_comma
80
+
81
+ Style/TrailingUnderscoreVariable:
82
+ Enabled: false
83
+
84
+ Style/WhileUntilModifier:
85
+ Enabled: false
86
+
87
+ Layout/LineLength:
88
+ Enabled: false
89
+
90
+ Gemspec/RequireMFA:
91
+ Enabled: false
92
+
93
+ Lint/DuplicateBranch:
94
+ Enabled: false
95
+
96
+ Lint/UselessConstantScoping:
97
+ Enabled: false
98
+
99
+ Style/BitwisePredicate:
100
+ Enabled: false
101
+
102
+ Style/ComparableBetween:
103
+ Enabled: false
104
+
105
+ Style/PartitionInsteadOfDoubleSelect:
106
+ Enabled: false
107
+
108
+ Style/RedundantArgument:
109
+ Enabled: false
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,7 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config --no-offense-counts --no-auto-gen-timestamp`
3
+ # using RuboCop version 1.86.2.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
data/CHANGELOG.md ADDED
@@ -0,0 +1,57 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.6.1] - 2026-05-29
9
+
10
+ ### Added
11
+
12
+ - Opt-in GitHub-style footnotes (`footnotes: true`, off by default):
13
+ `[^label]` references and `[^label]: …` definitions (multi-paragraph and
14
+ lazy continuation), numbered in first-reference order, rendered as a GFM
15
+ `<section class="footnotes">` with backrefs; also emitted to mdast.
16
+ - Bundled standalone HTML theme via `Document#to_html(theme:)` / the
17
+ `--theme` CLI flag: `default` (a compact, dark-mode-aware stylesheet
18
+ embedded inline) or `none` (bare). CLI defaults to `default`, the library
19
+ API to `none`.
20
+
21
+ ### Security
22
+
23
+ - Block script-executing schemes (`javascript:`/`vbscript:`/`data:`) in
24
+ autolinks, which previously rendered as links.
25
+
26
+ ### Fixed
27
+
28
+ - Numeric character references: enforce digit caps; map NUL, surrogate, and
29
+ out-of-range code points to U+FFFD.
30
+ - Enforce the 999-char link-label cap and tighten reference-definition
31
+ destination validation.
32
+ - Restrict whitespace in autolinks, raw HTML tags, link tails, and reference
33
+ definitions to spaces/tabs (plus one line ending); reject form feed and
34
+ vertical tab.
35
+ - Normalize input: CRLF/CR → LF, NUL → U+FFFD.
36
+ - GFM: require a table row's column count to match the header; reject
37
+ autolinks with underscored domains.
38
+
39
+ ### Performance
40
+
41
+ - Cut inline allocations ~43% (shared byte→char table; skip no-op string
42
+ scans) and remove redundant per-line / whole-document scans in block parsing.
43
+ - Make the per-line record a positional Struct.
44
+
45
+ ### Changed
46
+
47
+ - Rename `Arena#replace_str1` / `replace_int3` to `update_str1` / `update_int3`.
48
+
49
+ ### Internal
50
+
51
+ - Extract `Inline::LinkScanner` and `RedQuilt::Indentation`; add
52
+ `Arena#source_end`; consolidate ASCII-punctuation tables; make
53
+ `Delimiter` / `Bracket` / `Line` Structs.
54
+ - Drop `__send__` between block-parser collaborators via a public
55
+ collaborator interface on `BlockParser`.
56
+ - Add an allocation-regression CI gate and RSpec / RuboCop workflows;
57
+ support Ruby >= 3.3.
data/README.md ADDED
@@ -0,0 +1,284 @@
1
+ # RedQuilt
2
+
3
+ A modern Markdown document processor in pure Ruby, with an arena-style AST.
4
+ Passes the full CommonMark spec test suite and is generally faster than kramdown.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your Gemfile:
9
+
10
+ ```ruby
11
+ gem "red_quilt"
12
+ ```
13
+
14
+ ## Quick Start
15
+
16
+ ### Parsing and rendering
17
+
18
+ ```ruby
19
+ require "red_quilt"
20
+
21
+ # Parse Markdown to a document
22
+ doc = RedQuilt.parse("# Hello\n\nThis is **bold**.")
23
+ html = doc.to_html
24
+ # => "<h1>Hello</h1>\n<p>This is <strong>bold</strong>.</p>\n"
25
+
26
+ # Or render directly (without building AST)
27
+ html = RedQuilt.render_html("# Hello\n\n**bold**")
28
+ ```
29
+
30
+ ### HTML is safe by default
31
+
32
+ ```ruby
33
+ RedQuilt.render_html("Hi <em>tag</em>")
34
+ # => "<p>Hi &lt;em&gt;tag&lt;/em&gt;</p>\n"
35
+
36
+ RedQuilt.render_html("Hi <em>tag</em>", allow_html: true)
37
+ # => "<p>Hi <em>tag</em></p>\n"
38
+ ```
39
+
40
+ ## API Reference
41
+
42
+ ### Document
43
+
44
+ ```ruby
45
+ doc = RedQuilt.parse("# Title\n\nBody")
46
+
47
+ doc.root # Root node (NodeRef)
48
+ doc.walk # Traverse all nodes (pass a block { |node| ... } or get an Enumerator)
49
+ doc.to_html # Render as HTML
50
+ doc.to_ast # Export complete AST as Hash
51
+ doc.to_json # Export as MDAST-compatible JSON
52
+ doc.to_mdast # Export as MDAST Hash
53
+ doc.source_map # Line/column lookup (lazily memoized)
54
+ doc.allow_html? # Check HTML pass-through setting
55
+ ```
56
+
57
+ ### NodeRef (AST node wrapper)
58
+
59
+ ```ruby
60
+ node = doc.root.children.first
61
+
62
+ # Traversal
63
+ node.type # :heading, :paragraph, :link, etc. (Symbol)
64
+ node.children # Array[NodeRef]
65
+ node.walk # Enumerator[NodeRef] or { |node| ... } block
66
+ node.find_all(:link) # Array[NodeRef] with matching type
67
+ node.text # String (concatenated child text)
68
+
69
+ # Position information (byte offset)
70
+ node.source_span # SourceSpan with start_byte, end_byte
71
+
72
+ # Position information (line/column)
73
+ node.source_location # { start_line, start_column, end_line, end_column }
74
+ # line: 1-indexed, column: 0-indexed (character-based)
75
+
76
+ # AST export
77
+ node.to_h # Export subtree as Hash[Symbol, untyped]
78
+ ```
79
+
80
+ ### SourceSpan
81
+
82
+ ```ruby
83
+ span = node.source_span
84
+ span.start_byte # Integer (0-indexed byte offset)
85
+ span.end_byte # Integer (exclusive)
86
+ span.length # Computed: end_byte - start_byte
87
+ ```
88
+
89
+ ## Supported Syntax
90
+
91
+ ### Block elements
92
+
93
+ - Paragraphs: Plain text blocks
94
+ - Headings: ATX headings (`# Title`)
95
+ - Thematic breaks: `---`, `***`, `___`
96
+ - Code blocks: Indented and fenced (with info string)
97
+ - Block quotes: `> quote text`
98
+ - Lists: Ordered (`1.`) and unordered (`-`, `*`, `+`)
99
+ - List items: Nested blocks, tight/loose detection
100
+ - Tables: GFM syntax with header/body rows
101
+ - Raw HTML blocks: 7 types (script, comment, etc.)
102
+ - Link reference definitions: `[foo]: /url "title"`
103
+
104
+ ### Inline elements
105
+
106
+ - Text: Plain strings
107
+ - Emphasis/Strong: `*em*`, `**strong**`, `_em_`, `__strong__`
108
+ - Code spans: `` `code` ``
109
+ - Links: `[text](/url)`, `[text](/url "title")`, reference links
110
+ - Images: `![alt](/url)`, `![alt](/url "title")`, reference images
111
+ - Soft/hard line breaks: soft (a plain newline) and hard (a trailing `\` or two trailing spaces)
112
+ - Raw HTML inline: `<a href="#">link</a>`
113
+ - Autolinks: `<http://example.com>`, `<user@example.com>`
114
+ - Character references: `&amp;`, `&#x27;`, etc.
115
+
116
+ ## CommonMark Compatibility
117
+
118
+ RedQuilt achieves 100% compliance with the CommonMark v0.31.2 specification.
119
+
120
+ ## Command-line Tool
121
+
122
+ RedQuilt ships with a `redquilt` CLI for converting Markdown files to HTML or inspecting the AST.
123
+
124
+ ### Basic usage
125
+
126
+ ```bash
127
+ # Convert Markdown file to HTML
128
+ redquilt input.md > output.html
129
+
130
+ # Convert from stdin
131
+ echo "# Hello" | redquilt
132
+
133
+ # Output as AST (for debugging)
134
+ redquilt --format ast input.md
135
+
136
+ # Output as MDAST-compatible JSON (for external tools)
137
+ redquilt --format json input.md
138
+
139
+ # Standalone HTML document with title
140
+ redquilt --standalone --title "My Document" input.md
141
+
142
+ # Enable GFM extended autolinks
143
+ redquilt --extended-autolinks input.md
144
+ ```
145
+
146
+ ### Options
147
+
148
+ ```
149
+ --format FORMAT Output format: html (default), ast, json
150
+ --allow-html Pass raw HTML through to the output
151
+ --extended-autolinks Linkify bare URLs and email addresses (GFM)
152
+ --[no-]standalone Wrap HTML in full document (default: on)
153
+ --auto-title Use the first heading's text as <title>
154
+ --title TITLE Explicit <title> text
155
+ --lang LANG html lang attribute (default: "en")
156
+ --css URL Add a stylesheet link
157
+ --diagnostics Print diagnostics to stderr
158
+ --diagnostics-only Print diagnostics only (suppress output)
159
+ -h, --help Show help
160
+ -v, --version Show version
161
+ ```
162
+
163
+ Exit code is 0 on success, 1 if errors are detected.
164
+
165
+ ## Safe-by-Default HTML Rendering
166
+
167
+ ### Security model
168
+
169
+ RedQuilt prioritizes security by default:
170
+
171
+ ```ruby
172
+ # Default: All HTML is escaped, dangerous URLs blocked
173
+ RedQuilt.render_html("<script>alert('xss')</script>")
174
+ # => "<p>&lt;script&gt;alert('xss')&lt;/script&gt;</p>"
175
+
176
+ RedQuilt.render_html("[click](javascript:alert(1))")
177
+ # => "<p><a href=\"\">click</a></p>\n"
178
+ ```
179
+
180
+ ### Allowed URL schemes
181
+
182
+ In link/image destinations, only these URL forms are permitted:
183
+
184
+ - Absolute: `http://`, `https://`, `ftp://`, `tel:`, `ssh://`
185
+ - Relative: `/path`, `#anchor`, `path/to/file`
186
+ - Special: `mailto:` (autolinks only)
187
+
188
+ All other schemes (`javascript:`, `data:`, `vbscript:`, etc.) are blocked by replacing the URL with an empty string.
189
+
190
+ ### Opting into HTML pass-through
191
+
192
+ ```ruby
193
+ # Allow raw HTML (use with trusted input only)
194
+ RedQuilt.render_html(user_markdown, allow_html: true)
195
+
196
+ # This passes HTML blocks and inline tags through unchanged
197
+ ```
198
+
199
+ ## Usage Examples
200
+
201
+ ### Extract all headings
202
+
203
+ ```ruby
204
+ doc = RedQuilt.parse(source)
205
+ headings = doc.root.find_all(:heading)
206
+
207
+ headings.each do |node|
208
+ level = node.to_h[:attributes][:level]
209
+ text = node.text
210
+ puts "#{'#' * level} #{text}"
211
+ end
212
+ ```
213
+
214
+ ### Walk the AST with line numbers
215
+
216
+ ```ruby
217
+ doc = RedQuilt.parse(source)
218
+
219
+ doc.root.walk do |node|
220
+ loc = node.source_location
221
+ if loc
222
+ puts "#{node.type} at line #{loc[:start_line]}"
223
+ end
224
+ end
225
+ ```
226
+
227
+ ### Export and transform
228
+
229
+ ```ruby
230
+ doc = RedQuilt.parse("# Title\n\nBody with [link](/url)")
231
+ ast = doc.to_ast
232
+
233
+ # Print AST structure (for debugging)
234
+ pp ast
235
+
236
+ # Process nodes
237
+ doc.root.find_all(:link).each do |link|
238
+ attrs = link.to_h[:attributes]
239
+ puts "Link: #{link.text} → #{attrs[:destination]}"
240
+ end
241
+ ```
242
+
243
+ ## Development
244
+
245
+ ### Running tests
246
+
247
+ ```bash
248
+ bundle exec rake spec
249
+ ```
250
+
251
+ Runs 70+ CommonMark compatibility and feature tests.
252
+
253
+ ### Benchmark
254
+
255
+ ```bash
256
+ ruby spec/bench_inline.rb
257
+ ruby spec/bench_block.rb
258
+ ```
259
+
260
+ Profiles parse performance on various Markdown patterns.
261
+
262
+ ## Performance (v0.6.1, Ruby 4.0.5)
263
+
264
+ Comparison against [kramdown](https://kramdown.gettalong.org/) on arm64-darwin (Apple Silicon), measured with `spec/bench_vs_kramdown.rb` (benchmark-ips):
265
+
266
+ | Fixture | Size | RedQuilt (i/s) | kramdown (i/s) | RedQuilt vs kramdown |
267
+ |---------|-----:|---------------:|---------------:|---------------------:|
268
+ | short_paragraph | 49 B | 26,531 | 5,416 | 4.90x faster |
269
+ | long_paragraph | 1.4 KB | 981 | 926 | within error |
270
+ | nested_emphasis | 1.4 KB | 999 | 689 | 1.45x faster |
271
+ | many_links | 2.0 KB | 1,131 | 794 | 1.43x faster |
272
+ | mixed_markup | 1.8 KB | 1,028 | 729 | 1.41x faster |
273
+ | deep_nesting | 800 B | 827 | 349 | 2.37x faster |
274
+ | cmark_spec | 205 KB | 39.1 | 30.2 | 1.30x faster |
275
+
276
+ Reproduce locally:
277
+
278
+ ```bash
279
+ bundle exec ruby spec/bench_vs_kramdown.rb
280
+ ```
281
+
282
+ ## License
283
+
284
+ MIT
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task default: :spec