red_quilt 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +109 -0
- data/.rubocop_todo.yml +7 -0
- data/CHANGELOG.md +57 -0
- data/README.md +284 -0
- data/Rakefile +8 -0
- data/ast-spec.md +1227 -0
- data/docs/architecture.md +81 -0
- data/docs/arena-usage.md +363 -0
- data/docs/commonmark-conformance.md +241 -0
- data/exe/redquilt +7 -0
- data/lib/red_quilt/arena.rb +366 -0
- data/lib/red_quilt/block_parser.rb +724 -0
- data/lib/red_quilt/blockquote.rb +151 -0
- data/lib/red_quilt/cli.rb +182 -0
- data/lib/red_quilt/diagnostic.rb +47 -0
- data/lib/red_quilt/document.rb +126 -0
- data/lib/red_quilt/extended_autolink_pass.rb +185 -0
- data/lib/red_quilt/footnote_definition.rb +147 -0
- data/lib/red_quilt/footnote_pass.rb +39 -0
- data/lib/red_quilt/footnote_registry.rb +68 -0
- data/lib/red_quilt/indentation.rb +73 -0
- data/lib/red_quilt/inline/builder.rb +674 -0
- data/lib/red_quilt/inline/flanking.rb +120 -0
- data/lib/red_quilt/inline/html_entities.rb +2180 -0
- data/lib/red_quilt/inline/lexer.rb +280 -0
- data/lib/red_quilt/inline/link_scanner.rb +315 -0
- data/lib/red_quilt/inline/token_kind.rb +39 -0
- data/lib/red_quilt/inline/tokens.rb +73 -0
- data/lib/red_quilt/inline.rb +34 -0
- data/lib/red_quilt/inline_pass.rb +53 -0
- data/lib/red_quilt/line.rb +14 -0
- data/lib/red_quilt/lint_pass.rb +71 -0
- data/lib/red_quilt/list.rb +317 -0
- data/lib/red_quilt/node_ref.rb +114 -0
- data/lib/red_quilt/node_type.rb +66 -0
- data/lib/red_quilt/plain_text.rb +46 -0
- data/lib/red_quilt/reference_definition.rb +309 -0
- data/lib/red_quilt/renderer/html.rb +279 -0
- data/lib/red_quilt/renderer/mdast.rb +152 -0
- data/lib/red_quilt/source_map.rb +29 -0
- data/lib/red_quilt/source_span.rb +26 -0
- data/lib/red_quilt/theme.rb +28 -0
- data/lib/red_quilt/themes/default.css +87 -0
- data/lib/red_quilt/version.rb +5 -0
- data/lib/red_quilt.rb +86 -0
- data/mise.toml +2 -0
- data/sig/red_quilt.rbs +45 -0
- metadata +91 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
|
|
4
|
+
# CommonMark spec 5.1 blockquotes.
#
# The module-level functions are stateless helpers used by BlockParser's
# predicate dispatch. `Blockquote::Parser` is a cached collaborator
# created once in BlockParser#initialize and reused for every blockquote
# (including nested ones); it keeps all per-call state in method locals,
# so reentrant `#parse` calls are safe.
module Blockquote
  # Up to three spaces of indentation followed by the `>` marker.
  BLOCKQUOTE_PREFIX_RE = /\A {0,3}>/

  module_function

  # Tells whether +text+ opens with a blockquote marker.
  def match?(text)
    text.match?(BLOCKQUOTE_PREFIX_RE)
  end

  # Removes the leading `>` and at most one column of the whitespace
  # that follows it, returning a new Line holding the inner text.
  #
  # Whitespace after the marker is measured in columns (tab stops every
  # four columns). One column is consumed; the remaining columns are
  # re-emitted as plain spaces in front of the rest of the line. A line
  # without a `>` prefix comes back as a fresh Line with the same content
  # and byte range, so callers can treat both cases uniformly.
  def strip_prefix(line)
    text = line.content
    size = text.bytesize

    # Skip up to three spaces of indentation in front of the marker.
    marker_at = 0
    marker_at += 1 while marker_at < 3 && marker_at < size && text.getbyte(marker_at) == 0x20

    # No `>` at that position: hand back an equivalent Line.
    if marker_at >= size || text.getbyte(marker_at) != 0x3E
      return Line.new(text, line.start_byte, line.end_byte, !text.match?(/\S/))
    end

    # Only one-column spaces and the marker precede this point, so the
    # byte index right after `>` is also its absolute column.
    body_at = marker_at + 1
    column = body_at
    cursor = body_at
    while cursor < size
      case text.getbyte(cursor)
      when 0x20 then column += 1
      when 0x09 then column = ((column / 4) + 1) * 4 # jump to the next tab stop
      else break
      end
      cursor += 1
    end
    indent = column - body_at

    if indent.zero?
      inner = text.byteslice(body_at..)
      consumed = body_at
    else
      # One column belongs to the marker; the rest survives as spaces.
      inner = (" " * (indent - 1)) + text.byteslice(cursor..)
      consumed = cursor
    end

    Line.new(inner, line.start_byte + consumed, line.end_byte, !inner.match?(/\S/))
  end

  # Collects the lines of one blockquote and hands them to the block
  # parser for recursive parsing.
  class Parser
    def initialize(block_parser)
      @block_parser = block_parser
      @arena = block_parser.arena
    end

    # Consumes lines starting at +index+ that belong to one blockquote,
    # adds a BLOCKQUOTE node under +parent_id+, parses the stripped lines
    # into it, and returns the index of the first line not consumed.
    def parse(parent_id, lines, index)
      collected = []
      para_open = false

      while index < lines.length
        current = lines[index]

        # A blank line outside the blockquote prefix closes the quote.
        break if current.blank

        if Blockquote.match?(current.content)
          inner = Blockquote.strip_prefix(current)
          # A lone `>` (or `>` followed only by blanks) ends any open
          # paragraph. Otherwise look through nested wrapper prefixes:
          # an innermost open paragraph (e.g. `> > > foo`) lets a
          # `>`-less follow-up line lazily continue it at the outer level.
          para_open = !inner.content.strip.empty? &&
                      paragraph_eligible_through_blockquotes?(inner.content)
          collected << inner
        elsif para_open && !@block_parser.lazy_break?(lines, index)
          # Lazy continuation: a `>`-less line joins the open paragraph
          # as long as it does not itself start a new block. The trailing
          # `true` marks the line as lazy so the paragraph parser does not
          # read `===` / `---` on it as a setext underline.
          collected << Line.new(current.content, current.start_byte, current.end_byte, current.blank, true)
        else
          break
        end

        index += 1
      end

      opening = collected.first
      closing = collected.last
      block_id = @arena.add_node(NodeType::BLOCKQUOTE,
                                 source_start: opening.start_byte,
                                 source_len: closing.end_byte - opening.start_byte)
      @arena.append_child(parent_id, block_id)
      @block_parser.parse_lines(block_id, collected, transformed: true)
      index
    end

    private

    # Like BlockParser#paragraph_eligible_line?, but first peels any
    # number of leading wrapper prefixes (blockquote `>` and list item
    # markers) to decide whether the innermost block is still paragraph
    # content. This lets `> > > foo\nbar` and `> 1. > foo\nbar` both
    # continue the deepest paragraph with the unprefixed line. Returns
    # false as soon as a peeled prefix leaves only whitespace.
    def paragraph_eligible_through_blockquotes?(content)
      rest = content
      loop do
        if Blockquote.match?(rest)
          prefix = /\A {0,3}> ?/.match(rest)
          break unless prefix

          rest = prefix.post_match
        elsif (item = List.match(rest))
          rest = item[:content]
        else
          break
        end
        return false if rest.strip.empty?
      end
      @block_parser.paragraph_eligible_line?(rest)
    end
  end
end
|
|
151
|
+
end
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "optparse"
|
|
4
|
+
|
|
5
|
+
module RedQuilt
|
|
6
|
+
# Entry point for the `redquilt` executable. Defined as module-level
# functions so tests can drive it without shelling out.
#
# CLI.run takes an argv-style array and an optional set of IO objects
# (stdin / stdout / stderr) for testability. It returns an Integer exit
# code: 0 on success (including --help / --version), 1 on usage errors,
# on an unreadable input file, or when the parsed document carries at
# least one :error diagnostic.
module CLI
  USAGE = <<~USAGE
    Usage: redquilt [options] [file]

    Reads Markdown from FILE (or stdin if FILE is omitted) and writes the
    result to stdout.

    Options:
  USAGE

  # Option values before any command-line flag is applied.
  DEFAULTS = {
    format: :html,
    allow_html: false,
    disallow_raw_html: false,
    extended_autolinks: false,
    lint: false,
    diagnostics: false,
    diagnostics_only: false,
    standalone: true,
    auto_title: false,
    title: nil,
    lang: "en",
    css: nil,
    theme: :default,
  }.freeze

  # Values accepted by --theme.
  THEMES = %i[none default].freeze

  # Values accepted by --format.
  FORMATS = %i[html ast json].freeze

  # Parses +argv+, reads the source, renders it in the requested format
  # to +stdout+ and optionally writes diagnostics to +stderr+.
  # Returns the process exit code.
  def self.run(argv, stdin: $stdin, stdout: $stdout, stderr: $stderr)
    options = parse_options(argv, stderr: stderr)
    # parse_options returns an Integer exit code when it already handled
    # the invocation (--help, --version, or a usage error).
    return options if options.is_a?(Integer)

    source = read_source(argv, stdin: stdin, stderr: stderr)
    return 1 unless source

    doc = RedQuilt.parse(source,
                         allow_html: options[:allow_html],
                         disallow_raw_html: options[:disallow_raw_html],
                         extended_autolinks: options[:extended_autolinks],
                         lint: options[:lint])

    unless options[:diagnostics_only]
      case options[:format]
      when :html
        stdout.write(render_html(doc, options))
      when :ast
        require "pp"
        PP.pp(doc.to_ast, stdout)
      when :json
        stdout.puts doc.to_json
      end
    end

    if options[:diagnostics] || options[:diagnostics_only]
      write_diagnostics(doc.diagnostics, stderr)
    end

    doc.diagnostics.any? { |d| d.severity == :error } ? 1 : 0
  end

  # Parses option flags out of +argv+ (destructively: recognised flags
  # are removed, positional arguments stay). Returns the options Hash,
  # or an Integer exit code: 0 after printing help / version to +stderr+,
  # 1 after reporting an invalid option.
  def self.parse_options(argv, stderr:)
    options = DEFAULTS.dup
    parser = OptionParser.new do |opts|
      opts.banner = USAGE
      opts.on("--format FORMAT", FORMATS, "Output format: html (default), ast, json") do |f|
        options[:format] = f
      end
      opts.on("--allow-html", "Pass raw HTML through to the output") do
        options[:allow_html] = true
      end
      opts.on("--disallow-raw-html",
              "Filter dangerous tags (script, iframe, ...) even with --allow-html (GFM)") do
        options[:disallow_raw_html] = true
      end
      opts.on("--extended-autolinks",
              "Linkify bare URLs and email addresses (GFM)") do
        options[:extended_autolinks] = true
      end
      opts.on("--lint",
              "Emit lint-style diagnostics (empty_link, missing_alt, heading_level_skip)") do
        options[:lint] = true
      end
      opts.on("--[no-]standalone",
              "Wrap (or not) the rendered HTML in a full document (default: on)") do |v|
        options[:standalone] = v
      end
      opts.on("--auto-title",
              "Use the first heading's text as <title> (standalone only)") do
        options[:auto_title] = true
      end
      opts.on("--title TITLE", "Explicit <title> text (standalone only)") do |t|
        options[:title] = t
      end
      opts.on("--lang LANG", "html lang attribute (standalone only; default \"en\")") do |l|
        options[:lang] = l
      end
      opts.on("--css URL", "Add a stylesheet link (standalone only)") do |u|
        options[:css] = u
      end
      opts.on("--theme THEME", THEMES,
              "Embedded stylesheet: default (the default) or none (bare HTML)") do |t|
        options[:theme] = t
      end
      opts.on("--diagnostics", "Also print diagnostics to stderr") do
        options[:diagnostics] = true
      end
      opts.on("--diagnostics-only", "Print diagnostics only (suppress normal output)") do
        options[:diagnostics_only] = true
      end
      opts.on("-h", "--help", "Show this help") do
        stderr.puts opts
        # `return` inside the block leaves parse_options itself.
        return 0
      end
      opts.on("-v", "--version", "Show version") do
        stderr.puts "redquilt #{RedQuilt::VERSION}"
        return 0
      end
    end

    begin
      parser.parse!(argv)
    rescue OptionParser::ParseError => e
      stderr.puts "redquilt: #{e.message}"
      stderr.puts parser
      return 1
    end

    options
  end

  # Returns the Markdown source as a String: stdin when +argv+ holds no
  # positional argument, otherwise the contents of the single named file.
  # Returns nil after writing a message to +stderr+ when the file does not
  # exist, cannot be read, or more than one argument was given.
  def self.read_source(argv, stdin:, stderr:)
    return stdin.read if argv.empty?

    if argv.size > 1
      stderr.puts "redquilt: too many arguments: #{argv.inspect}"
      return nil
    end

    path = argv.first
    unless File.file?(path)
      stderr.puts "redquilt: no such file: #{path}"
      return nil
    end

    begin
      File.read(path)
    rescue SystemCallError => e
      # The file exists but is unreadable (permissions, removed since the
      # check above, ...): report it instead of crashing with a backtrace.
      stderr.puts "redquilt: #{e.message}"
      nil
    end
  end

  # Renders +doc+ to HTML using the standalone / title / lang / css /
  # theme options. An explicit --title wins; otherwise --auto-title
  # derives the title from the first heading (empty string when the
  # document has none).
  def self.render_html(doc, options)
    title = options[:title]
    title = doc.first_heading_text.to_s if title.nil? && options[:auto_title]
    doc.to_html(
      standalone: options[:standalone],
      title: title,
      lang: options[:lang],
      css: options[:css],
      theme: options[:theme],
    )
  end

  # Writes one `[severity] rule: message` line per diagnostic to
  # +stderr+, or a single "no diagnostics" line when there are none.
  def self.write_diagnostics(diagnostics, stderr)
    if diagnostics.empty?
      stderr.puts "redquilt: no diagnostics"
      return
    end
    diagnostics.each do |d|
      stderr.puts "[#{d.severity}] #{d.rule}: #{d.message}"
    end
  end
end
|
|
182
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
|
|
4
|
+
# A single warning / error raised while parsing or rendering a
# document. Diagnostics are collected on the Document and never
# interrupt processing — every parse / render call still produces a
# tree and HTML, even if it emitted diagnostics along the way.
#
# severity:    :info / :warning / :error
# rule:        a short Symbol identifying the rule (e.g. :unsafe_url,
#              :missing_reference) so callers can filter / silence
# message:     human-readable explanation
# source_span: optional SourceSpan, points at the offending byte range
#
# Diagnostics compare by value (`==`, `eql?` and `hash` agree), so they
# can be de-duplicated with `uniq` or used as Hash keys / Set members.
class Diagnostic
  SEVERITIES = %i[info warning error].freeze

  attr_reader :severity, :rule, :message, :source_span

  # Raises ArgumentError when +severity+ is not one of SEVERITIES.
  def initialize(severity:, rule:, message:, source_span: nil)
    unless SEVERITIES.include?(severity)
      raise ArgumentError, "unknown severity: #{severity.inspect}"
    end

    @severity = severity
    @rule = rule
    @message = message
    @source_span = source_span
  end

  # Plain-Hash form; the span (when present) is flattened to its
  # start_byte / end_byte pair.
  def to_h
    {
      severity: severity,
      rule: rule,
      message: message,
      source_span: source_span && { start_byte: source_span.start_byte, end_byte: source_span.end_byte },
    }
  end

  # Two diagnostics are equal when all four attributes are equal.
  def ==(other)
    other.is_a?(Diagnostic) &&
      other.severity == severity &&
      other.rule == rule &&
      other.message == message &&
      other.source_span == source_span
  end
  alias eql? ==

  # Hash code consistent with #==. It is derived from #to_h so that it
  # depends only on the span's byte range, not on the span object's own
  # #hash implementation.
  def hash
    to_h.hash
  end
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
|
|
4
|
+
# A parsed document: the original source text, the node arena holding
# the tree, the root node id, and the reference / footnote tables.
# Offers the renderers (HTML, AST Hash, mdast JSON) and the list of
# diagnostics collected for this document.
class Document
  attr_reader :source, :arena, :root_id, :references, :footnotes

  def initialize(source, arena, root_id, allow_html: false, disallow_raw_html: false, references: {}, footnotes: nil)
    @source = source
    @arena = arena
    @root_id = root_id
    @allow_html = allow_html
    @disallow_raw_html = disallow_raw_html
    @references = references
    @footnotes = footnotes
  end

  def allow_html?
    @allow_html
  end

  # When true, raw HTML output filters the 9 dangerous tags defined by
  # GFM's "Disallowed Raw HTML" extension (title, textarea, style, xmp,
  # iframe, noembed, noframes, script, plaintext) by replacing their
  # leading `<` with `&lt;`. Only meaningful when allow_html? is true;
  # when allow_html? is false everything is already escaped.
  def disallow_raw_html?
    @disallow_raw_html
  end

  # NodeRef wrapping the root node.
  def root
    NodeRef.new(self, @root_id)
  end

  # Forwards the given block to the root NodeRef's #walk.
  def walk(&)
    root.walk(&)
  end

  # Renders the document to HTML.
  #
  # standalone: when true, wrap the rendered body in a `<!DOCTYPE html>`
  #   template with `<head>` (charset / title / optional stylesheet)
  #   and `<body>`. When false (the default), only the rendered body
  #   fragment is returned.
  # title / lang / css / theme: applied only when standalone is true.
  #   theme: a bundled stylesheet to inline (`:none` embeds nothing, keeping
  #   the bare template; `:default` embeds RedQuilt's default theme). `css`
  #   (an external stylesheet link) is independent and may be combined.
  def to_html(standalone: false, title: nil, lang: "en", css: nil, theme: :none)
    body = Renderer::HTML.new(self).render
    return body unless standalone

    wrap_standalone_html(body, title: title.to_s, lang: lang.to_s, css: css, theme: Theme.css(theme))
  end

  # The tree as the root NodeRef's Hash representation.
  def to_ast
    root.to_h
  end

  # Pretty-printed JSON of the mdast rendering. Arguments are accepted
  # (and ignored) so the method is compatible with JSON generator calls.
  def to_json(*)
    require "json"
    JSON.pretty_generate(to_mdast)
  end

  def to_mdast
    Renderer::Mdast.new(self).render
  end

  # Returns the plain-text content of the first HEADING in the
  # document, or nil if there is no heading. Used by callers (e.g. the
  # CLI's --auto-title) to derive a document title.
  def first_heading_text
    first_heading_text_walk(@root_id)
  end

  # Lazily built SourceMap over the source text.
  def source_map
    @source_map ||= SourceMap.new(@source)
  end

  # Returns the array of diagnostics collected during parse / render.
  # The array is mutable and shared with the parser / renderer; new
  # entries appear here without further calls.
  def diagnostics
    @diagnostics ||= []
  end

  private

  # Builds the standalone HTML page around +body+. +title+, +lang+ and
  # +css+ are escaped; +theme+ is stylesheet text (or nil) and is inlined
  # verbatim inside `<style>`.
  def wrap_standalone_html(body, title:, lang:, css:, theme:)
    out = +"<!DOCTYPE html>\n"
    out << %(<html lang="#{html_escape_attr(lang)}">\n)
    out << "<head>\n"
    out << %(<meta charset="utf-8">\n)
    out << "<title>#{html_escape_text(title)}</title>\n"
    out << %(<link rel="stylesheet" href="#{html_escape_attr(css)}">\n) if css
    out << "<style>\n#{theme}</style>\n" if theme
    out << "</head>\n<body>\n"
    out << body
    out << "</body>\n</html>\n"
    out
  end

  # Escapes `&`, `<` and `>` for use in HTML text content. `&` is
  # replaced first so the entities produced for `<` / `>` are not
  # escaped a second time.
  def html_escape_text(str)
    str.to_s.gsub("&", "&amp;").gsub("<", "&lt;").gsub(">", "&gt;")
  end

  # Escapes a value for use inside a double-quoted HTML attribute.
  def html_escape_attr(str)
    html_escape_text(str).gsub('"', "&quot;")
  end

  # Depth-first, document-order search for the first HEADING node at or
  # below +node_id+; -1 is the arena's "no node" id.
  def first_heading_text_walk(node_id)
    return nil if node_id == -1
    if @arena.type(node_id) == NodeType::HEADING
      return PlainText.from(@arena, node_id)
    end

    child = @arena.raw_first_child_id(node_id)
    while child != -1
      text = first_heading_text_walk(child)
      return text if text

      child = @arena.raw_next_sibling_id(child)
    end
    nil
  end
end
|
|
126
|
+
end
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
|
|
4
|
+
# GFM Extended autolinks: rewrites bare URLs (`https://...`,
# `http://...`, `ftp://...`, `www....`) and email addresses inside
# TEXT nodes into LINK nodes. Runs as an optional pass after the
# ordinary inline pipeline, so by then all CommonMark inline structure
# (real `<...>` autolinks, code spans, links, ...) is already in place
# and protected from rewriting.
class ExtendedAutolinkPass
  # A scheme (or `www.`) not preceded by a word character, followed by
  # everything up to whitespace or an angle bracket.
  URL_RE = %r{
    (?<![A-Za-z0-9_])
    (?:https?://|ftp://|www\.)
    [^\s<>]+
  }x

  # local-part@domain with at least two domain labels; each label starts
  # and ends with an alphanumeric character.
  EMAIL_RE = /
    (?<![A-Za-z0-9._+-])
    [A-Za-z0-9._+-]+
    @
    [A-Za-z0-9](?:[A-Za-z0-9\-_]{0,61}[A-Za-z0-9])?
    (?:\.[A-Za-z0-9](?:[A-Za-z0-9\-_]{0,61}[A-Za-z0-9])?)+
  /x

  # Punctuation that is never part of a link when it ends the candidate.
  TRAILING_PUNCT_RE = /[?!.,:*_~]+\z/
  # Something shaped like an entity reference (`&name;`) at the very end.
  TRAILING_ENTITY_RE = /&[A-Za-z0-9]+;\z/

  # AST contexts whose TEXT descendants must not be auto-linkified.
  SKIP_TYPES = [
    NodeType::LINK,
    NodeType::IMAGE,
    NodeType::CODE_SPAN,
    NodeType::HTML_INLINE,
    NodeType::CODE_BLOCK,
    NodeType::HTML_BLOCK,
  ].freeze

  def initialize(document)
    @document = document
    @arena = document.arena
  end

  # Runs the pass over the whole document tree, mutating the arena.
  def apply
    walk(@document.root_id)
  end

  private

  # Depth-first traversal that skips SKIP_TYPES subtrees and rewrites
  # every TEXT node it reaches.
  def walk(node_id)
    return if node_id == -1

    type = @arena.type(node_id)
    return if SKIP_TYPES.include?(type)

    if type == NodeType::TEXT
      process_text(node_id)
      return
    end

    child = @arena.raw_first_child_id(node_id)
    while child != -1
      # Fetch the sibling first: process_text may detach `child`.
      nxt = @arena.raw_next_sibling_id(child)
      walk(child)
      child = nxt
    end
  end

  # One autolink found in a TEXT node: character offsets [start, finish),
  # the visible label and the link destination.
  Match = Struct.new(:start, :finish, :label, :dest)

  # Replaces a TEXT node that contains autolinks with a run of
  # TEXT / LINK / TEXT ... nodes inserted before it, then detaches the
  # original node. Nodes without any match are left untouched.
  def process_text(node_id)
    text = @arena.text(node_id).to_s
    return if text.empty?

    matches = scan_text(text)
    return if matches.empty?

    parent = @arena.raw_parent_id(node_id)
    prev_end = 0
    matches.each do |m|
      if m.start > prev_end
        @arena.insert_before(parent, node_id,
                             @arena.add_node(NodeType::TEXT, str1: text[prev_end...m.start]))
      end
      link_id = @arena.add_node(NodeType::LINK, str1: m.dest)
      @arena.append_child(link_id,
                          @arena.add_node(NodeType::TEXT, str1: m.label))
      @arena.insert_before(parent, node_id, link_id)
      prev_end = m.finish
    end
    if prev_end < text.length
      @arena.insert_before(parent, node_id,
                           @arena.add_node(NodeType::TEXT, str1: text[prev_end..]))
    end
    @arena.detach(node_id)
  end

  # Scans +text+ left to right and returns the non-overlapping Match
  # list. At each position the earlier of the next URL / email candidate
  # wins (URL on a tie); a candidate that trims to nothing or fails
  # domain validation is skipped by resuming one character further on.
  def scan_text(text)
    matches = []
    pos = 0
    while pos < text.length
      url_m = URL_RE.match(text, pos)
      email_m = EMAIL_RE.match(text, pos)
      m = first_match(url_m, email_m)
      break unless m

      candidate = m[0]
      is_email = (m == email_m)
      trimmed = trim_trailing(candidate, email: is_email)
      if trimmed.empty? || !valid_domain?(trimmed, email: is_email)
        pos = m.begin(0) + 1
        next
      end

      start = m.begin(0)
      finish = start + trimmed.length
      dest = build_destination(trimmed, email: is_email)
      matches << Match.new(start, finish, trimmed, dest)
      pos = finish
    end
    matches
  end

  # GFM spec: "If the domain name contains an underscore (_) in its last two
  # segments, it is invalid." Applies to both URLs and email domains.
  # A domain with fewer than two dot-separated segments is also rejected.
  def valid_domain?(candidate, email:)
    domain = extract_domain(candidate, email: email)
    return false if domain.nil? || domain.empty?

    segments = domain.split(".")
    return false if segments.length < 2

    last_two = segments.last(2)
    last_two.none? { |seg| seg.include?("_") }
  end

  # Returns the domain part of +candidate+: the text after `@` for an
  # email; otherwise the text after the `www.` prefix or `scheme://`,
  # cut at the first `/`, `?`, `#` or `:` so that a path, query string,
  # fragment or port is never mistaken for part of the host name.
  # May return nil when nothing follows the prefix.
  def extract_domain(candidate, email:)
    return candidate.split("@", 2)[1] if email

    after_prefix =
      if candidate.start_with?("www.")
        candidate[4..]
      else
        # https://, http://, ftp://
        candidate.sub(%r{\A[a-z]+://}, "")
      end
    after_prefix.split(%r{[/?#:]}, 2).first
  end

  # Of two optional MatchData objects, returns the one that starts
  # first (+a+ on a tie), or whichever is non-nil.
  def first_match(a, b)
    return b unless a
    return a unless b

    a.begin(0) <= b.begin(0) ? a : b
  end

  # Repeatedly strips trailing punctuation, unbalanced closing
  # parentheses (URLs only) and a trailing entity-like `&name;` until
  # the candidate stops shrinking; returns what is left.
  def trim_trailing(candidate, email:)
    loop do
      before = candidate.length
      candidate = candidate.sub(TRAILING_PUNCT_RE, "")
      candidate = strip_excess_close_paren(candidate) unless email
      if candidate.end_with?(";") && (em = TRAILING_ENTITY_RE.match(candidate))
        candidate = candidate[0...em.begin(0)]
      end
      break candidate if candidate.length == before
    end
  end

  # Drops trailing `)` characters while the string holds more closing
  # than opening parentheses.
  def strip_excess_close_paren(s)
    opens = s.count("(")
    closes = s.count(")")
    while closes > opens && s.end_with?(")")
      s = s[0..-2]
      closes -= 1
    end
    s
  end

  # Link destination for a label: `mailto:` for emails, `http://`
  # prepended to bare `www.` links, otherwise the label itself.
  def build_destination(label, email:)
    return "mailto:#{label}" if email
    return "http://#{label}" if label.start_with?("www.")

    label
  end
end
|
|
185
|
+
end
|