manticore-smash 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +661 -0
- data/README.md +492 -0
- data/lib/manticore.rb +24 -0
- data/lib/mdutils/rediscount.rb +871 -0
- data/lib/xmlutils/formatters.rb +91 -0
- data/lib/xmlutils/node.rb +585 -0
- data/lib/xmlutils/tokenizer.rb +282 -0
- data/lib/xmlutils/tree_parser.rb +161 -0
- data/lib/xmlutils/xml_doc.rb +273 -0
- data/lib/xmlutils/xpath.rb +103 -0
- metadata +48 -0
|
@@ -0,0 +1,871 @@
|
|
|
1
|
+
|
|
2
|
+
#------------------------------------------------------------------------------
|
|
3
|
+
# Markdown 解析器
|
|
4
|
+
#------------------------------------------------------------------------------
|
|
5
|
+
# 纯 Ruby 实现的 Markdown 解析器,兼容 rdiscount gem API,无需 C 扩展。
|
|
6
|
+
#
|
|
7
|
+
# 实现特性:
|
|
8
|
+
# - 段落、标题(ATX + Setext)、分隔线、引用块
|
|
9
|
+
# - 列表(ul/ol/alpha)、代码块(围栏 + 缩进)
|
|
10
|
+
# - 内联:链接、图片、强调、代码片段、自动链接
|
|
11
|
+
# - Discount 扩展:表格、定义列表、脚注、删除线、上标、图片尺寸、字母列表、目录
|
|
12
|
+
# - 标志:smart, filter_html, filter_styles, footnotes, generate_toc,
|
|
13
|
+
# no_image, no_links, no_tables, strict, autolink, safelink,
|
|
14
|
+
# no_pseudo_protocols, no_superscript, no_strikethrough,
|
|
15
|
+
# latex, explicitlist, md1compat
|
|
16
|
+
#
|
|
17
|
+
# Usage:
|
|
18
|
+
# require 'rediscount'
|
|
19
|
+
# markdown = ReDiscount.new("Hello World!")
|
|
20
|
+
# puts markdown.to_html
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
#------------------------------------------------------------------------------
|
|
24
|
+
# Caset Markdown 全局变量说明
|
|
25
|
+
#------------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
# @!attribute $_tmp_global_
|
|
28
|
+
# @return [Hash] 全局配置累积存储(CasetDown 代码执行)
|
|
29
|
+
# 按代码块顺序累积,后续代码块继承前面加载的配置。
|
|
30
|
+
|
|
31
|
+
# @!attribute $_scr_head_
|
|
32
|
+
# @return [String] Ruby 脚本头部(编码声明 + 依赖加载)
|
|
33
|
+
# 默认包含 #coding:utf-8 和 endata/tintext/tabbot 等依赖。
|
|
34
|
+
|
|
35
|
+
# @!attribute $_scr_tail_
|
|
36
|
+
# @return [String] Ruby 脚本尾部(全局数据输出)
|
|
37
|
+
# 包含 __END__ 标记和全局变量输出。
|
|
38
|
+
|
|
39
|
+
# @!attribute $_erl_tail_
|
|
40
|
+
# @return [String] Erlang 脚本尾部(全局数据输出)
|
|
41
|
+
# 包含全局变量的 Erlang 格式输出。
|
|
42
|
+
|
|
43
|
+
# @!attribute $_tmp_endata_
|
|
44
|
+
# @return [Hash] endata 表格数据收集器(casetable 使用)
|
|
45
|
+
# 收集表格数据用于生成脚本尾部输出。
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Markdown front end with an rdiscount-style API.
#
# Stores the source text and a set of boolean parsing flags, and hands the
# actual conversion to MarkdownParser.
class ReDiscount
  VERSION = '3.1.0'

  # @return [String] the raw Markdown source
  attr_reader :text

  # Parsing flags. Every flag starts out false and may be changed after
  # construction through its writer.
  attr_accessor :smart, :filter_html, :filter_styles, :footnotes,
                :generate_toc, :no_image, :no_links, :no_tables,
                :strict, :autolink, :safelink, :no_pseudo_protocols,
                :no_superscript, :no_strikethrough, :latex,
                :explicitlist, :md1compat

  # Builds a converter for the given text.
  #
  # @param text [#to_s] Markdown source (nil becomes the empty string)
  # @param flags [Array<Symbol>] names of flags to switch on; a name without
  #   a matching public writer is ignored
  def initialize(text, *flags)
    @text = text.to_s

    @smart = @filter_html = @filter_styles = @footnotes = false
    @generate_toc = @no_image = @no_links = @no_tables = false
    @strict = @autolink = @safelink = @no_pseudo_protocols = false
    @no_superscript = @no_strikethrough = @latex = false
    @explicitlist = @md1compat = false

    flags.each do |flag|
      writer = "#{flag}="
      send(writer, true) if respond_to?(writer)
    end
  end

  # Converts the stored text to HTML using a fresh MarkdownParser.
  #
  # @return [String] whatever MarkdownParser#to_html returns
  def to_html
    MarkdownParser.new(@text, self).to_html
  end

  # Produces the table-of-contents markup using a fresh MarkdownParser.
  #
  # @return [String] whatever MarkdownParser#toc_content returns
  def toc_content
    MarkdownParser.new(@text, self).toc_content
  end
end
|
|
125
|
+
|
|
126
|
+
# ============================================================================
|
|
127
|
+
# 内部 Markdown 解析器实现
|
|
128
|
+
# ============================================================================
|
|
129
|
+
class MarkdownParser
|
|
130
|
+
# Block-level regular expressions. Each one is applied to a single line.

# ATX heading: 1-6 '#' characters, whitespace, then the title. Group 1 is the
# marker run, group 2 the title without an optional closing '#' run.
ATX_HEADER_RE = /^(\#{1,6})\s+(.+?)\s*#*\s*$/
# Horizontal rule: a line made of three or more '*', '-' or '_'.
HORIZONTAL_RULE = /^(\*{3,}|-{3,}|_{3,})\s*$/
# Opening fence of a fenced code block; group 1 is the optional language tag.
CODE_BLOCK_FENCE = /^```(\w*)/
# Indented code block line: four leading spaces or one tab. A single leading
# space must not qualify, otherwise any slightly indented line turns into code.
CODE_BLOCK_INDENT = /^( {4}|\t)/
# Blockquote line: optional leading whitespace followed by '>'.
BLOCKQUOTE_RE = /^\s*>/
# Bullet list item: '*', '+' or '-' followed by whitespace.
LIST_BULLET_RE = /^(\*|\+|-)\s+/
# Numbered list item: digits, then '.' or ')', then whitespace.
LIST_NUMBER_RE = /^(\d+)[.)]\s+/
# Lettered list item: one ASCII letter, then '.' or ')', then whitespace.
LIST_ALPHA_RE = /^([a-zA-Z])[.)]\s+/
# Link reference definition: [id]: url "optional title".
REFERENCE_DEF = /^\[(.+?)\]:\s*(\S+)(?:\s+["'(](.+?)["')])?\s*$/
# Start of an HTML tag at the beginning of a line; group 1 is '/' for a
# closing tag, group 2 the tag name.
HTML_BLOCK_RE = /^<(\/?)(\w+)/
# Table row: must start with '|'; the trailing '|' is optional.
TABLE_ROW_RE = /^\|(.+?)\|?\s*$/
# Table separator row: only whitespace, ':', '|' and '-' characters.
TABLE_SEP_RE = /^\|?[\s:|-]+\|?[\s:|-]*\s*$/
|
|
167
|
+
|
|
168
|
+
# 初始化解析器。
|
|
169
|
+
#
|
|
170
|
+
# @param text [String] Markdown 文本
|
|
171
|
+
# @param rdiscount_obj [ReDiscount] ReDiscount 实例,提供标志配置
|
|
172
|
+
# Sets up a parser for one document.
#
# @param text [String] Markdown source
# @param rdiscount_obj [ReDiscount] object whose flag readers configure parsing
def initialize(text, rdiscount_obj)
  @text, @rd = text, rdiscount_obj

  # Definitions collected by preprocess.
  @references = {}
  @footnotes = {}

  # State accumulated while rendering.
  @toc_entries = []
  @used_footnotes = []
  @footnote_counter = 0
end
|
|
181
|
+
|
|
182
|
+
# 完整解析流程:预处理 → 分块 → 渲染 → 后处理。
|
|
183
|
+
#
|
|
184
|
+
# @return [String] 最终 HTML 字符串(末尾含换行符)
|
|
185
|
+
# Runs the full pipeline: preprocess, block parsing, rendering, postprocess.
#
# The per-run accumulators are reset first so that calling this method more
# than once on the same parser yields the same output instead of duplicated
# footnotes and ever-increasing footnote numbers.
#
# @return [String] the HTML with surrounding whitespace stripped and exactly
#   one trailing newline
def to_html
  @footnote_counter = 0
  @toc_entries = []
  @used_footnotes = []

  blocks = parse_blocks(preprocess(@text))
  html = postprocess(render_blocks(blocks))
  "#{html.strip}\n"
end
|
|
192
|
+
|
|
193
|
+
# 生成目录 HTML。
|
|
194
|
+
#
|
|
195
|
+
# 无需先调用 to_html:本方法自行执行预处理、分块和渲染来收集标题信息。
|
|
196
|
+
#
|
|
197
|
+
# @return [String] 目录 HTML(<ul> 列表),如果无标题返回空字符串
|
|
198
|
+
# Builds the table-of-contents markup for the document.
#
# The document is parsed and rendered here, so no prior to_html call is
# needed. Headings are only recorded by render_blocks when the generate_toc
# flag is set. The accumulators are reset first so that an earlier to_html or
# toc_content call on the same parser does not produce duplicate entries.
#
# @return [String] nested list markup from build_toc_html, or "" when no
#   heading was recorded
def toc_content
  @footnote_counter = 0
  @toc_entries = []
  @used_footnotes = []

  render_blocks(parse_blocks(preprocess(@text)))
  return "" if @toc_entries.empty?

  build_toc_html(@toc_entries)
end
|
|
206
|
+
|
|
207
|
+
private
|
|
208
|
+
|
|
209
|
+
# Turns the flat heading list into a tree and renders it.
#
# @param entries [Array<Hash>] headings in document order, each with :level
# @return [String] result of render_toc_node for the tree root, or "" when
#   entries is empty
def build_toc_html(entries)
  return "" if entries.empty?

  tree = { children: [] }
  open_nodes = [tree]

  entries.each do |heading|
    # Close every open node that is at the same depth or deeper than this
    # heading; the root (which has no :entry) always stays on the stack.
    open_nodes.pop while open_nodes.size > 1 && open_nodes.last[:entry][:level] >= heading[:level]

    item = { entry: heading, children: [] }
    open_nodes.last[:children] << item
    open_nodes.push(item)
  end

  render_toc_node(tree)
end
|
|
230
|
+
|
|
231
|
+
# Renders the children of a TOC tree node as a <ul>, recursing into
# grandchildren.
#
# @param node [Hash] tree node with a :children array
# @return [String] list markup, or "" when the node has no children
def render_toc_node(node)
  kids = node[:children]
  return "" if kids.empty?

  rows = kids.map do |kid|
    heading = kid[:entry]
    nested = kid[:children].empty? ? "" : "\n" + render_toc_node(kid)
    " <li><a href=\"##{heading[:id]}\">#{escape_html(heading[:text])}</a>#{nested}</li>\n"
  end

  "<ul>\n#{rows.join}</ul>\n"
end
|
|
245
|
+
|
|
246
|
+
# ========================================================================
|
|
247
|
+
# 预处理:标准化换行,提取引用链接和脚注定义
|
|
248
|
+
#
|
|
249
|
+
# 将 \r\n 和 \r 统一为 \n。
|
|
250
|
+
# 从正文中移除引用定义和脚注定义,存入实例变量。
|
|
251
|
+
#
|
|
252
|
+
# @param text [String] 原始 Markdown 文本
|
|
253
|
+
# @return [String] 处理后的文本(不含引用/脚注定义)
|
|
254
|
+
# ========================================================================
|
|
255
|
+
# Normalizes line endings and pulls link reference definitions and footnote
# definitions out of the text.
#
# Reference definitions are stored in @references (keyed by lower-cased id),
# footnote definitions in @footnotes (keyed by "^label"). Footnotes are only
# recognized when the footnotes flag is on.
#
# The footnote check runs before the reference check: "[^1]: Note" also
# satisfies REFERENCE_DEF, so testing references first would store the
# footnote as a link reference and lose it.
#
# @param text [String] raw Markdown
# @return [String] the text without the extracted definition lines
def preprocess(text)
  lines = text.gsub("\r\n", "\n").gsub("\r", "\n").split("\n")
  kept = []
  i = 0

  while i < lines.length
    line = lines[i]

    if @rd.footnotes && (found = line.match(/^\[(\^\w+)\]:\s*(.+)$/))
      label = found[1]
      content = found[2]
      i += 1
      # Following lines that start with a space or tab continue the footnote.
      while i < lines.length && lines[i].start_with?(' ', "\t")
        content += "\n" + lines[i].sub(/^(\s+)/, '')
        i += 1
      end
      @footnotes[label] = content
      next
    end

    if (found = line.match(REFERENCE_DEF))
      @references[found[1].downcase] = { url: found[2], title: found[3] }
      i += 1
      next
    end

    kept << line
    i += 1
  end

  kept.join("\n")
end
|
|
294
|
+
|
|
295
|
+
# ========================================================================
|
|
296
|
+
# 块级解析:将文本分块为结构化 block 数组
|
|
297
|
+
#
|
|
298
|
+
# 识别的块类型::hr, :header, :code, :blockquote, :table, :deflist,
|
|
299
|
+
# :ul, :ol, :ol_alpha, :html, :paragraph
|
|
300
|
+
#
|
|
301
|
+
# @param text [String] 预处理后的文本
|
|
302
|
+
# @return [Array<Hash>] block 数组,每个元素包含 :type 和其他类型特定键
|
|
303
|
+
# ========================================================================
|
|
304
|
+
# Splits preprocessed text into an array of block hashes.
#
# Block types produced: :hr, :header, :code, :blockquote, :table, :deflist,
# :ul, :ol, :ol_alpha, :html and :paragraph. The checks run in that fixed
# order for every non-blank line; the first one that matches consumes the
# line(s) and the scan continues after them.
#
# Changes from the previous implementation (HTML blocks only):
# - a block whose opening line already contains its closing tag is a
#   one-line block instead of running on to the next closing tag;
# - void elements (<hr>, <br>) and stray closing tags are one-line blocks;
# - the scan resumes after the closing line, which used to be parsed a
#   second time as the start of a new HTML block.
#
# @param text [String] output of preprocess
# @return [Array<Hash>] blocks, each with :type plus type-specific keys
def parse_blocks(text)
  lines = text.split("\n")
  blocks = []
  html_block_tags = %w[p div h1 h2 h3 h4 h5 h6 blockquote pre table ol ul dl form hr br]
  void_tags = %w[hr br]
  list_patterns = { ul: LIST_BULLET_RE, ol: LIST_NUMBER_RE, ol_alpha: LIST_ALPHA_RE }
  i = 0

  while i < lines.length
    line = lines[i]

    if line.strip.empty?
      i += 1
      next
    end

    # Horizontal rule
    if line =~ HORIZONTAL_RULE
      blocks << { type: :hr }
      i += 1
      next
    end

    # ATX header
    if (m = ATX_HEADER_RE.match(line))
      blocks << { type: :header, level: m[1].length, content: m[2].strip }
      i += 1
      next
    end

    # Setext header: the underline on the next line decides the level.
    if i + 1 < lines.length && lines[i + 1] =~ /^[=-]+\s*$/
      level = lines[i + 1][0] == '=' ? 1 : 2
      blocks << { type: :header, level: level, content: line.strip }
      i += 2
      next
    end

    # Fenced code block; an unterminated fence runs to the end of the text.
    if (m = CODE_BLOCK_FENCE.match(line))
      lang = m[1]
      code_lines = []
      i += 1
      while i < lines.length && lines[i] !~ /^```\s*$/
        code_lines << lines[i]
        i += 1
      end
      i += 1 # skip closing fence
      blocks << { type: :code, lang: lang, content: code_lines.join("\n") }
      next
    end

    # Indented code block; blank lines inside it are kept.
    if line =~ CODE_BLOCK_INDENT
      code_lines = [line.sub(CODE_BLOCK_INDENT, '')]
      i += 1
      while i < lines.length && (lines[i] =~ CODE_BLOCK_INDENT || lines[i].strip.empty?)
        code_lines << lines[i].sub(CODE_BLOCK_INDENT, '')
        i += 1
      end
      blocks << { type: :code, content: code_lines.join("\n").gsub(/\n+\z/, "\n") }
      next
    end

    # Blockquote: the quoted text is rendered by a nested parser right away,
    # so :content already holds HTML.
    if line =~ BLOCKQUOTE_RE
      quote_lines = []
      while i < lines.length &&
            (lines[i] =~ BLOCKQUOTE_RE ||
             (lines[i].strip.empty? && i + 1 < lines.length && lines[i + 1] =~ BLOCKQUOTE_RE))
        quote_lines << lines[i].sub(/^\s*>\s?/, '')
        i += 1
      end
      inner_parser = MarkdownParser.new(quote_lines.join("\n"), @rd)
      blocks << { type: :blockquote, content: inner_parser.to_html }
      next
    end

    # Table: a row line followed by a separator line.
    if !@rd.no_tables && i + 1 < lines.length && lines[i] =~ TABLE_ROW_RE && lines[i + 1] =~ TABLE_SEP_RE
      header = parse_table_row(lines[i])
      alignments = parse_table_alignments(lines[i + 1])
      rows = []
      i += 2
      while i < lines.length && lines[i] =~ TABLE_ROW_RE
        rows << parse_table_row(lines[i])
        i += 1
      end
      blocks << { type: :table, header: header, alignments: alignments, rows: rows }
      next
    end

    # Definition list: a term line followed by one or more ": definition" lines.
    # NOTE(review): this is switched off by the no_tables flag; it looks
    # copied from the table branch. Confirm whether that coupling is intended.
    if !@rd.no_tables && i + 1 < lines.length && lines[i + 1] =~ /^:\s+/
      term = line.strip
      defs = []
      i += 1
      while i < lines.length && lines[i] =~ /^:\s+/
        defs << lines[i].sub(/^:\s+/, '')
        i += 1
        i += 1 if i < lines.length && lines[i].strip.empty?
      end
      blocks << { type: :deflist, term: term, defs: defs }
      next
    end

    # Lists
    # NOTE(review): a lettered list only starts when md1compat is set; confirm
    # that this is the intended flag.
    list_type = nil
    list_match = nil
    if (m = LIST_BULLET_RE.match(line))
      list_type = :ul
      list_match = m[1]
    elsif (m = LIST_NUMBER_RE.match(line))
      list_type = :ol
      list_match = m[1]
    elsif @rd.md1compat && (m = LIST_ALPHA_RE.match(line))
      list_type = :ol_alpha
      list_match = m[1]
    end

    if list_type
      item_pattern = list_patterns[list_type]
      items = []
      current_item = [line.sub(/^#{Regexp.escape(list_match)}[.)]?\s+/, '')]
      i += 1

      while i < lines.length
        current_line = lines[i]

        # A line with a marker of the same list type starts the next item.
        if (m = item_pattern.match(current_line))
          items << current_item.join("\n")
          current_item = [current_line.sub(/^#{Regexp.escape(m[1])}[.)]?\s+/, '')]
          i += 1
          next
        end

        # Any other non-blank line that is not indented ends the list. This
        # includes markers of a different list type, because same-type
        # markers were consumed just above.
        break if !current_line.strip.empty? && current_line !~ /^\s/

        # Blank and indented lines belong to the current item.
        current_item << current_line
        i += 1
      end
      items << current_item.join("\n")

      parsed_items = items.map do |item_text|
        stripped = item_text.strip
        # Multi-line items (and every item when explicitlist is set) are
        # rendered from the unstripped text so inner line breaks survive.
        if stripped.include?("\n") || @rd.explicitlist
          render_inline(item_text)
        else
          render_inline(stripped)
        end
      end

      blocks << { type: list_type, items: parsed_items }
      next
    end

    # HTML block: passed through verbatim for a fixed set of block-level tags.
    if !@rd.filter_html && (m = HTML_BLOCK_RE.match(line))
      tag = m[2].downcase
      if html_block_tags.include?(tag)
        closer = /<\/#{Regexp.escape(tag)}>\s*$/i
        one_line = m[1] == '/' || void_tags.include?(tag) || line =~ closer
        html_lines = [line]
        i += 1
        unless one_line
          while i < lines.length
            html_lines << lines[i]
            i += 1 # advance first so the closing line is not parsed again
            break if html_lines.last =~ closer
          end
        end
        blocks << { type: :html, content: html_lines.join("\n") }
        next
      end
    end

    # Paragraph (default): runs until a blank line or the start of another block.
    para_lines = [line]
    i += 1
    while i < lines.length && !lines[i].strip.empty? && !is_block_start?(lines[i])
      para_lines << lines[i]
      i += 1
    end
    blocks << { type: :paragraph, content: para_lines.join("\n") }
  end

  blocks
end
|
|
505
|
+
|
|
506
|
+
# 判断行是否为新块的起始。
|
|
507
|
+
#
|
|
508
|
+
# @param line [String] 单行文本
|
|
509
|
+
# @return [Boolean] 如果是块起始则为 true
|
|
510
|
+
# Tells whether a line would open a new block and therefore ends the
# paragraph that is currently being collected.
#
# Returns a real boolean (the previous =~ chain leaked a match offset or
# nil) and uses match? so no MatchData is allocated per line.
#
# @param line [String] a single line
# @return [Boolean] true when the line matches any block-start pattern
def is_block_start?(line)
  [
    ATX_HEADER_RE, HORIZONTAL_RULE, CODE_BLOCK_FENCE, CODE_BLOCK_INDENT,
    BLOCKQUOTE_RE, LIST_BULLET_RE, LIST_NUMBER_RE, LIST_ALPHA_RE, HTML_BLOCK_RE
  ].any? { |pattern| pattern.match?(line) }
end
|
|
521
|
+
|
|
522
|
+
# 解析表格行数据。
|
|
523
|
+
#
|
|
524
|
+
# 移除首尾的 | 分隔符,按 | 分割;单元格两侧的空白保持原样(不做 strip)。
|
|
525
|
+
#
|
|
526
|
+
# @param line [String] 表格行文本
|
|
527
|
+
# @return [Array<String>] 单元格内容数组
|
|
528
|
+
# Splits one table row into its cells.
#
# The leading '|' and the trailing '|' (with any whitespace after it) are
# removed before splitting. Cell text is returned as-is; surrounding
# whitespace is not stripped.
#
# @param line [String] a table row
# @return [Array<String>] raw cell contents
def parse_table_row(line)
  inner = line.sub(/^\|/, '')
  inner = inner.sub(/\|\s*$/, '')
  inner.split('|')
end
|
|
531
|
+
|
|
532
|
+
# 解析表格列对齐方式。
|
|
533
|
+
#
|
|
534
|
+
# 根据分隔行中 : 的位置判断对齐:
|
|
535
|
+
# - ^:.*:$ → center
|
|
536
|
+
# - ^: → left
|
|
537
|
+
# - :$ → right
|
|
538
|
+
#
|
|
539
|
+
# @param line [String] 表格分隔行文本
|
|
540
|
+
# @return [Array<String, nil>] 对齐方式数组,nil 表示未指定
|
|
541
|
+
# Reads the column alignments from a table separator row.
#
# A cell with a colon on both ends means 'center', a leading colon 'left',
# a trailing colon 'right'; anything else yields nil.
#
# @param line [String] the separator row
# @return [Array<String, nil>] one alignment per column
def parse_table_alignments(line)
  inner = line.sub(/^\|/, '').sub(/\|\s*$/, '')

  inner.split('|').map do |raw|
    case raw.strip
    when /^:.*:$/ then 'center'
    when /^:/ then 'left'
    when /:$/ then 'right'
    end
  end
end
|
|
556
|
+
|
|
557
|
+
# ========================================================================
|
|
558
|
+
# 将 block 数组渲染为 HTML 字符串
|
|
559
|
+
#
|
|
560
|
+
# @param blocks [Array<Hash>] 解析后的 block 数组
|
|
561
|
+
# @return [String] 拼接后的 HTML 片段
|
|
562
|
+
# ========================================================================
|
|
563
|
+
# Renders the parsed blocks to HTML and concatenates the results.
#
# When the generate_toc flag is set, each header is additionally recorded in
# @toc_entries and gets a named anchor in front of it.
#
# @param blocks [Array<Hash>] output of parse_blocks
# @return [String] HTML fragment; blocks of unknown type contribute nothing
def render_blocks(blocks)
  list_open = { ul: "<ul>", ol: "<ol>", ol_alpha: "<ol type=\"a\">" }
  list_close = { ul: "</ul>", ol: "</ol>", ol_alpha: "</ol>" }

  rendered = blocks.map do |block|
    kind = block[:type]

    case kind
    when :hr
      "<hr />\n"
    when :header
      level = block[:level]
      inner = render_inline(block[:content])
      heading = "<h#{level}>#{inner}</h#{level}>\n"
      if @rd.generate_toc
        anchor = generate_header_id(block[:content])
        @toc_entries << { level: level, text: block[:content], id: anchor }
        "<a name=\"#{anchor}\"></a> #{heading}"
      else
        heading
      end
    when :paragraph
      # Line breaks inside the paragraph are kept as they are.
      "<p>#{render_inline(block[:content])}</p>\n"
    when :code
      lang = block[:lang]
      class_attr = lang && !lang.empty? ? " class=\"#{escape_html(lang)}\"" : ""
      "<pre><code#{class_attr}>#{escape_html(block[:content])}</code></pre>\n"
    when :blockquote
      # :content is already HTML. Unwrap its <p> elements and collapse the
      # blank lines between what used to be separate paragraphs.
      inner = block[:content].gsub(/<p>(.*?)<\/p>/m) { Regexp.last_match(1) }
      inner = inner.gsub(/\n\n+/, "\n")
      "<blockquote>\n#{inner}</blockquote>\n"
    when :ul, :ol, :ol_alpha
      # Items were rendered inline by parse_blocks already.
      body = block[:items].map { |item| "<li>#{item}</li>" }.join("\n")
      "#{list_open[kind]}\n#{body}\n#{list_close[kind]}\n"
    when :table
      render_table(block)
    when :deflist
      definitions = block[:defs].map { |text| "<dd>#{render_inline(text)}</dd>" }.join("\n")
      "<dl>\n<dt>#{render_inline(block[:term])}</dt>\n#{definitions}\n</dl>\n"
    when :html
      @rd.filter_html ? "" : block[:content] + "\n"
    else
      ""
    end
  end

  rendered.join
end
|
|
621
|
+
|
|
622
|
+
# 渲染表格 block 为完整 HTML 表格。
|
|
623
|
+
#
|
|
624
|
+
# @param block [Hash] 表格 block,包含 :header, :alignments, :rows
|
|
625
|
+
# @return [String] HTML 表格字符串
|
|
626
|
+
# Renders a :table block as a <table> with <thead> and <tbody>.
#
# A column whose alignment entry is set gets an inline text-align style on
# both its header cell and its body cells.
#
# @param block [Hash] table block with :header, :alignments and :rows
# @return [String] the table markup
def render_table(block)
  alignments = block[:alignments]

  cell = lambda do |tag, text, column|
    style = alignments[column] ? " style=\"text-align:#{alignments[column]};\"" : ""
    "<#{tag}#{style}>#{render_inline(text)}</#{tag}>\n"
  end

  head = block[:header].each_with_index.map { |text, column| cell.call("th", text, column) }.join
  body = block[:rows].map do |row|
    cells = row.each_with_index.map { |text, column| cell.call("td", text, column) }.join
    "<tr>\n#{cells}</tr>\n"
  end.join

  "<table>\n<thead>\n<tr>\n#{head}</tr>\n</thead>\n<tbody>\n#{body}</tbody>\n</table>\n"
end
|
|
649
|
+
|
|
650
|
+
# 生成标题锚点 ID。
|
|
651
|
+
#
|
|
652
|
+
# 转换规则(不改变大小写):移除非字母数字空白连字符 → 空格替换为 - →
|
|
653
|
+
# 合并多个 - → 移除首尾 -
|
|
654
|
+
#
|
|
655
|
+
# @param text [String] 标题文本
|
|
656
|
+
# @return [String] 锚点 ID 字符串
|
|
657
|
+
# Derives an anchor id from a heading's text.
#
# Steps: drop every character that is not a word character, whitespace or
# '-'; turn whitespace runs into '-'; collapse repeated '-'; trim one '-'
# from each end. Letter case is left unchanged.
#
# @param text [String] heading text
# @return [String] the anchor id (may be empty)
def generate_header_id(text)
  slug = text.gsub(/[^\w\s-]/, '')
  slug = slug.gsub(/\s+/, '-')
  slug = slug.gsub(/-+/, '-')
  slug = slug.sub(/^-/, '')
  slug.sub(/-$/, '')
end
|
|
660
|
+
|
|
661
|
+
# ========================================================================
|
|
662
|
+
# 渲染内联元素。
|
|
663
|
+
#
|
|
664
|
+
# 处理顺序:代码片段保护 → 图片 → 链接 → 自动链接 → 删除线 →
|
|
665
|
+
# 上标 → 强调(strong/em)→ 硬换行 → 脚注 → 恢复代码片段
|
|
666
|
+
#
|
|
667
|
+
# @param text [String] 内联文本
|
|
668
|
+
# @return [String] 渲染后的 HTML 字符串
|
|
669
|
+
# ========================================================================
|
|
670
|
+
# Renders inline Markdown to HTML.
#
# Order of processing: code spans are swapped for placeholders, then images,
# links, autolinks, strikethrough, superscript, strong/em, hard line breaks
# and footnote references are handled, and finally the code spans are put
# back. Each step is controlled by the matching flag on @rd.
#
# Changes from the previous implementation:
# - code spans are restored with the block form of sub. The string form
#   treats backslashes in the replacement as back-references, which mangled
#   code such as `a\\b` or `\1`.
# - a hard line break needs two or more trailing spaces, as the comment
#   always said; a single trailing space no longer produces <br />.
# - autolink hrefs are escaped like every other href in this method.
#
# @param text [String, nil] inline Markdown
# @return [String] HTML; "" for nil or empty input
def render_inline(text)
  return "" if text.nil? || text.empty?

  # Protect code spans first so nothing below touches their content.
  code_spans = []
  text = text.gsub(/`(.+?)`/) do
    code_spans << escape_html(Regexp.last_match(1))
    "\x00CODE#{code_spans.length - 1}\x00"
  end

  # Images (before links, because the link pattern would also match them)
  unless @rd.no_image
    # Inline images with optional size: ![alt](src =WxH)
    text = text.gsub(/!\[(.*?)\]\((.+?)\)/) do
      alt = Regexp.last_match(1)
      src = Regexp.last_match(2)
      if (sized = src.match(/(.+?)\s*=\s*(\d+)x(\d+)/))
        "<img src=\"#{escape_html(sized[1].strip)}\" alt=\"#{escape_html(alt)}\" width=\"#{sized[2]}\" height=\"#{sized[3]}\" />"
      else
        "<img src=\"#{escape_html(src)}\" alt=\"#{escape_html(alt)}\" />"
      end
    end

    # Reference-style images; an empty reference falls back to the alt text.
    text = text.gsub(/!\[(.*?)\]\[(.*?)\]/) do
      whole = Regexp.last_match(0)
      alt = Regexp.last_match(1)
      ref = Regexp.last_match(2)
      ref = alt if ref.empty?
      target = @references[ref.downcase]
      if target
        title = target[:title]
        title_attr = title ? " title=\"#{escape_html(title)}\"" : ""
        "<img src=\"#{escape_html(target[:url])}\" alt=\"#{escape_html(alt)}\"#{title_attr} />"
      else
        whole # unknown reference: leave the source text untouched
      end
    end
  end

  # Links
  unless @rd.no_links
    # Inline links with optional title
    text = text.gsub(/(?<!!)\[(.+?)\]\((.+?)\)/) do
      link_text = Regexp.last_match(1)
      url = Regexp.last_match(2)
      title = nil
      if (titled = url.match(/(.+?)\s+["'(](.+?)["')]/))
        url = titled[1]
        title = titled[2]
      end
      title_attr = title ? " title=\"#{escape_html(title)}\"" : ""
      "<a href=\"#{escape_html(url)}\"#{title_attr}>#{render_inline(link_text)}</a>"
    end

    # Reference-style links; an empty reference falls back to the link text.
    text = text.gsub(/\[(.+?)\]\[(.*?)\]/) do
      whole = Regexp.last_match(0)
      link_text = Regexp.last_match(1)
      ref = Regexp.last_match(2)
      ref = link_text if ref.empty?
      target = @references[ref.downcase]
      if target
        title = target[:title]
        title_attr = title ? " title=\"#{escape_html(title)}\"" : ""
        "<a href=\"#{escape_html(target[:url])}\"#{title_attr}>#{link_text}</a>"
      else
        whole
      end
    end
  end

  # Autolinks: <http://...> and <user@host.tld>
  if @rd.autolink
    text = text.gsub(/<(https?:\/\/[^>]+)>/) do
      url = Regexp.last_match(1)
      "<a href=\"#{escape_html(url)}\">#{url}</a>"
    end
    text = text.gsub(/<([^>\s@]+@[^>\s@]+\.[^>\s@]+)>/) do
      address = Regexp.last_match(1)
      "<a href=\"mailto:#{escape_html(address)}\">#{address}</a>"
    end
  end

  # Strikethrough
  unless @rd.no_strikethrough
    text = text.gsub(/~~(.+?)~~/) { "<del>#{Regexp.last_match(1)}</del>" }
  end

  # Superscript
  unless @rd.no_superscript
    text = text.gsub(/\^(\w+)\^/) { "<sup>#{Regexp.last_match(1)}</sup>" }
  end

  # Emphasis: strong first so '**' is not consumed as two '*'.
  text = text.gsub(/\*\*(.+?)\*\*/) { "<strong>#{Regexp.last_match(1)}</strong>" }
  text = text.gsub(/__(.+?)__/) { "<strong>#{Regexp.last_match(1)}</strong>" }
  text = text.gsub(/\*(.+?)\*/) { "<em>#{Regexp.last_match(1)}</em>" }
  text = text.gsub(/_(.+?)_/) { "<em>#{Regexp.last_match(1)}</em>" }

  # Hard line breaks (two or more trailing spaces)
  text = text.gsub(/ {2,}\n/, "<br />\n")

  # Footnote references: every resolved reference gets the next number and
  # is remembered in @used_footnotes.
  if @rd.footnotes
    text = text.gsub(/\[(\^\w+)\]/) do
      whole = Regexp.last_match(0)
      id = Regexp.last_match(1)
      if @footnotes[id]
        @footnote_counter += 1
        num = @footnote_counter
        @used_footnotes << { id: id, num: num, content: @footnotes[id] }
        "<sup><a href=\"#fn#{num}\" id=\"ref#{num}\">#{num}</a></sup>"
      else
        whole
      end
    end
  end

  # Restore code spans. The block form inserts the text literally.
  code_spans.each_with_index do |code, idx|
    text = text.sub("\x00CODE#{idx}\x00") { "<code>#{code}</code>" }
  end

  text
end
|
|
782
|
+
|
|
783
|
+
# ========================================================================
|
|
784
|
+
# 后处理:Smartypants、脚注、HTML 过滤、样式过滤。
|
|
785
|
+
#
|
|
786
|
+
# @param html [String] 渲染后的 HTML
|
|
787
|
+
# @return [String] 处理后的 HTML
|
|
788
|
+
# ========================================================================
|
|
789
|
+
# Applies the document-level passes to the rendered HTML: smart punctuation,
# the footnote section, HTML filtering and <style> filtering, in that order.
#
# @param html [String] output of render_blocks
# @return [String] the processed HTML
def postprocess(html)
  html = smartypants(html) if @rd.smart

  if @rd.footnotes && !@used_footnotes.empty?
    notes = @used_footnotes.map do |fn|
      "<li id=\"fn#{fn[:num]}\">#{render_inline(fn[:content])} <a href=\"#ref#{fn[:num]}\">↩</a></li>\n"
    end
    html += "\n<div class=\"footnotes\">\n<hr />\n<ol>\n#{notes.join}</ol>\n</div>\n"
  end

  # NOTE(review): this removes every tag, including the markup generated by
  # this parser, so the result is plain text. Confirm that this is intended.
  html = html.gsub(/<[^>]+>/, '') if @rd.filter_html

  html = html.gsub(/<style\b[^>]*>.*?<\/style>/mi, '') if @rd.filter_styles

  html
end
|
|
813
|
+
|
|
814
|
+
# 智能引号与排版符号转换。
|
|
815
|
+
#
|
|
816
|
+
# 转换规则:
|
|
817
|
+
# - "..." → “...”
|
|
818
|
+
# - '...' → ‘...’
|
|
819
|
+
# - \w'\w → ’
|
|
820
|
+
# - -- → —
|
|
821
|
+
# - 空格-空格 → –
|
|
822
|
+
# - ... → …
|
|
823
|
+
#
|
|
824
|
+
# 保护 HTML 标签内的属性,避免替换引号。
|
|
825
|
+
#
|
|
826
|
+
# @param text [String] HTML 文本
|
|
827
|
+
# @return [String] 转换后的文本
|
|
828
|
+
# Replaces plain ASCII punctuation with typographic characters.
#
# Conversions, applied in this order: "..." to curly double quotes, '...'
# to curly single quotes, an apostrophe after a word character to a right
# single quote, "--" to an em dash, " - " to an en dash, "..." to an
# ellipsis. HTML tags are swapped for placeholders first so the quotes inside
# attributes are left alone.
#
# Tags are restored with the block form of sub. The string form treats
# backslashes in the replacement as back-references, which corrupted any tag
# containing a backslash (for example href="x\0y").
#
# @param text [String] HTML text
# @return [String] the converted text
def smartypants(text)
  # Protect HTML tags
  tags = []
  text = text.gsub(/<[^>]+>/) do |tag|
    tags << tag
    "\x00TAG#{tags.length - 1}\x00"
  end

  text = text.gsub(/"([^"]*?)"/, '“\1”')
  text = text.gsub(/'([^']*?)'/, '‘\1’')
  text = text.gsub(/(\w)'(\w)/, '\1’\2')
  text = text.gsub(/(\w)'/, '\1’')
  text = text.gsub(/--/, '—')
  text = text.gsub(/ - /, ' – ')
  text = text.gsub(/\.\.\./, '…')

  # Restore tags verbatim
  tags.each_with_index do |tag, idx|
    text = text.sub("\x00TAG#{idx}\x00") { tag }
  end
  text
end
|
|
850
|
+
|
|
851
|
+
# HTML 实体转义。
|
|
852
|
+
#
|
|
853
|
+
# 转义字符:& → &amp; < → &lt; > → &gt; " → &quot;
|
|
854
|
+
#
|
|
855
|
+
# @param text [String, nil] 待转义文本
|
|
856
|
+
# @return [String] 转义后的文本,nil 输入返回空字符串
|
|
857
|
+
# Escapes the characters that are special in HTML text and attribute values.
#
# '&' is replaced first so the ampersands introduced by the other
# replacements are not escaped a second time. Previously every replacement
# mapped a character to itself, so nothing was escaped at all.
#
# @param text [String, nil] text to escape
# @return [String] the escaped text; "" for nil
def escape_html(text)
  return "" if text.nil?

  text.gsub('&', '&amp;')
      .gsub('<', '&lt;')
      .gsub('>', '&gt;')
      .gsub('"', '&quot;')
end
|
|
864
|
+
end
|
|
865
|
+
|
|
866
|
+
# BlueCloth 兼容别名
|
|
867
|
+
# BlueCloth compatibility alias.
#
# Re-assigning an existing constant does not raise in Ruby (it only prints a
# warning), so the former begin/rescue could not protect a BlueCloth that was
# already loaded: it was overwritten. The alias is now created only when the
# name is still free.
BlueCloth = ReDiscount if defined?(ReDiscount) && !defined?(BlueCloth)
|