inkmark 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Cargo.lock +940 -0
- data/Cargo.toml +27 -0
- data/LICENSE.txt +21 -0
- data/NOTICE +16 -0
- data/README.md +1166 -0
- data/ext/inkmark/Cargo.toml +31 -0
- data/ext/inkmark/build.rs +5 -0
- data/ext/inkmark/extconf.rb +6 -0
- data/ext/inkmark/src/autolink.rs +167 -0
- data/ext/inkmark/src/chunks_by_heading.rs +325 -0
- data/ext/inkmark/src/chunks_by_size.rs +302 -0
- data/ext/inkmark/src/document.rs +411 -0
- data/ext/inkmark/src/emoji.rs +197 -0
- data/ext/inkmark/src/handler.rs +758 -0
- data/ext/inkmark/src/heading.rs +262 -0
- data/ext/inkmark/src/highlight.rs +202 -0
- data/ext/inkmark/src/image.rs +284 -0
- data/ext/inkmark/src/lib.rs +54 -0
- data/ext/inkmark/src/link.rs +291 -0
- data/ext/inkmark/src/options.rs +231 -0
- data/ext/inkmark/src/plain_text.rs +445 -0
- data/ext/inkmark/src/scheme_filter.rs +319 -0
- data/ext/inkmark/src/stats.rs +453 -0
- data/ext/inkmark/src/tag_filter.rs +226 -0
- data/ext/inkmark/src/toc.rs +221 -0
- data/ext/inkmark/src/truncate.rs +267 -0
- data/ext/inkmark/src/url_match.rs +178 -0
- data/lib/inkmark/event.rb +342 -0
- data/lib/inkmark/native.rb +8 -0
- data/lib/inkmark/options.rb +698 -0
- data/lib/inkmark/toc.rb +40 -0
- data/lib/inkmark/version.rb +6 -0
- data/lib/inkmark.rb +711 -0
- data/sig/inkmark.rbs +219 -0
- metadata +208 -0
|
@@ -0,0 +1,758 @@
|
|
|
1
|
+
use magnus::{prelude::*, Error, RArray, RHash, Ruby, Value};
|
|
2
|
+
use pulldown_cmark::{
|
|
3
|
+
Alignment, BlockQuoteKind, CodeBlockKind, CowStr, Event, HeadingLevel, LinkType,
|
|
4
|
+
MetadataBlockKind, Parser, Tag, TagEnd,
|
|
5
|
+
};
|
|
6
|
+
use std::collections::{HashMap, VecDeque};
|
|
7
|
+
use std::ops::Range;
|
|
8
|
+
|
|
9
|
+
use crate::document::{apply_post_handler_filters, apply_pre_handler_filters};
|
|
10
|
+
use crate::options::{build_options, Flags};
|
|
11
|
+
|
|
12
|
+
/// Per-kind data needed to reconstruct Start events on serialization —
|
|
13
|
+
/// only fields that are NOT exposed as mutable handler targets.
|
|
14
|
+
enum NodeExtra {
|
|
15
|
+
Heading {
|
|
16
|
+
classes: Vec<CowStr<'static>>,
|
|
17
|
+
attrs: Vec<(CowStr<'static>, Option<CowStr<'static>>)>,
|
|
18
|
+
},
|
|
19
|
+
Link {
|
|
20
|
+
link_type: LinkType,
|
|
21
|
+
link_id: CowStr<'static>,
|
|
22
|
+
},
|
|
23
|
+
Image {
|
|
24
|
+
link_type: LinkType,
|
|
25
|
+
link_id: CowStr<'static>,
|
|
26
|
+
},
|
|
27
|
+
List,
|
|
28
|
+
CodeBlock {
|
|
29
|
+
fenced: bool,
|
|
30
|
+
},
|
|
31
|
+
Table {
|
|
32
|
+
alignments: Vec<Alignment>,
|
|
33
|
+
},
|
|
34
|
+
BlockQuote {
|
|
35
|
+
kind: Option<BlockQuoteKind>,
|
|
36
|
+
},
|
|
37
|
+
MetadataBlock {
|
|
38
|
+
kind: MetadataBlockKind,
|
|
39
|
+
},
|
|
40
|
+
FootnoteDefinition {
|
|
41
|
+
label: CowStr<'static>,
|
|
42
|
+
},
|
|
43
|
+
LeafEvent(Event<'static>),
|
|
44
|
+
None,
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
pub(crate) struct Node {
|
|
48
|
+
kind: &'static str,
|
|
49
|
+
is_container: bool,
|
|
50
|
+
text: String,
|
|
51
|
+
depth: usize,
|
|
52
|
+
children: Vec<Node>,
|
|
53
|
+
parent_kind: Option<&'static str>,
|
|
54
|
+
byte_range: Option<Range<usize>>,
|
|
55
|
+
// Fields exposed to / mutated by handlers.
|
|
56
|
+
level: Option<u8>,
|
|
57
|
+
lang: Option<String>,
|
|
58
|
+
dest: Option<String>,
|
|
59
|
+
title: Option<String>,
|
|
60
|
+
id: Option<String>,
|
|
61
|
+
label: Option<String>,
|
|
62
|
+
extra: NodeExtra,
|
|
63
|
+
// Mutations written back after handler dispatch.
|
|
64
|
+
replacement_html: Option<String>,
|
|
65
|
+
replacement_markdown: Option<String>,
|
|
66
|
+
new_dest: Option<String>,
|
|
67
|
+
new_title: Option<String>,
|
|
68
|
+
new_level: Option<u8>,
|
|
69
|
+
new_id: Option<String>,
|
|
70
|
+
deleted: bool,
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
fn own_str(s: &str) -> CowStr<'static> {
|
|
74
|
+
CowStr::Boxed(s.to_string().into_boxed_str())
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
fn level_from_u8(n: u8) -> HeadingLevel {
|
|
78
|
+
match n {
|
|
79
|
+
1 => HeadingLevel::H1,
|
|
80
|
+
2 => HeadingLevel::H2,
|
|
81
|
+
3 => HeadingLevel::H3,
|
|
82
|
+
4 => HeadingLevel::H4,
|
|
83
|
+
5 => HeadingLevel::H5,
|
|
84
|
+
_ => HeadingLevel::H6,
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
fn kind_for_tag(tag: &Tag<'_>) -> &'static str {
|
|
89
|
+
match tag {
|
|
90
|
+
Tag::Paragraph => "paragraph",
|
|
91
|
+
Tag::Heading { .. } => "heading",
|
|
92
|
+
Tag::BlockQuote(_) => "blockquote",
|
|
93
|
+
Tag::CodeBlock(_) => "code_block",
|
|
94
|
+
Tag::HtmlBlock => "html_block",
|
|
95
|
+
Tag::List(None) => "list",
|
|
96
|
+
Tag::List(Some(_)) => "ordered_list",
|
|
97
|
+
Tag::Item => "list_item",
|
|
98
|
+
Tag::FootnoteDefinition(_) => "footnote_definition",
|
|
99
|
+
Tag::Table(_) => "table",
|
|
100
|
+
Tag::TableHead => "table_head",
|
|
101
|
+
Tag::TableRow => "table_row",
|
|
102
|
+
Tag::TableCell => "table_cell",
|
|
103
|
+
Tag::Emphasis => "emphasis",
|
|
104
|
+
Tag::Strong => "strong",
|
|
105
|
+
Tag::Strikethrough => "strikethrough",
|
|
106
|
+
Tag::Link { .. } => "link",
|
|
107
|
+
Tag::Image { .. } => "image",
|
|
108
|
+
Tag::DefinitionList => "definition_list",
|
|
109
|
+
Tag::DefinitionListTitle => "definition_list_title",
|
|
110
|
+
Tag::DefinitionListDefinition => "definition_list_definition",
|
|
111
|
+
Tag::Superscript => "superscript",
|
|
112
|
+
Tag::Subscript => "subscript",
|
|
113
|
+
Tag::MetadataBlock(_) => "metadata_block",
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
type RangeMap = HashMap<&'static str, VecDeque<Range<usize>>>;
|
|
118
|
+
|
|
119
|
+
/// Parse `source` with the offset iterator to collect byte ranges per element
|
|
120
|
+
/// kind, keyed by kind string. Ranges are ordered by source position so that
|
|
121
|
+
/// `build_tree` can pop them in document order.
|
|
122
|
+
///
|
|
123
|
+
/// `has_autolink`: when true, "link" ranges are excluded. autolink inserts
|
|
124
|
+
/// new Start(Link) events inline, which would shift the queue and assign
|
|
125
|
+
/// ranges from explicit links to the wrong nodes.
|
|
126
|
+
fn collect_byte_ranges(
|
|
127
|
+
source: &str,
|
|
128
|
+
cm_opts: pulldown_cmark::Options,
|
|
129
|
+
has_autolink: bool,
|
|
130
|
+
) -> RangeMap {
|
|
131
|
+
let mut map: RangeMap = HashMap::new();
|
|
132
|
+
for (event, range) in Parser::new_ext(source, cm_opts).into_offset_iter() {
|
|
133
|
+
let kind: Option<&'static str> = match &event {
|
|
134
|
+
Event::Start(tag) => match tag {
|
|
135
|
+
// autolink inserts extra Start(Link) events—skip to avoid
|
|
136
|
+
// corrupting the per-kind queue ordering for explicit links.
|
|
137
|
+
Tag::Link { .. } if has_autolink => None,
|
|
138
|
+
_ => Some(kind_for_tag(tag)),
|
|
139
|
+
},
|
|
140
|
+
// Inline code spans: autolink never splits these.
|
|
141
|
+
Event::Code(_) => Some("code"),
|
|
142
|
+
Event::Rule => Some("rule"),
|
|
143
|
+
Event::InlineMath(_) => Some("inline_math"),
|
|
144
|
+
Event::DisplayMath(_) => Some("display_math"),
|
|
145
|
+
// Text events can be split by autolink/emoji—skip.
|
|
146
|
+
_ => None,
|
|
147
|
+
};
|
|
148
|
+
if let Some(k) = kind {
|
|
149
|
+
map.entry(k).or_default().push_back(range);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
map
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
fn node_from_start(
|
|
156
|
+
tag: Tag<'_>,
|
|
157
|
+
depth: usize,
|
|
158
|
+
parent_kind: Option<&'static str>,
|
|
159
|
+
byte_range: Option<Range<usize>>,
|
|
160
|
+
) -> Node {
|
|
161
|
+
let kind = kind_for_tag(&tag);
|
|
162
|
+
let (level, lang, dest, title, id, label, extra) = match tag {
|
|
163
|
+
Tag::Heading {
|
|
164
|
+
level,
|
|
165
|
+
id,
|
|
166
|
+
classes,
|
|
167
|
+
attrs,
|
|
168
|
+
} => {
|
|
169
|
+
let lv = crate::toc::level_to_u8(level);
|
|
170
|
+
let id_s = id.as_deref().map(str::to_string);
|
|
171
|
+
let extra = NodeExtra::Heading {
|
|
172
|
+
classes: classes.into_iter().map(|s| s.into_static()).collect(),
|
|
173
|
+
attrs: attrs
|
|
174
|
+
.into_iter()
|
|
175
|
+
.map(|(k, v)| (k.into_static(), v.map(|s| s.into_static())))
|
|
176
|
+
.collect(),
|
|
177
|
+
};
|
|
178
|
+
(Some(lv), None, None, None, id_s, None, extra)
|
|
179
|
+
}
|
|
180
|
+
Tag::Link {
|
|
181
|
+
link_type,
|
|
182
|
+
dest_url,
|
|
183
|
+
title,
|
|
184
|
+
id,
|
|
185
|
+
} => {
|
|
186
|
+
let d = dest_url.as_ref().to_string();
|
|
187
|
+
let t = title.as_ref().to_string();
|
|
188
|
+
let extra = NodeExtra::Link {
|
|
189
|
+
link_type,
|
|
190
|
+
link_id: id.into_static(),
|
|
191
|
+
};
|
|
192
|
+
(None, None, Some(d), Some(t), None, None, extra)
|
|
193
|
+
}
|
|
194
|
+
Tag::Image {
|
|
195
|
+
link_type,
|
|
196
|
+
dest_url,
|
|
197
|
+
title,
|
|
198
|
+
id,
|
|
199
|
+
} => {
|
|
200
|
+
let d = dest_url.as_ref().to_string();
|
|
201
|
+
let t = title.as_ref().to_string();
|
|
202
|
+
let extra = NodeExtra::Image {
|
|
203
|
+
link_type,
|
|
204
|
+
link_id: id.into_static(),
|
|
205
|
+
};
|
|
206
|
+
(None, None, Some(d), Some(t), None, None, extra)
|
|
207
|
+
}
|
|
208
|
+
Tag::CodeBlock(ref cbk) => {
|
|
209
|
+
let (lang_s, fenced) = match cbk {
|
|
210
|
+
CodeBlockKind::Fenced(lang) => (lang.as_ref().to_string(), true),
|
|
211
|
+
CodeBlockKind::Indented => (String::new(), false),
|
|
212
|
+
};
|
|
213
|
+
(
|
|
214
|
+
None,
|
|
215
|
+
Some(lang_s),
|
|
216
|
+
None,
|
|
217
|
+
None,
|
|
218
|
+
None,
|
|
219
|
+
None,
|
|
220
|
+
NodeExtra::CodeBlock { fenced },
|
|
221
|
+
)
|
|
222
|
+
}
|
|
223
|
+
Tag::List(_start) => (None, None, None, None, None, None, NodeExtra::List),
|
|
224
|
+
Tag::FootnoteDefinition(lbl) => {
|
|
225
|
+
let label_s = lbl.as_ref().to_string();
|
|
226
|
+
(
|
|
227
|
+
None,
|
|
228
|
+
None,
|
|
229
|
+
None,
|
|
230
|
+
None,
|
|
231
|
+
None,
|
|
232
|
+
Some(label_s),
|
|
233
|
+
NodeExtra::FootnoteDefinition {
|
|
234
|
+
label: lbl.into_static(),
|
|
235
|
+
},
|
|
236
|
+
)
|
|
237
|
+
}
|
|
238
|
+
Tag::Table(alignments) => (
|
|
239
|
+
None,
|
|
240
|
+
None,
|
|
241
|
+
None,
|
|
242
|
+
None,
|
|
243
|
+
None,
|
|
244
|
+
None,
|
|
245
|
+
NodeExtra::Table { alignments },
|
|
246
|
+
),
|
|
247
|
+
Tag::BlockQuote(kind) => (
|
|
248
|
+
None,
|
|
249
|
+
None,
|
|
250
|
+
None,
|
|
251
|
+
None,
|
|
252
|
+
None,
|
|
253
|
+
None,
|
|
254
|
+
NodeExtra::BlockQuote { kind },
|
|
255
|
+
),
|
|
256
|
+
Tag::MetadataBlock(kind) => (
|
|
257
|
+
None,
|
|
258
|
+
None,
|
|
259
|
+
None,
|
|
260
|
+
None,
|
|
261
|
+
None,
|
|
262
|
+
None,
|
|
263
|
+
NodeExtra::MetadataBlock { kind },
|
|
264
|
+
),
|
|
265
|
+
_ => (None, None, None, None, None, None, NodeExtra::None),
|
|
266
|
+
};
|
|
267
|
+
Node {
|
|
268
|
+
kind,
|
|
269
|
+
is_container: true,
|
|
270
|
+
text: String::new(),
|
|
271
|
+
depth,
|
|
272
|
+
children: Vec::new(),
|
|
273
|
+
parent_kind,
|
|
274
|
+
byte_range,
|
|
275
|
+
level,
|
|
276
|
+
lang,
|
|
277
|
+
dest,
|
|
278
|
+
title,
|
|
279
|
+
id,
|
|
280
|
+
label,
|
|
281
|
+
extra,
|
|
282
|
+
replacement_html: None,
|
|
283
|
+
replacement_markdown: None,
|
|
284
|
+
new_dest: None,
|
|
285
|
+
new_title: None,
|
|
286
|
+
new_level: None,
|
|
287
|
+
new_id: None,
|
|
288
|
+
deleted: false,
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
fn node_from_leaf(
|
|
293
|
+
event: Event<'_>,
|
|
294
|
+
depth: usize,
|
|
295
|
+
parent_kind: Option<&'static str>,
|
|
296
|
+
byte_range: Option<Range<usize>>,
|
|
297
|
+
) -> Node {
|
|
298
|
+
let (kind, text, label) = match &event {
|
|
299
|
+
Event::Text(s) => ("text", s.as_ref().to_string(), None),
|
|
300
|
+
Event::Code(s) => ("code", s.as_ref().to_string(), None),
|
|
301
|
+
Event::Html(s) | Event::InlineHtml(s) => ("html", s.as_ref().to_string(), None),
|
|
302
|
+
Event::SoftBreak => ("soft_break", String::new(), None),
|
|
303
|
+
Event::HardBreak => ("hard_break", String::new(), None),
|
|
304
|
+
Event::Rule => ("rule", String::new(), None),
|
|
305
|
+
Event::FootnoteReference(s) => {
|
|
306
|
+
let label = s.as_ref().to_string();
|
|
307
|
+
("footnote_reference", label.clone(), Some(label))
|
|
308
|
+
}
|
|
309
|
+
Event::InlineMath(s) => ("inline_math", s.as_ref().to_string(), None),
|
|
310
|
+
Event::DisplayMath(s) => ("display_math", s.as_ref().to_string(), None),
|
|
311
|
+
Event::TaskListMarker(_) => ("task_list_marker", String::new(), None),
|
|
312
|
+
_ => ("unknown", String::new(), None),
|
|
313
|
+
};
|
|
314
|
+
Node {
|
|
315
|
+
kind,
|
|
316
|
+
is_container: false,
|
|
317
|
+
text,
|
|
318
|
+
depth,
|
|
319
|
+
children: Vec::new(),
|
|
320
|
+
parent_kind,
|
|
321
|
+
byte_range,
|
|
322
|
+
level: None,
|
|
323
|
+
lang: None,
|
|
324
|
+
dest: None,
|
|
325
|
+
title: None,
|
|
326
|
+
id: None,
|
|
327
|
+
label,
|
|
328
|
+
extra: NodeExtra::LeafEvent(event.into_static()),
|
|
329
|
+
replacement_html: None,
|
|
330
|
+
replacement_markdown: None,
|
|
331
|
+
new_dest: None,
|
|
332
|
+
new_title: None,
|
|
333
|
+
new_level: None,
|
|
334
|
+
new_id: None,
|
|
335
|
+
deleted: false,
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
fn collect_text(children: &[Node]) -> String {
|
|
340
|
+
children.iter().map(|c| c.text.as_str()).collect()
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
/// Kind string for leaves that have stable byte ranges (not affected by
|
|
344
|
+
/// autolink or emoji splitting). Used to look up ranges in the range map.
|
|
345
|
+
fn leaf_range_kind(event: &Event<'_>) -> Option<&'static str> {
|
|
346
|
+
match event {
|
|
347
|
+
Event::Code(_) => Some("code"),
|
|
348
|
+
Event::Rule => Some("rule"),
|
|
349
|
+
Event::InlineMath(_) => Some("inline_math"),
|
|
350
|
+
Event::DisplayMath(_) => Some("display_math"),
|
|
351
|
+
_ => None,
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
pub fn build_tree(events: Vec<Event<'_>>, ranges: &mut RangeMap) -> Vec<Node> {
|
|
356
|
+
let mut stack: Vec<Node> = Vec::new();
|
|
357
|
+
let mut roots: Vec<Node> = Vec::new();
|
|
358
|
+
|
|
359
|
+
for event in events {
|
|
360
|
+
match event {
|
|
361
|
+
Event::Start(tag) => {
|
|
362
|
+
let depth = stack.len();
|
|
363
|
+
let parent_kind = stack.last().map(|n| n.kind);
|
|
364
|
+
let kind = kind_for_tag(&tag);
|
|
365
|
+
let byte_range = ranges.get_mut(kind).and_then(|q| q.pop_front());
|
|
366
|
+
stack.push(node_from_start(tag, depth, parent_kind, byte_range));
|
|
367
|
+
}
|
|
368
|
+
Event::End(_) => {
|
|
369
|
+
if let Some(mut node) = stack.pop() {
|
|
370
|
+
node.text = collect_text(&node.children);
|
|
371
|
+
if let Some(parent) = stack.last_mut() {
|
|
372
|
+
parent.children.push(node);
|
|
373
|
+
} else {
|
|
374
|
+
roots.push(node);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
leaf => {
|
|
379
|
+
let depth = stack.len();
|
|
380
|
+
let parent_kind = stack.last().map(|n| n.kind);
|
|
381
|
+
let byte_range = leaf_range_kind(&leaf)
|
|
382
|
+
.and_then(|k| ranges.get_mut(k).and_then(|q| q.pop_front()));
|
|
383
|
+
let leaf_node = node_from_leaf(leaf, depth, parent_kind, byte_range);
|
|
384
|
+
if let Some(parent) = stack.last_mut() {
|
|
385
|
+
parent.children.push(leaf_node);
|
|
386
|
+
} else {
|
|
387
|
+
roots.push(leaf_node);
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
roots
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
fn tagend_for(node: &Node) -> TagEnd {
|
|
397
|
+
match node.kind {
|
|
398
|
+
"paragraph" => TagEnd::Paragraph,
|
|
399
|
+
"heading" => {
|
|
400
|
+
let lv = node.new_level.unwrap_or(node.level.unwrap_or(1));
|
|
401
|
+
TagEnd::Heading(level_from_u8(lv))
|
|
402
|
+
}
|
|
403
|
+
"blockquote" => {
|
|
404
|
+
let kind = if let NodeExtra::BlockQuote { kind } = &node.extra {
|
|
405
|
+
*kind
|
|
406
|
+
} else {
|
|
407
|
+
None
|
|
408
|
+
};
|
|
409
|
+
TagEnd::BlockQuote(kind)
|
|
410
|
+
}
|
|
411
|
+
"list" => TagEnd::List(false),
|
|
412
|
+
"ordered_list" => TagEnd::List(true),
|
|
413
|
+
"list_item" => TagEnd::Item,
|
|
414
|
+
"code_block" => TagEnd::CodeBlock,
|
|
415
|
+
"html_block" => TagEnd::HtmlBlock,
|
|
416
|
+
"table" => TagEnd::Table,
|
|
417
|
+
"table_head" => TagEnd::TableHead,
|
|
418
|
+
"table_row" => TagEnd::TableRow,
|
|
419
|
+
"table_cell" => TagEnd::TableCell,
|
|
420
|
+
"emphasis" => TagEnd::Emphasis,
|
|
421
|
+
"strong" => TagEnd::Strong,
|
|
422
|
+
"strikethrough" => TagEnd::Strikethrough,
|
|
423
|
+
"link" => TagEnd::Link,
|
|
424
|
+
"image" => TagEnd::Image,
|
|
425
|
+
"footnote_definition" => TagEnd::FootnoteDefinition,
|
|
426
|
+
"definition_list" => TagEnd::DefinitionList,
|
|
427
|
+
"definition_list_title" => TagEnd::DefinitionListTitle,
|
|
428
|
+
"definition_list_definition" => TagEnd::DefinitionListDefinition,
|
|
429
|
+
"superscript" => TagEnd::Superscript,
|
|
430
|
+
"subscript" => TagEnd::Subscript,
|
|
431
|
+
"metadata_block" => {
|
|
432
|
+
let kind = if let NodeExtra::MetadataBlock { kind } = &node.extra {
|
|
433
|
+
*kind
|
|
434
|
+
} else {
|
|
435
|
+
MetadataBlockKind::YamlStyle
|
|
436
|
+
};
|
|
437
|
+
TagEnd::MetadataBlock(kind)
|
|
438
|
+
}
|
|
439
|
+
_ => TagEnd::Paragraph,
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
fn start_event_for(node: &Node) -> Event<'static> {
|
|
444
|
+
match node.kind {
|
|
445
|
+
"paragraph" => Event::Start(Tag::Paragraph),
|
|
446
|
+
"heading" => {
|
|
447
|
+
let level = level_from_u8(node.new_level.unwrap_or(node.level.unwrap_or(1)));
|
|
448
|
+
let id = node.new_id.as_deref().or(node.id.as_deref()).map(own_str);
|
|
449
|
+
let (classes, attrs) = if let NodeExtra::Heading { classes, attrs } = &node.extra {
|
|
450
|
+
(classes.clone(), attrs.clone())
|
|
451
|
+
} else {
|
|
452
|
+
(vec![], vec![])
|
|
453
|
+
};
|
|
454
|
+
Event::Start(Tag::Heading {
|
|
455
|
+
level,
|
|
456
|
+
id,
|
|
457
|
+
classes,
|
|
458
|
+
attrs,
|
|
459
|
+
})
|
|
460
|
+
}
|
|
461
|
+
"blockquote" => {
|
|
462
|
+
let kind = if let NodeExtra::BlockQuote { kind } = &node.extra {
|
|
463
|
+
*kind
|
|
464
|
+
} else {
|
|
465
|
+
None
|
|
466
|
+
};
|
|
467
|
+
Event::Start(Tag::BlockQuote(kind))
|
|
468
|
+
}
|
|
469
|
+
"list" => Event::Start(Tag::List(None)),
|
|
470
|
+
"ordered_list" => Event::Start(Tag::List(Some(1))),
|
|
471
|
+
"list_item" => Event::Start(Tag::Item),
|
|
472
|
+
"code_block" => {
|
|
473
|
+
let lang = node.lang.as_deref().unwrap_or("");
|
|
474
|
+
let fenced = matches!(&node.extra, NodeExtra::CodeBlock { fenced: true });
|
|
475
|
+
if fenced {
|
|
476
|
+
Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(own_str(lang))))
|
|
477
|
+
} else {
|
|
478
|
+
Event::Start(Tag::CodeBlock(CodeBlockKind::Indented))
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
"html_block" => Event::Start(Tag::HtmlBlock),
|
|
482
|
+
"table" => {
|
|
483
|
+
let alignments = if let NodeExtra::Table { alignments } = &node.extra {
|
|
484
|
+
alignments.clone()
|
|
485
|
+
} else {
|
|
486
|
+
vec![]
|
|
487
|
+
};
|
|
488
|
+
Event::Start(Tag::Table(alignments))
|
|
489
|
+
}
|
|
490
|
+
"table_head" => Event::Start(Tag::TableHead),
|
|
491
|
+
"table_row" => Event::Start(Tag::TableRow),
|
|
492
|
+
"table_cell" => Event::Start(Tag::TableCell),
|
|
493
|
+
"emphasis" => Event::Start(Tag::Emphasis),
|
|
494
|
+
"strong" => Event::Start(Tag::Strong),
|
|
495
|
+
"strikethrough" => Event::Start(Tag::Strikethrough),
|
|
496
|
+
"link" => {
|
|
497
|
+
let dest = own_str(
|
|
498
|
+
node.new_dest
|
|
499
|
+
.as_deref()
|
|
500
|
+
.unwrap_or(node.dest.as_deref().unwrap_or("")),
|
|
501
|
+
);
|
|
502
|
+
let title = own_str(
|
|
503
|
+
node.new_title
|
|
504
|
+
.as_deref()
|
|
505
|
+
.unwrap_or(node.title.as_deref().unwrap_or("")),
|
|
506
|
+
);
|
|
507
|
+
let (link_type, link_id) = if let NodeExtra::Link { link_type, link_id } = &node.extra {
|
|
508
|
+
(*link_type, link_id.clone())
|
|
509
|
+
} else {
|
|
510
|
+
(LinkType::Inline, own_str(""))
|
|
511
|
+
};
|
|
512
|
+
Event::Start(Tag::Link {
|
|
513
|
+
link_type,
|
|
514
|
+
dest_url: dest,
|
|
515
|
+
title,
|
|
516
|
+
id: link_id,
|
|
517
|
+
})
|
|
518
|
+
}
|
|
519
|
+
"image" => {
|
|
520
|
+
let dest = own_str(
|
|
521
|
+
node.new_dest
|
|
522
|
+
.as_deref()
|
|
523
|
+
.unwrap_or(node.dest.as_deref().unwrap_or("")),
|
|
524
|
+
);
|
|
525
|
+
let title = own_str(
|
|
526
|
+
node.new_title
|
|
527
|
+
.as_deref()
|
|
528
|
+
.unwrap_or(node.title.as_deref().unwrap_or("")),
|
|
529
|
+
);
|
|
530
|
+
let (link_type, link_id) = if let NodeExtra::Image { link_type, link_id } = &node.extra
|
|
531
|
+
{
|
|
532
|
+
(*link_type, link_id.clone())
|
|
533
|
+
} else {
|
|
534
|
+
(LinkType::Inline, own_str(""))
|
|
535
|
+
};
|
|
536
|
+
Event::Start(Tag::Image {
|
|
537
|
+
link_type,
|
|
538
|
+
dest_url: dest,
|
|
539
|
+
title,
|
|
540
|
+
id: link_id,
|
|
541
|
+
})
|
|
542
|
+
}
|
|
543
|
+
"footnote_definition" => {
|
|
544
|
+
let label = if let NodeExtra::FootnoteDefinition { label } = &node.extra {
|
|
545
|
+
label.clone()
|
|
546
|
+
} else {
|
|
547
|
+
own_str("")
|
|
548
|
+
};
|
|
549
|
+
Event::Start(Tag::FootnoteDefinition(label))
|
|
550
|
+
}
|
|
551
|
+
"definition_list" => Event::Start(Tag::DefinitionList),
|
|
552
|
+
"definition_list_title" => Event::Start(Tag::DefinitionListTitle),
|
|
553
|
+
"definition_list_definition" => Event::Start(Tag::DefinitionListDefinition),
|
|
554
|
+
"superscript" => Event::Start(Tag::Superscript),
|
|
555
|
+
"subscript" => Event::Start(Tag::Subscript),
|
|
556
|
+
"metadata_block" => {
|
|
557
|
+
let kind = if let NodeExtra::MetadataBlock { kind } = &node.extra {
|
|
558
|
+
*kind
|
|
559
|
+
} else {
|
|
560
|
+
MetadataBlockKind::YamlStyle
|
|
561
|
+
};
|
|
562
|
+
Event::Start(Tag::MetadataBlock(kind))
|
|
563
|
+
}
|
|
564
|
+
_ => Event::Start(Tag::Paragraph),
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
pub fn tree_to_events(
|
|
569
|
+
nodes: Vec<Node>,
|
|
570
|
+
cm_opts: pulldown_cmark::Options,
|
|
571
|
+
flags: &Flags,
|
|
572
|
+
) -> Vec<Event<'static>> {
|
|
573
|
+
let mut out = Vec::new();
|
|
574
|
+
serialize_nodes(nodes, &mut out, cm_opts, flags);
|
|
575
|
+
out
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
fn serialize_nodes(
|
|
579
|
+
nodes: Vec<Node>,
|
|
580
|
+
out: &mut Vec<Event<'static>>,
|
|
581
|
+
cm_opts: pulldown_cmark::Options,
|
|
582
|
+
flags: &Flags,
|
|
583
|
+
) {
|
|
584
|
+
for node in nodes {
|
|
585
|
+
serialize_node(node, out, cm_opts, flags);
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
fn serialize_node(
|
|
590
|
+
node: Node,
|
|
591
|
+
out: &mut Vec<Event<'static>>,
|
|
592
|
+
cm_opts: pulldown_cmark::Options,
|
|
593
|
+
flags: &Flags,
|
|
594
|
+
) {
|
|
595
|
+
if node.deleted {
|
|
596
|
+
return;
|
|
597
|
+
}
|
|
598
|
+
// html= takes priority over markdown=; both override default rendering.
|
|
599
|
+
if let Some(html) = node.replacement_html {
|
|
600
|
+
out.push(Event::Html(CowStr::Boxed(html.into_boxed_str())));
|
|
601
|
+
return;
|
|
602
|
+
}
|
|
603
|
+
if let Some(md_src) = node.replacement_markdown {
|
|
604
|
+
// Re-parse the replacement markdown and apply the same enrichment
|
|
605
|
+
// filters (emoji, heading_ids, suppress_raw_html) so the fragment
|
|
606
|
+
// feels native. Handler dispatch is skipped—only the top-level
|
|
607
|
+
// document's handlers fire. Post-handler filters (syntax_highlight,
|
|
608
|
+
// allowlists) apply automatically since they run on the full stream
|
|
609
|
+
// after tree_to_events returns.
|
|
610
|
+
let sub_events: Vec<Event<'static>> = Parser::new_ext(&md_src, cm_opts)
|
|
611
|
+
.map(|e| e.into_static())
|
|
612
|
+
.collect();
|
|
613
|
+
let filtered = apply_pre_handler_filters(sub_events, flags);
|
|
614
|
+
out.extend(filtered);
|
|
615
|
+
return;
|
|
616
|
+
}
|
|
617
|
+
if node.is_container {
|
|
618
|
+
// Compute end tag before consuming node.children.
|
|
619
|
+
let end = tagend_for(&node);
|
|
620
|
+
out.push(start_event_for(&node));
|
|
621
|
+
serialize_nodes(node.children, out, cm_opts, flags);
|
|
622
|
+
out.push(Event::End(end));
|
|
623
|
+
} else if let NodeExtra::LeafEvent(ev) = node.extra {
|
|
624
|
+
out.push(ev);
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
/// Looks up the Ruby class `Inkmark::Event`.
///
/// Returns an error if either constant is missing or is not a class.
fn get_event_class(ruby: &Ruby) -> Result<magnus::RClass, Error> {
    ruby.class_object()
        .const_get::<_, magnus::RClass>("Inkmark")?
        .const_get("Event")
}
|
|
632
|
+
|
|
633
|
+
fn node_to_ruby_hash(node: &Node, ruby: &Ruby) -> Result<RHash, Error> {
|
|
634
|
+
let hash = ruby.hash_new();
|
|
635
|
+
hash.aset(ruby.to_symbol("kind"), ruby.str_new(node.kind))?;
|
|
636
|
+
hash.aset(ruby.to_symbol("text"), ruby.str_new(&node.text))?;
|
|
637
|
+
hash.aset(ruby.to_symbol("depth"), node.depth as i64)?;
|
|
638
|
+
set_optional_str(ruby, &hash, "parent_kind", node.parent_kind)?;
|
|
639
|
+
|
|
640
|
+
let ancestors = ruby.ary_new();
|
|
641
|
+
if let Some(pk) = node.parent_kind {
|
|
642
|
+
ancestors.push(ruby.to_symbol(pk))?;
|
|
643
|
+
}
|
|
644
|
+
hash.aset(ruby.to_symbol("ancestor_kinds"), ancestors)?;
|
|
645
|
+
|
|
646
|
+
set_optional_str(ruby, &hash, "lang", node.lang.as_deref())?;
|
|
647
|
+
set_optional_str(ruby, &hash, "dest", node.dest.as_deref())?;
|
|
648
|
+
set_optional_str(ruby, &hash, "title", node.title.as_deref())?;
|
|
649
|
+
set_optional_str(ruby, &hash, "id", node.id.as_deref())?;
|
|
650
|
+
set_optional_str(ruby, &hash, "label", node.label.as_deref())?;
|
|
651
|
+
|
|
652
|
+
match node.level {
|
|
653
|
+
Some(l) => hash.aset(ruby.to_symbol("level"), l as i64)?,
|
|
654
|
+
None => hash.aset(ruby.to_symbol("level"), ruby.qnil())?,
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
match &node.byte_range {
|
|
658
|
+
Some(r) => {
|
|
659
|
+
let ruby_range = ruby.range_new(r.start as i64, r.end as i64, true)?;
|
|
660
|
+
hash.aset(ruby.to_symbol("byte_range"), ruby_range)?;
|
|
661
|
+
}
|
|
662
|
+
None => hash.aset(ruby.to_symbol("byte_range"), ruby.qnil())?,
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
let children_arr = ruby.ary_new();
|
|
666
|
+
for child in &node.children {
|
|
667
|
+
let child_hash = node_to_ruby_hash(child, ruby)?;
|
|
668
|
+
children_arr.push(child_hash)?;
|
|
669
|
+
}
|
|
670
|
+
hash.aset(ruby.to_symbol("children"), children_arr)?;
|
|
671
|
+
|
|
672
|
+
Ok(hash)
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
/// Stores `val` under the symbol `key` in `hash`: a Ruby string when
/// present, `nil` otherwise.
fn set_optional_str(ruby: &Ruby, hash: &RHash, key: &str, val: Option<&str>) -> Result<(), Error> {
    let symbol = ruby.to_symbol(key);
    if let Some(text) = val {
        hash.aset(symbol, ruby.str_new(text))
    } else {
        hash.aset(symbol, ruby.qnil())
    }
}
|
|
681
|
+
|
|
682
|
+
/// Reads the state of the Ruby event object back into `node` after the
/// handlers have run.
///
/// `html`, `markdown` and `deleted?` are read for every node. `dest`/`title`
/// are read only for links and images, `level`/`id` only for headings.
/// Returns the first error raised by any of the Ruby calls or conversions.
fn apply_mutations(node: &mut Node, event_obj: Value, _ruby: &Ruby) -> Result<(), Error> {
    node.replacement_html = event_obj.funcall("html", ())?;
    node.replacement_markdown = event_obj.funcall("markdown", ())?;
    node.deleted = event_obj.funcall("deleted?", ())?;

    match node.kind {
        "link" | "image" => {
            node.new_dest = event_obj.funcall("dest", ())?;
            node.new_title = event_obj.funcall("title", ())?;
        }
        "heading" => {
            let requested: Option<i64> = event_obj.funcall("level", ())?;
            // Clamped to 1..=6 first, so the narrowing cast cannot truncate.
            node.new_level = requested.map(|lv| lv.clamp(1, 6) as u8);
            node.new_id = event_obj.funcall("id", ())?;
        }
        _ => {}
    }

    Ok(())
}
|
|
698
|
+
|
|
699
|
+
pub fn dispatch_handlers(
|
|
700
|
+
nodes: &mut Vec<Node>,
|
|
701
|
+
handlers: &RHash,
|
|
702
|
+
ruby: &Ruby,
|
|
703
|
+
) -> Result<(), Error> {
|
|
704
|
+
for node in nodes.iter_mut() {
|
|
705
|
+
dispatch_handlers(&mut node.children, handlers, ruby)?;
|
|
706
|
+
|
|
707
|
+
let key = ruby.to_symbol(node.kind);
|
|
708
|
+
let handler_arr: Option<RArray> = handlers.lookup(key)?;
|
|
709
|
+
if let Some(arr) = handler_arr {
|
|
710
|
+
let event_hash = node_to_ruby_hash(node, ruby)?;
|
|
711
|
+
let event_class = get_event_class(ruby)?;
|
|
712
|
+
let event_obj: Value = event_class.funcall("new", (event_hash,))?;
|
|
713
|
+
|
|
714
|
+
for handler_val in arr.into_iter() {
|
|
715
|
+
handler_val.funcall::<_, _, Value>("call", (event_obj,))?;
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
apply_mutations(node, event_obj, ruby)?;
|
|
719
|
+
}
|
|
720
|
+
}
|
|
721
|
+
Ok(())
|
|
722
|
+
}
|
|
723
|
+
|
|
724
|
+
pub fn native_walk(
|
|
725
|
+
ruby: &Ruby,
|
|
726
|
+
source: String,
|
|
727
|
+
opts_hash: RHash,
|
|
728
|
+
handlers: RHash,
|
|
729
|
+
) -> Result<(), Error> {
|
|
730
|
+
let (cm_opts, flags) = build_options(ruby, opts_hash)?;
|
|
731
|
+
let mut ranges = collect_byte_ranges(&source, cm_opts, flags.autolink);
|
|
732
|
+
let parser = Parser::new_ext(&source, cm_opts);
|
|
733
|
+
let events: Vec<Event> = parser.collect();
|
|
734
|
+
let pre = apply_pre_handler_filters(events, &flags);
|
|
735
|
+
let mut tree = build_tree(pre, &mut ranges);
|
|
736
|
+
dispatch_handlers(&mut tree, &handlers, ruby)?;
|
|
737
|
+
Ok(())
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
pub fn native_render_with_handlers(
|
|
741
|
+
ruby: &Ruby,
|
|
742
|
+
source: String,
|
|
743
|
+
opts_hash: RHash,
|
|
744
|
+
handlers: RHash,
|
|
745
|
+
) -> Result<String, Error> {
|
|
746
|
+
let (cm_opts, flags) = build_options(ruby, opts_hash)?;
|
|
747
|
+
let mut ranges = collect_byte_ranges(&source, cm_opts, flags.autolink);
|
|
748
|
+
let parser = Parser::new_ext(&source, cm_opts);
|
|
749
|
+
let events: Vec<Event> = parser.collect();
|
|
750
|
+
let pre = apply_pre_handler_filters(events, &flags);
|
|
751
|
+
let mut tree = build_tree(pre, &mut ranges);
|
|
752
|
+
dispatch_handlers(&mut tree, &handlers, ruby)?;
|
|
753
|
+
let owned = tree_to_events(tree, cm_opts, &flags);
|
|
754
|
+
let post = apply_post_handler_filters(owned, &flags);
|
|
755
|
+
let mut buf = String::with_capacity(source.len() * 3 / 2);
|
|
756
|
+
pulldown_cmark::html::push_html(&mut buf, post.into_iter());
|
|
757
|
+
Ok(buf)
|
|
758
|
+
}
|