inkmark 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,262 @@
1
+ //! Heading ID generation filter.
2
+ //!
3
+ //! When enabled, walks the event stream, collects the text content of each
4
+ //! heading that doesn't already have an id, and rewrites the `Event::Start`
5
+ //! to carry an auto-generated `id` derived from the heading text. Headings
6
+ //! that already have an id (via `heading_attributes: true`) are left alone.
7
+ //!
8
+ //! Duplicate base slugs get a counter suffix: `intro`, `intro-1`, `intro-2`.
9
+
10
+ use std::collections::HashMap;
11
+
12
+ use deunicode::deunicode_char;
13
+ use pulldown_cmark::{CowStr, Event, Tag, TagEnd};
14
+
15
/// Hands out unique heading slugs: the first use of a base slug is returned
/// bare, later uses get a `-N` suffix (`intro`, `intro-1`, `intro-2`, …).
///
/// Every slug that has been handed out is remembered, including the suffixed
/// ones, so a generated `intro-1` can never clash with a heading whose own
/// text slugifies to `intro-1` (and vice versa).
///
/// Shared between `heading::add_ids` and `stats::collect` so both produce
/// identical slug sequences from the same heading stream.
#[derive(Debug, Default)]
pub struct SlugDeduplicator {
    /// Maps every slug already issued (or used as a base) to the next
    /// numeric suffix that should be tried for it.
    seen: HashMap<String, usize>,
}

impl SlugDeduplicator {
    /// Create a deduplicator that has not seen any slug yet.
    pub fn new() -> Self {
        Self::default()
    }

    /// Return a slug for `base` that differs from every slug returned so far.
    ///
    /// * An empty `base` is returned as-is and is not recorded (the caller
    ///   should skip it).
    /// * The first call with a given base returns the base unchanged.
    /// * Later calls append `-1`, `-2`, …, skipping any candidate that was
    ///   already issued — for example because an earlier heading's text
    ///   slugified to exactly that string.
    pub fn deduplicate(&mut self, base: String) -> String {
        if base.is_empty() {
            return base;
        }

        // `copied()` ends the borrow of the map so it can be mutated below.
        let Some(mut suffix) = self.seen.get(&base).copied() else {
            self.seen.insert(base.clone(), 1);
            return base;
        };

        // Probe `base-N` until a slug that was never issued is found.
        let slug = loop {
            let candidate = format!("{base}-{suffix}");
            suffix += 1;
            if !self.seen.contains_key(&candidate) {
                break candidate;
            }
        };

        // Remember where to resume for this base, and reserve the new slug
        // so that it cannot be issued a second time.
        self.seen.insert(base, suffix);
        self.seen.insert(slug.clone(), 1);
        slug
    }
}
49
+
50
+ /// Apply heading-id generation to a full event stream in place.
51
+ ///
52
+ /// Nested headings aren't possible in CommonMark so a single-level scan is
53
+ /// sufficient.
54
+ pub fn add_ids(events: &mut Vec<Event<'_>>) {
55
+ let mut dedup = SlugDeduplicator::new();
56
+
57
+ for i in 0..events.len() {
58
+ // Only act on `Start(Heading)` events that lack an id.
59
+ let needs_id = matches!(&events[i], Event::Start(Tag::Heading { id: None, .. }));
60
+ if !needs_id {
61
+ continue;
62
+ }
63
+
64
+ // Collect the raw text of this heading by scanning forward until
65
+ // the matching `End(Heading)`.
66
+ let text = collect_heading_text(events, i);
67
+ let base = slugify(&text);
68
+ if base.is_empty() {
69
+ continue;
70
+ }
71
+
72
+ let slug = dedup.deduplicate(base);
73
+
74
+ // Rebuild the heading event with the generated id.
75
+ let placeholder = Event::SoftBreak;
76
+ let old = std::mem::replace(&mut events[i], placeholder);
77
+ if let Event::Start(Tag::Heading {
78
+ level,
79
+ classes,
80
+ attrs,
81
+ ..
82
+ }) = old
83
+ {
84
+ events[i] = Event::Start(Tag::Heading {
85
+ level,
86
+ id: Some(CowStr::Boxed(slug.into_boxed_str())),
87
+ classes,
88
+ attrs,
89
+ });
90
+ }
91
+ }
92
+ }
93
+
94
+ /// Walk forward from a `Start(Heading)` at index `start`, concatenating all
95
+ /// `Event::Text` and `Event::Code` payloads until the matching `End(Heading)`.
96
+ fn collect_heading_text(events: &[Event<'_>], start: usize) -> String {
97
+ let mut text = String::new();
98
+ let mut i = start + 1;
99
+ while i < events.len() {
100
+ match &events[i] {
101
+ Event::End(TagEnd::Heading(_)) => return text,
102
+ Event::Text(t) | Event::Code(t) => text.push_str(t),
103
+ _ => {}
104
+ }
105
+ i += 1;
106
+ }
107
+ text
108
+ }
109
+
110
+ /// Convert heading text into a URL-safe slug for use as an `id` attribute.
111
+ ///
112
+ /// Algorithm: walk the input char by char. ASCII alphanumerics are emitted
113
+ /// lowercased on a fast path without any transliteration lookup. Every
114
+ /// other character goes through `deunicode_char`, which returns an ASCII
115
+ /// transliteration. The ASCII expansion is then scanned the same way
116
+ /// as the input: alphanumerics pushed, anything else coalesced into a
117
+ /// single `-` separator with the usual no-double-dash collapse.
118
+ ///
119
+ /// Leading separators never appear because we start with `prev_was_sep = true`;
120
+ /// trailing separators are stripped at the end. A heading whose entire
121
+ /// transliteration is empty produces an empty slug, so no id is emitted.
122
+ pub fn slugify(text: &str) -> String {
123
+ let mut slug = String::with_capacity(text.len());
124
+ let mut prev_was_sep = true;
125
+
126
+ for ch in text.chars() {
127
+ // Fast path: ASCII alphanumeric
128
+ if ch.is_ascii_alphanumeric() {
129
+ slug.push(ch.to_ascii_lowercase());
130
+ prev_was_sep = false;
131
+ continue;
132
+ }
133
+
134
+ match deunicode_char(ch) {
135
+ Some(s) => {
136
+ for r in s.chars() {
137
+ if r.is_ascii_alphanumeric() {
138
+ slug.push(r.to_ascii_lowercase());
139
+ prev_was_sep = false;
140
+ } else if !prev_was_sep {
141
+ slug.push('-');
142
+ prev_was_sep = true;
143
+ }
144
+ }
145
+ }
146
+ None => {
147
+ // Character has no known transliteration. Treat as a
148
+ // separator boundary.
149
+ if !prev_was_sep {
150
+ slug.push('-');
151
+ prev_was_sep = true;
152
+ }
153
+ }
154
+ }
155
+ }
156
+
157
+ if slug.ends_with('-') {
158
+ slug.pop();
159
+ }
160
+ slug
161
+ }
162
+
163
#[cfg(test)]
mod tests {
    use super::{add_ids, slugify};
    use pulldown_cmark::{CowStr, Event, HeadingLevel, Tag, TagEnd};

    /// Start/Text/End triple for a heading of `level` that carries no id.
    fn bare_heading(level: HeadingLevel, text: &'static str) -> Vec<Event<'static>> {
        vec![
            Event::Start(Tag::Heading {
                level,
                id: None,
                classes: Vec::new(),
                attrs: Vec::new(),
            }),
            Event::Text(CowStr::Borrowed(text)),
            Event::End(TagEnd::Heading(level)),
        ]
    }

    /// Assert that every `(input, expected)` pair slugifies as stated.
    fn check_slugs(cases: &[(&str, &str)]) {
        for (input, expected) in cases {
            assert_eq!(slugify(input), *expected);
        }
    }

    #[test]
    fn slugify_basic() {
        check_slugs(&[("Hello, World!", "hello-world")]);
    }

    #[test]
    fn slugify_trims_edges() {
        check_slugs(&[(" Leading and trailing ", "leading-and-trailing")]);
    }

    #[test]
    fn slugify_collapses_runs() {
        check_slugs(&[
            ("Spaces between words", "spaces-between-words"),
            ("Multiple---Dashes", "multiple-dashes"),
        ]);
    }

    #[test]
    fn slugify_plain_word() {
        check_slugs(&[("Introduction", "introduction")]);
    }

    #[test]
    fn slugify_transliterates_latin_diacritics() {
        check_slugs(&[("Résumé", "resume"), ("naïve", "naive")]);
    }

    #[test]
    fn slugify_transliterates_cyrillic() {
        check_slugs(&[
            ("Лев Толстой", "lev-tolstoi"),
            ("Санкт-Петербург", "sankt-peterburg"),
        ]);
    }

    #[test]
    fn slugify_transliterates_cjk() {
        check_slugs(&[
            ("中文", "zhong-wen"),
            ("Hello 中文 World", "hello-zhong-wen-world"),
        ]);
    }

    #[test]
    fn add_ids_assigns_id_to_heading_without_one() {
        // A single H1 "Hello" with no id.
        let mut events = bare_heading(HeadingLevel::H1, "Hello");

        add_ids(&mut events);

        match &events[0] {
            Event::Start(Tag::Heading { id: Some(id), .. }) => {
                assert_eq!(id.as_ref(), "hello");
            }
            other => panic!("expected Start(Heading{{id: Some(_)}}), got {other:?}"),
        }
    }

    #[test]
    fn add_ids_deduplicates_colliding_slugs() {
        // Three consecutive H2 headings with identical text.
        let mut events: Vec<Event> = Vec::new();
        for _ in 0..3 {
            events.extend(bare_heading(HeadingLevel::H2, "Intro"));
        }

        add_ids(&mut events);

        let mut ids: Vec<String> = Vec::new();
        for event in &events {
            if let Event::Start(Tag::Heading { id: Some(id), .. }) = event {
                ids.push(id.to_string());
            }
        }

        assert_eq!(ids, vec!["intro", "intro-1", "intro-2"]);
    }
}
@@ -0,0 +1,202 @@
1
+ //! Syntax highlighting filter for fenced code blocks.
2
+ //!
3
+ //! When enabled, intercepts fenced code blocks that have an explicit language
4
+ //! tag (e.g. ```` ```rust ````), runs the code through syntect's
5
+ //! `ClassedHTMLGenerator`, and replaces the original
6
+ //! `Start(CodeBlock) / Text / End(CodeBlock)` event sequence with a
7
+ //! single `Event::Html` carrying the highlighted markup.
8
+ //!
9
+ //! Code blocks without a language tag (bare ```` ``` ````) and indented code
10
+ //! blocks are left alone (no language specified).
11
+ //!
12
+ //! The output uses CSS class names (via `ClassStyle::Spaced`).
13
+
14
+ use std::sync::OnceLock;
15
+
16
+ use magnus::{Error, Ruby};
17
+ use pulldown_cmark::{CodeBlockKind, CowStr, Event, Tag, TagEnd};
18
+ use syntect::highlighting::ThemeSet;
19
+ use syntect::html::{css_for_theme_with_class_style, ClassStyle, ClassedHTMLGenerator};
20
+ use syntect::parsing::SyntaxSet;
21
+ use syntect::util::LinesWithEndings;
22
+
23
+ /// Process-lifetime cache for the default syntax set. Loading the embedded
24
+ /// syntax definitions takes ~100-200ms on first call.
25
+ static SYNTAX_SET: OnceLock<SyntaxSet> = OnceLock::new();
26
+
27
+ fn syntax_set() -> &'static SyntaxSet {
28
+ SYNTAX_SET.get_or_init(SyntaxSet::load_defaults_newlines)
29
+ }
30
+
31
+ /// Replace fenced code blocks that have a language tag with syntect-
32
+ /// highlighted HTML. Blocks without a language and indented code blocks
33
+ /// pass through unchanged.
34
+ pub fn highlight(events: Vec<Event<'_>>) -> Vec<Event<'_>> {
35
+ let ss = syntax_set();
36
+ let mut out: Vec<Event<'_>> = Vec::with_capacity(events.len());
37
+ let mut iter = events.into_iter();
38
+
39
+ while let Some(event) = iter.next() {
40
+ match &event {
41
+ Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(lang))) if !lang.is_empty() => {
42
+ let lang_str = lang.to_string();
43
+
44
+ // Consume text events until End(CodeBlock).
45
+ let mut code = String::new();
46
+ for inner in iter.by_ref() {
47
+ match inner {
48
+ Event::End(TagEnd::CodeBlock) => break,
49
+ Event::Text(t) => code.push_str(&t),
50
+ _ => {}
51
+ }
52
+ }
53
+
54
+ let html = highlight_code(&code, &lang_str, ss);
55
+ out.push(Event::Html(CowStr::Boxed(html.into_boxed_str())));
56
+ }
57
+ _ => out.push(event),
58
+ }
59
+ }
60
+
61
+ out
62
+ }
63
+
64
+ /// Run syntect on a code string with the given language hint. Returns a
65
+ /// complete `<pre><code class="language-{lang}">...highlighted...</code></pre>`
66
+ /// block. If the language isn't recognized, falls back to plain-text grammar.
67
+ #[inline]
68
+ fn highlight_code(code: &str, lang: &str, ss: &SyntaxSet) -> String {
69
+ let syntax = ss
70
+ .find_syntax_by_token(lang)
71
+ .unwrap_or_else(|| ss.find_syntax_plain_text());
72
+
73
+ let mut gen = ClassedHTMLGenerator::new_with_class_style(syntax, ss, ClassStyle::Spaced);
74
+
75
+ for line in LinesWithEndings::from(code) {
76
+ // parse_html_for_line_which_includes_newline can return Err on
77
+ // malformed syntax definitions. Swallow the error and stop highlighting this block.
78
+ if gen
79
+ .parse_html_for_line_which_includes_newline(line)
80
+ .is_err()
81
+ {
82
+ break;
83
+ }
84
+ }
85
+
86
+ let highlighted = gen.finalize();
87
+
88
+ // Wrap each line in <span class="line"> so CSS can add line numbers
89
+ // via counter()/::before, highlight specific lines on hover, etc.
90
+ let mut buf = format!("<pre><code class=\"language-{lang}\">");
91
+ for line in highlighted.split('\n') {
92
+ if !line.is_empty() {
93
+ buf.push_str("<span class=\"line\">");
94
+ buf.push_str(line);
95
+ buf.push_str("</span>\n");
96
+ }
97
+ }
98
+ buf.push_str("</code></pre>");
99
+ buf
100
+ }
101
+
102
+ /// Default theme name for CSS generation.
103
+ const DEFAULT_THEME: &str = "base16-ocean.dark";
104
+
105
+ static THEME_SET: OnceLock<ThemeSet> = OnceLock::new();
106
+
107
+ fn theme_set() -> &'static ThemeSet {
108
+ THEME_SET.get_or_init(ThemeSet::load_defaults)
109
+ }
110
+
111
+ /// Quality of life helper.
112
+ /// Return CSS that styles the `<span class="...">` tokens produced by
113
+ /// `highlight()`. Accepts an optional theme name; defaults to
114
+ /// "base16-ocean.dark" when nil. The CSS string is suitable for embedding
115
+ /// in a `<style>` tag or writing to a `.css` file.
116
+ pub fn syntax_css(ruby: &Ruby, theme_name: Option<String>) -> Result<String, Error> {
117
+ let ts = theme_set();
118
+ let name = theme_name.as_deref().unwrap_or(DEFAULT_THEME);
119
+ let theme = ts.themes.get(name).ok_or_else(|| {
120
+ let available: Vec<&str> = ts.themes.keys().map(|s| s.as_str()).collect();
121
+ Error::new(
122
+ ruby.exception_arg_error(),
123
+ format!("unknown syntax theme '{name}'. Available: {available:?}"),
124
+ )
125
+ })?;
126
+ css_for_theme_with_class_style(theme, ClassStyle::Spaced).map_err(|e| {
127
+ Error::new(
128
+ ruby.exception_runtime_error(),
129
+ format!("failed to generate CSS: {e}"),
130
+ )
131
+ })
132
+ }
133
+
134
+ /// Return an array of available theme names.
135
+ pub fn syntax_themes() -> Vec<String> {
136
+ theme_set().themes.keys().cloned().collect()
137
+ }
138
+
139
#[cfg(test)]
mod tests {
    use super::*;
    use pulldown_cmark::{CodeBlockKind, CowStr, Event, Tag, TagEnd};

    /// Start/Text/End triple for one code block of the given kind.
    fn code_block(kind: CodeBlockKind<'static>, body: &'static str) -> Vec<Event<'static>> {
        vec![
            Event::Start(Tag::CodeBlock(kind)),
            Event::Text(CowStr::Borrowed(body)),
            Event::End(TagEnd::CodeBlock),
        ]
    }

    #[test]
    fn highlight_rust_code() {
        let html = highlight_code("let x = 1;\n", "rust", syntax_set());
        assert!(html.contains("<span"), "should contain span tags: {html}");
        assert!(html.contains("language-rust"));
        assert!(html.contains("<pre><code"));
    }

    #[test]
    fn unknown_language_falls_back_to_plain_text() {
        // An unrecognised token must still yield a wrapped block that
        // contains the original text.
        let html = highlight_code("hello\n", "nonexistent-lang-xyz", syntax_set());
        assert!(html.contains("hello"));
        assert!(html.contains("<pre><code"));
    }

    #[test]
    fn highlight_filter_replaces_fenced_block() {
        let fenced = CodeBlockKind::Fenced(CowStr::Borrowed("rust"));
        let out = highlight(code_block(fenced, "let x = 1;\n"));

        assert_eq!(out.len(), 1);
        match &out[0] {
            Event::Html(html) => {
                assert!(html.contains("<span"), "missing spans: {html}");
                assert!(html.contains("language-rust"));
            }
            other => panic!("expected Html event, got {other:?}"),
        }
    }

    #[test]
    fn highlight_filter_skips_blocks_without_language() {
        let untagged = CodeBlockKind::Fenced(CowStr::Borrowed(""));
        let out = highlight(code_block(untagged, "plain\n"));
        // All three events pass through; nothing is collapsed into one.
        assert_eq!(out.len(), 3);
    }

    #[test]
    fn highlight_filter_skips_indented_blocks() {
        let out = highlight(code_block(CodeBlockKind::Indented, "indented\n"));
        assert_eq!(out.len(), 3);
    }
}