RubyGems - inkmark - Versions diffs - 0.1.0 - Mend

inkmark 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +3 -0
data/Cargo.lock +940 -0
data/Cargo.toml +27 -0
data/LICENSE.txt +21 -0
data/NOTICE +16 -0
data/README.md +1166 -0
data/ext/inkmark/Cargo.toml +31 -0
data/ext/inkmark/build.rs +5 -0
data/ext/inkmark/extconf.rb +6 -0
data/ext/inkmark/src/autolink.rs +167 -0
data/ext/inkmark/src/chunks_by_heading.rs +325 -0
data/ext/inkmark/src/chunks_by_size.rs +302 -0
data/ext/inkmark/src/document.rs +411 -0
data/ext/inkmark/src/emoji.rs +197 -0
data/ext/inkmark/src/handler.rs +758 -0
data/ext/inkmark/src/heading.rs +262 -0
data/ext/inkmark/src/highlight.rs +202 -0
data/ext/inkmark/src/image.rs +284 -0
data/ext/inkmark/src/lib.rs +54 -0
data/ext/inkmark/src/link.rs +291 -0
data/ext/inkmark/src/options.rs +231 -0
data/ext/inkmark/src/plain_text.rs +445 -0
data/ext/inkmark/src/scheme_filter.rs +319 -0
data/ext/inkmark/src/stats.rs +453 -0
data/ext/inkmark/src/tag_filter.rs +226 -0
data/ext/inkmark/src/toc.rs +221 -0
data/ext/inkmark/src/truncate.rs +267 -0
data/ext/inkmark/src/url_match.rs +178 -0
data/lib/inkmark/event.rb +342 -0
data/lib/inkmark/native.rb +8 -0
data/lib/inkmark/options.rb +698 -0
data/lib/inkmark/toc.rb +40 -0
data/lib/inkmark/version.rb +6 -0
data/lib/inkmark.rb +711 -0
data/sig/inkmark.rbs +219 -0
metadata +208 -0

data/ext/inkmark/src/heading.rs ADDED Viewed

@@ -0,0 +1,262 @@
+//! Heading ID generation filter.
+//!
+//! When enabled, walks the event stream, collects the text content of each
+//! heading that doesn't already have an id, and rewrites the `Event::Start`
+//! to carry an auto-generated `id` derived from the heading text. Headings
+//! that already have an id (via `heading_attributes: true`) are left alone.
+//!
+//! Duplicate base slugs get a counter suffix: `intro`, `intro-1`, `intro-2`.
+use std::collections::HashMap;
+use deunicode::deunicode_char;
+use pulldown_cmark::{CowStr, Event, Tag, TagEnd};
+/// Hands out unique heading slugs: the first use of a base slug is bare,
+/// later collisions get a `-N` suffix (intro, intro-1, intro-2, …).
+///
+/// Shared between `heading::add_ids` and `stats::collect` so both produce
+/// identical slug sequences from the same heading stream.
+#[derive(Debug, Default)]
+pub struct SlugDeduplicator {
+    /// Every slug handed out so far, mapped to the next `-N` suffix to try
+    /// when that slug is requested again as a base.
+    seen: HashMap<String, usize>,
+}
+impl SlugDeduplicator {
+    /// Create a deduplicator that has not handed out any slug yet.
+    pub fn new() -> Self {
+        Self::default()
+    }
+    /// Return a slug for `base` that differs from every slug returned earlier.
+    ///
+    /// An empty `base` is returned as-is (the caller should skip it) and is
+    /// not recorded. Otherwise the first call with a given base returns the
+    /// base unchanged; subsequent calls append `-1`, `-2`, etc. A suffixed
+    /// candidate that was already handed out (for example because an earlier
+    /// heading slugified to `intro-1` on its own) is skipped, so two calls
+    /// never return the same non-empty slug.
+    pub fn deduplicate(&mut self, base: String) -> String {
+        if base.is_empty() {
+            return base;
+        }
+        let Some(&next) = self.seen.get(&base) else {
+            // First sighting: hand out the bare slug; its suffixes start at 1.
+            self.seen.insert(base.clone(), 1);
+            return base;
+        };
+        let mut suffix = next;
+        loop {
+            let candidate = format!("{base}-{suffix}");
+            suffix += 1;
+            if !self.seen.contains_key(&candidate) {
+                // Record the generated slug too, so a later heading whose
+                // own base equals it gets a suffix instead of reusing it.
+                self.seen.insert(candidate.clone(), 1);
+                self.seen.insert(base, suffix);
+                return candidate;
+            }
+        }
+    }
+}
+/// Give every id-less heading in `events` an auto-generated `id`, in place.
+///
+/// For each `Start(Heading)` whose `id` is `None`, the heading's text is
+/// collected, slugified and deduplicated, and the result is stored as the
+/// heading's id. Headings that already have an id, and headings whose slug
+/// comes out empty, are left untouched.
+///
+/// Nested headings aren't possible in CommonMark so a single-level scan is
+/// sufficient.
+pub fn add_ids(events: &mut Vec<Event<'_>>) {
+    let mut slugs = SlugDeduplicator::new();
+    for idx in 0..events.len() {
+        // Anything other than a heading start without an id is skipped.
+        if !matches!(&events[idx], Event::Start(Tag::Heading { id: None, .. })) {
+            continue;
+        }
+        // The slug is derived from the text between this start event and
+        // the matching `End(Heading)`.
+        let base = slugify(&collect_heading_text(events, idx));
+        if base.is_empty() {
+            continue;
+        }
+        let unique = slugs.deduplicate(base);
+        // Only the id changes; level, classes and attrs stay as parsed.
+        if let Event::Start(Tag::Heading { id, .. }) = &mut events[idx] {
+            *id = Some(CowStr::Boxed(unique.into_boxed_str()));
+        }
+    }
+}
+/// Gather the text of the heading whose `Start(Heading)` sits at index
+/// `start`.
+///
+/// Scans the events after `start` and concatenates every `Event::Text` and
+/// `Event::Code` payload until the first `End(Heading)`. All other events
+/// contribute nothing. If no `End(Heading)` follows, everything up to the
+/// end of the slice is collected.
+fn collect_heading_text(events: &[Event<'_>], start: usize) -> String {
+    let mut collected = String::new();
+    for event in events.iter().skip(start + 1) {
+        match event {
+            Event::End(TagEnd::Heading(_)) => break,
+            Event::Text(piece) | Event::Code(piece) => collected.push_str(piece),
+            _ => {}
+        }
+    }
+    collected
+}
+/// Turn heading text into a URL-safe slug for use as an `id` attribute.
+///
+/// The input is processed char by char. ASCII alphanumerics are emitted
+/// lowercased without a transliteration lookup. Every other character is
+/// passed to `deunicode_char`; the replacement it returns is processed with
+/// the same rule: alphanumerics are kept (lowercased), anything else becomes
+/// a `-` separator, and consecutive separators collapse into one. A
+/// character with no transliteration counts as a separator.
+///
+/// The slug never starts with `-` because the scan begins in the "just
+/// emitted a separator" state, and a trailing `-` is removed at the end.
+/// Text that yields no alphanumerics at all produces an empty slug.
+pub fn slugify(text: &str) -> String {
+    let mut slug = String::with_capacity(text.len());
+    // Starts `true` so separators before the first kept character are dropped.
+    let mut at_boundary = true;
+    // Single place for the keep / collapse rule, used for raw input
+    // characters and for transliterated output alike.
+    let mut emit = |c: char| {
+        if c.is_ascii_alphanumeric() {
+            slug.push(c.to_ascii_lowercase());
+            at_boundary = false;
+        } else if !at_boundary {
+            slug.push('-');
+            at_boundary = true;
+        }
+    };
+    for ch in text.chars() {
+        if ch.is_ascii_alphanumeric() {
+            // Fast path: no transliteration lookup needed.
+            emit(ch);
+        } else if let Some(ascii) = deunicode_char(ch) {
+            ascii.chars().for_each(&mut emit);
+        } else {
+            // No known transliteration: treat as a separator boundary.
+            emit('-');
+        }
+    }
+    if slug.ends_with('-') {
+        slug.pop();
+    }
+    slug
+}
+#[cfg(test)]
+mod tests {
+    use super::{add_ids, slugify};
+    use pulldown_cmark::{CowStr, Event, HeadingLevel, Tag, TagEnd};
+    /// Build the `Start / Text / End` events of one heading that has no id.
+    fn heading_without_id(level: HeadingLevel, text: &'static str) -> Vec<Event<'static>> {
+        vec![
+            Event::Start(Tag::Heading {
+                level,
+                id: None,
+                classes: vec![],
+                attrs: vec![],
+            }),
+            Event::Text(CowStr::Borrowed(text)),
+            Event::End(TagEnd::Heading(level)),
+        ]
+    }
+    /// Assert that each `(input, expected)` pair slugifies as expected.
+    fn assert_slugs(cases: &[(&str, &str)]) {
+        for (input, expected) in cases {
+            assert_eq!(slugify(input), *expected, "input: {input:?}");
+        }
+    }
+    #[test]
+    fn slugify_basic() {
+        assert_slugs(&[("Hello, World!", "hello-world")]);
+    }
+    #[test]
+    fn slugify_trims_edges() {
+        assert_slugs(&[("  Leading and trailing  ", "leading-and-trailing")]);
+    }
+    #[test]
+    fn slugify_collapses_runs() {
+        assert_slugs(&[
+            ("Spaces   between  words", "spaces-between-words"),
+            ("Multiple---Dashes", "multiple-dashes"),
+        ]);
+    }
+    #[test]
+    fn slugify_plain_word() {
+        assert_slugs(&[("Introduction", "introduction")]);
+    }
+    #[test]
+    fn slugify_transliterates_latin_diacritics() {
+        assert_slugs(&[("Résumé", "resume"), ("naïve", "naive")]);
+    }
+    #[test]
+    fn slugify_transliterates_cyrillic() {
+        assert_slugs(&[
+            ("Лев Толстой", "lev-tolstoi"),
+            ("Санкт-Петербург", "sankt-peterburg"),
+        ]);
+    }
+    #[test]
+    fn slugify_transliterates_cjk() {
+        assert_slugs(&[
+            ("中文", "zhong-wen"),
+            ("Hello 中文 World", "hello-zhong-wen-world"),
+        ]);
+    }
+    #[test]
+    fn add_ids_assigns_id_to_heading_without_one() {
+        // Start(Heading{id: None}) + Text("Hello") + End(Heading)
+        let mut events = heading_without_id(HeadingLevel::H1, "Hello");
+        add_ids(&mut events);
+        match &events[0] {
+            Event::Start(Tag::Heading { id: Some(id), .. }) => {
+                assert_eq!(id.as_ref(), "hello");
+            }
+            other => panic!("expected Start(Heading{{id: Some(_)}}), got {other:?}"),
+        }
+    }
+    #[test]
+    fn add_ids_deduplicates_colliding_slugs() {
+        // Three identical headings back to back.
+        let mut events: Vec<Event> = Vec::new();
+        for _ in 0..3 {
+            events.extend(heading_without_id(HeadingLevel::H2, "Intro"));
+        }
+        add_ids(&mut events);
+        let mut ids: Vec<String> = Vec::new();
+        for event in &events {
+            if let Event::Start(Tag::Heading { id: Some(id), .. }) = event {
+                ids.push(id.to_string());
+            }
+        }
+        assert_eq!(ids, vec!["intro", "intro-1", "intro-2"]);
+    }
+}

data/ext/inkmark/src/highlight.rs ADDED Viewed

@@ -0,0 +1,202 @@
+//! Syntax highlighting filter for fenced code blocks.
+//!
+//! When enabled, intercepts fenced code blocks that have an explicit language
+//! tag (e.g. ```` ```rust ````), runs the code through syntect's
+//! `ClassedHTMLGenerator`, and replaces the original
+//! `Start(CodeBlock) / Text / End(CodeBlock)` event sequence with a
+//! single `Event::Html` carrying the highlighted markup.
+//!
+//! Code blocks without a language tag (bare ```` ``` ````) and indented code
+//! blocks are left alone (no language specified).
+//!
+//! The output uses CSS class names (via `ClassStyle::Spaced`).
+use std::sync::OnceLock;
+use magnus::{Error, Ruby};
+use pulldown_cmark::{CodeBlockKind, CowStr, Event, Tag, TagEnd};
+use syntect::highlighting::ThemeSet;
+use syntect::html::{css_for_theme_with_class_style, ClassStyle, ClassedHTMLGenerator};
+use syntect::parsing::SyntaxSet;
+use syntect::util::LinesWithEndings;
+/// Return the default syntect syntax set, loading it on first use.
+///
+/// The set is cached for the lifetime of the process. Loading the embedded
+/// syntax definitions takes ~100-200ms on first call; later calls return the
+/// cached value.
+fn syntax_set() -> &'static SyntaxSet {
+    // Scoped to this function: nothing else needs to touch the cache directly.
+    static SYNTAX_SET: OnceLock<SyntaxSet> = OnceLock::new();
+    SYNTAX_SET.get_or_init(SyntaxSet::load_defaults_newlines)
+}
+/// Swap every fenced code block that names a language for one `Event::Html`
+/// holding syntect-highlighted markup.
+///
+/// A qualifying `Start(CodeBlock)` and everything up to and including its
+/// `End(CodeBlock)` are consumed; the `Text` payloads in between are joined
+/// and highlighted. Fenced blocks with an empty language, indented code
+/// blocks and all other events pass through unchanged.
+pub fn highlight(events: Vec<Event<'_>>) -> Vec<Event<'_>> {
+    let syntaxes = syntax_set();
+    let mut rewritten: Vec<Event<'_>> = Vec::with_capacity(events.len());
+    let mut pending = events.into_iter();
+    while let Some(event) = pending.next() {
+        let lang = match &event {
+            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) if !info.is_empty() => {
+                info.to_string()
+            }
+            _ => {
+                rewritten.push(event);
+                continue;
+            }
+        };
+        // Drain the block body. `take_while` also swallows the closing
+        // `End(CodeBlock)`, so the outer loop resumes right after the block.
+        let mut code = String::new();
+        let body = pending
+            .by_ref()
+            .take_while(|inner| !matches!(inner, Event::End(TagEnd::CodeBlock)));
+        for inner in body {
+            if let Event::Text(chunk) = inner {
+                code.push_str(&chunk);
+            }
+        }
+        let html = highlight_code(&code, &lang, syntaxes);
+        rewritten.push(Event::Html(CowStr::Boxed(html.into_boxed_str())));
+    }
+    rewritten
+}
+/// Escape the characters that are significant in HTML text and in quoted
+/// attribute values (`&`, `<`, `>`, `"`, `'`).
+fn escape_html(raw: &str) -> String {
+    let mut escaped = String::with_capacity(raw.len());
+    for ch in raw.chars() {
+        match ch {
+            '&' => escaped.push_str("&amp;"),
+            '<' => escaped.push_str("&lt;"),
+            '>' => escaped.push_str("&gt;"),
+            '"' => escaped.push_str("&quot;"),
+            '\'' => escaped.push_str("&#39;"),
+            other => escaped.push(other),
+        }
+    }
+    escaped
+}
+/// Run syntect on a code string with the given language hint. Returns a
+/// complete `<pre><code class="language-{lang}">...highlighted...</code></pre>`
+/// block. If the language isn't recognized, falls back to plain-text grammar.
+///
+/// `lang` comes from the fence info string of the Markdown source, so it is
+/// HTML-escaped before being written into the `class` attribute.
+///
+/// If syntect reports an error for any line, highlighting is abandoned for
+/// this block and the whole code is emitted HTML-escaped instead, so no
+/// source line is lost.
+fn highlight_code(code: &str, lang: &str, ss: &SyntaxSet) -> String {
+    let syntax = ss
+        .find_syntax_by_token(lang)
+        .unwrap_or_else(|| ss.find_syntax_plain_text());
+    let mut generator = ClassedHTMLGenerator::new_with_class_style(syntax, ss, ClassStyle::Spaced);
+    // `all` stops at the first line syntect rejects.
+    let parsed = LinesWithEndings::from(code).all(|line| {
+        generator
+            .parse_html_for_line_which_includes_newline(line)
+            .is_ok()
+    });
+    let highlighted = if parsed {
+        generator.finalize()
+    } else {
+        escape_html(code)
+    };
+    // Wrap each line in <span class="line"> so CSS can add line numbers
+    // via counter()/::before, highlight specific lines on hover, etc.
+    let mut buf = format!("<pre><code class=\"language-{}\">", escape_html(lang));
+    // Drop only the final line terminator. Blank lines inside the code are
+    // kept as empty line spans so the listing keeps its vertical layout.
+    let body = highlighted.strip_suffix('\n').unwrap_or(&highlighted);
+    if !body.is_empty() {
+        for line in body.split('\n') {
+            buf.push_str("<span class=\"line\">");
+            buf.push_str(line);
+            buf.push_str("</span>\n");
+        }
+    }
+    buf.push_str("</code></pre>");
+    buf
+}
+/// Theme used for CSS generation when the caller does not name one.
+const DEFAULT_THEME: &str = "base16-ocean.dark";
+/// Return syntect's default theme set, loading it on first use and caching
+/// it for the lifetime of the process.
+fn theme_set() -> &'static ThemeSet {
+    // Scoped to this function: nothing else needs to touch the cache directly.
+    static THEME_SET: OnceLock<ThemeSet> = OnceLock::new();
+    THEME_SET.get_or_init(ThemeSet::load_defaults)
+}
+/// Generate the CSS that styles the `<span class="...">` tokens produced by
+/// `highlight()`.
+///
+/// `theme_name` selects the theme; `None` (nil on the Ruby side) means
+/// "base16-ocean.dark". The returned string is suitable for embedding in a
+/// `<style>` tag or writing to a `.css` file.
+///
+/// # Errors
+///
+/// Returns an argument error listing the available themes when the name is
+/// unknown, and a runtime error when syntect fails to generate the CSS.
+pub fn syntax_css(ruby: &Ruby, theme_name: Option<String>) -> Result<String, Error> {
+    let themes = &theme_set().themes;
+    let name = match theme_name.as_deref() {
+        Some(requested) => requested,
+        None => DEFAULT_THEME,
+    };
+    let Some(theme) = themes.get(name) else {
+        let available: Vec<&str> = themes.keys().map(String::as_str).collect();
+        return Err(Error::new(
+            ruby.exception_arg_error(),
+            format!("unknown syntax theme '{name}'. Available: {available:?}"),
+        ));
+    };
+    match css_for_theme_with_class_style(theme, ClassStyle::Spaced) {
+        Ok(css) => Ok(css),
+        Err(e) => Err(Error::new(
+            ruby.exception_runtime_error(),
+            format!("failed to generate CSS: {e}"),
+        )),
+    }
+}
+/// List the names of all themes in the cached theme set, i.e. the names
+/// `syntax_css` accepts.
+pub fn syntax_themes() -> Vec<String> {
+    let themes = &theme_set().themes;
+    let mut names = Vec::with_capacity(themes.len());
+    names.extend(themes.keys().cloned());
+    names
+}
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pulldown_cmark::{CodeBlockKind, CowStr, Event, Tag, TagEnd};
+    /// Build the `Start / Text / End` events of one code block.
+    fn code_block(kind: CodeBlockKind<'static>, body: &'static str) -> Vec<Event<'static>> {
+        vec![
+            Event::Start(Tag::CodeBlock(kind)),
+            Event::Text(CowStr::Borrowed(body)),
+            Event::End(TagEnd::CodeBlock),
+        ]
+    }
+    #[test]
+    fn highlight_rust_code() {
+        let html = highlight_code("let x = 1;\n", "rust", syntax_set());
+        assert!(html.contains("<span"), "should contain span tags: {html}");
+        assert!(html.contains("language-rust"));
+        assert!(html.contains("<pre><code"));
+    }
+    #[test]
+    fn unknown_language_falls_back_to_plain_text() {
+        let html = highlight_code("hello\n", "nonexistent-lang-xyz", syntax_set());
+        // Only the text and the wrapper are checked here.
+        assert!(html.contains("hello"));
+        assert!(html.contains("<pre><code"));
+    }
+    #[test]
+    fn highlight_filter_replaces_fenced_block() {
+        let fenced = CodeBlockKind::Fenced(CowStr::Borrowed("rust"));
+        let out = highlight(code_block(fenced, "let x = 1;\n"));
+        assert_eq!(out.len(), 1);
+        match &out[0] {
+            Event::Html(html) => {
+                assert!(html.contains("<span"), "missing spans: {html}");
+                assert!(html.contains("language-rust"));
+            }
+            other => panic!("expected Html event, got {other:?}"),
+        }
+    }
+    #[test]
+    fn highlight_filter_skips_blocks_without_language() {
+        let bare_fence = CodeBlockKind::Fenced(CowStr::Borrowed(""));
+        let out = highlight(code_block(bare_fence, "plain\n"));
+        // Should pass through unchanged (3 events, not collapsed to 1)
+        assert_eq!(out.len(), 3);
+    }
+    #[test]
+    fn highlight_filter_skips_indented_blocks() {
+        let out = highlight(code_block(CodeBlockKind::Indented, "indented\n"));
+        assert_eq!(out.len(), 3);
+    }
+}