inkmark 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,231 @@
1
+ use globset::{Glob, GlobSet, GlobSetBuilder};
2
+ use magnus::value::{Id, LazyId};
3
+ use magnus::{Error, RHash, Ruby};
4
+ use pulldown_cmark::Options;
5
+
6
+ use crate::stats::ExtractFlags;
7
+
8
// `sym_id!(ruby, "name")` yields the `Id` for a Ruby option-key symbol.
//
// Each expansion declares its own block-scoped `static LazyId`, so every
// call site owns a private cache slot: the key is interned once for the
// lifetime of the process and later calls hand back the cached `Id`.
// This avoids a `ruby.to_symbol(key)` intern-table lookup on every render
// for each of the 25+ option keys, which would otherwise dominate the
// option-parsing cost.
macro_rules! sym_id {
    ($handle:expr, $key:literal) => {{
        static CACHED_KEY: LazyId = LazyId::new($key);
        LazyId::get_inner_with(&CACHED_KEY, $handle)
    }};
}
21
+
22
/// Runtime flags that don't map to pulldown-cmark's `Options` bitflags but
/// instead drive Inkmark's own event filters (raw-HTML suppression, heading-id
/// generation, and future filters). Grouped into a struct so `build_options`
/// stays single-return as we add more filter knobs.
///
/// Populated by `build_options` from the Ruby options hash. Unless noted
/// otherwise, each `bool` field mirrors the option key of the same name, and
/// a missing or `nil` value reads as `false`.
pub struct Flags {
    /// Inverse of the `raw_html` option: `true` when `raw_html` is unset or
    /// false.
    pub suppress_raw_html: bool,
    /// Value of the `hard_wrap` option.
    pub hard_wrap: bool,
    /// Value of the `gfm` option (the same value also sets
    /// `Options::ENABLE_GFM` on the parser options).
    pub gfm: bool,
    /// Value of the `gfm_tag_filter` option.
    pub gfm_tag_filter: bool,
    /// Value of the `heading_ids` option.
    pub heading_ids: bool,
    /// Value of the `emoji_shortcodes` option.
    pub emoji_shortcodes: bool,
    /// Value of the `autolink` option.
    pub autolink: bool,
    /// Value of the `lazy_images` option.
    pub lazy_images: bool,
    /// Value of the `nofollow_external_links` option.
    pub nofollow_external_links: bool,
    /// Value of the `syntax_highlight` option.
    pub syntax_highlight: bool,
    /// Value of the `toc` option.
    pub toc: bool,
    /// Value of the `toc_depth` option; `None` when the option is unset.
    pub toc_depth: Option<u8>,
    /// Value of the `statistics` option.
    pub statistics: bool,
    // Extract-array flags, parsed from the nested `extract: {...}` hash.
    // `ExtractFlags::any()` tells the renderer whether to take the
    // single-pass stats/extract path.
    pub extract: ExtractFlags,
    // Compiled host-glob allowlists. `None` means the option was unset
    // (no filtering); `Some(set)` means filter: `set` may be empty, in
    // which case nothing matches and every external link/image is
    // rejected.
    pub allowed_link_hosts: Option<GlobSet>,
    pub allowed_image_hosts: Option<GlobSet>,
    // URL scheme allowlists for markdown-emitted links/images. `None`
    // means the option is unset (filtering disabled—the Ruby-side
    // default); `Some(list)` means filter. Entries are stored
    // ASCII-lowercased. Stored as Vec rather than HashSet because
    // realistic scheme lists are 2–5 entries, where a linear scan beats
    // a hash table on cache alone.
    pub allowed_link_schemes: Option<Vec<String>>,
    pub allowed_image_schemes: Option<Vec<String>>,
}
58
+
59
+ pub fn build_options(ruby: &Ruby, hash: RHash) -> Result<(Options, Flags), Error> {
60
+ let mut opts = Options::empty();
61
+
62
+ let get_bool = |id: Id| -> Result<bool, Error> {
63
+ let value: Option<bool> = hash.lookup(id)?;
64
+ Ok(value.unwrap_or(false))
65
+ };
66
+
67
+ // Pull each bool option once; "gfm" used to feed both `opts` and
68
+ // `flags` via a redundant second lookup—now read once, reused.
69
+ let gfm = get_bool(sym_id!(ruby, "gfm"))?;
70
+ let tables = get_bool(sym_id!(ruby, "tables"))?;
71
+ let strikethrough = get_bool(sym_id!(ruby, "strikethrough"))?;
72
+ let tasklists = get_bool(sym_id!(ruby, "tasklists"))?;
73
+ let footnotes = get_bool(sym_id!(ruby, "footnotes"))?;
74
+ let smart_punctuation = get_bool(sym_id!(ruby, "smart_punctuation"))?;
75
+ let heading_attributes = get_bool(sym_id!(ruby, "heading_attributes"))?;
76
+ let math = get_bool(sym_id!(ruby, "math"))?;
77
+ let definition_list = get_bool(sym_id!(ruby, "definition_list"))?;
78
+ let superscript = get_bool(sym_id!(ruby, "superscript"))?;
79
+ let subscript = get_bool(sym_id!(ruby, "subscript"))?;
80
+ let wikilinks = get_bool(sym_id!(ruby, "wikilinks"))?;
81
+ let frontmatter = get_bool(sym_id!(ruby, "frontmatter"))?;
82
+
83
+ if gfm {
84
+ opts.insert(Options::ENABLE_GFM);
85
+ }
86
+ if tables {
87
+ opts.insert(Options::ENABLE_TABLES);
88
+ }
89
+ if strikethrough {
90
+ opts.insert(Options::ENABLE_STRIKETHROUGH);
91
+ }
92
+ if tasklists {
93
+ opts.insert(Options::ENABLE_TASKLISTS);
94
+ }
95
+ if footnotes {
96
+ opts.insert(Options::ENABLE_FOOTNOTES);
97
+ }
98
+ if smart_punctuation {
99
+ opts.insert(Options::ENABLE_SMART_PUNCTUATION);
100
+ }
101
+ if heading_attributes {
102
+ opts.insert(Options::ENABLE_HEADING_ATTRIBUTES);
103
+ }
104
+ if math {
105
+ opts.insert(Options::ENABLE_MATH);
106
+ }
107
+ if definition_list {
108
+ opts.insert(Options::ENABLE_DEFINITION_LIST);
109
+ }
110
+ if superscript {
111
+ opts.insert(Options::ENABLE_SUPERSCRIPT);
112
+ }
113
+ if subscript {
114
+ opts.insert(Options::ENABLE_SUBSCRIPT);
115
+ }
116
+ if wikilinks {
117
+ opts.insert(Options::ENABLE_WIKILINKS);
118
+ }
119
+ if frontmatter {
120
+ opts.insert(Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);
121
+ }
122
+
123
+ let flags = Flags {
124
+ suppress_raw_html: !get_bool(sym_id!(ruby, "raw_html"))?,
125
+ hard_wrap: get_bool(sym_id!(ruby, "hard_wrap"))?,
126
+ gfm,
127
+ gfm_tag_filter: get_bool(sym_id!(ruby, "gfm_tag_filter"))?,
128
+ heading_ids: get_bool(sym_id!(ruby, "heading_ids"))?,
129
+ emoji_shortcodes: get_bool(sym_id!(ruby, "emoji_shortcodes"))?,
130
+ autolink: get_bool(sym_id!(ruby, "autolink"))?,
131
+ lazy_images: get_bool(sym_id!(ruby, "lazy_images"))?,
132
+ nofollow_external_links: get_bool(sym_id!(ruby, "nofollow_external_links"))?,
133
+ syntax_highlight: get_bool(sym_id!(ruby, "syntax_highlight"))?,
134
+ toc: get_bool(sym_id!(ruby, "toc"))?,
135
+ toc_depth: hash.lookup::<_, Option<u8>>(sym_id!(ruby, "toc_depth"))?,
136
+ statistics: get_bool(sym_id!(ruby, "statistics"))?,
137
+ extract: build_extract_flags(ruby, &hash)?,
138
+ allowed_link_hosts: build_host_globset(
139
+ ruby,
140
+ &hash,
141
+ sym_id!(ruby, "allowed_link_hosts"),
142
+ "allowed_link_hosts",
143
+ )?,
144
+ allowed_image_hosts: build_host_globset(
145
+ ruby,
146
+ &hash,
147
+ sym_id!(ruby, "allowed_image_hosts"),
148
+ "allowed_image_hosts",
149
+ )?,
150
+ allowed_link_schemes: build_scheme_set(&hash, sym_id!(ruby, "allowed_link_schemes"))?,
151
+ allowed_image_schemes: build_scheme_set(&hash, sym_id!(ruby, "allowed_image_schemes"))?,
152
+ };
153
+ Ok((opts, flags))
154
+ }
155
+
156
+ /// Read an optional `Array<String>` option and compile it into a `GlobSet`.
157
+ /// Returns `Ok(None)` when the option is `nil` (the Ruby-side default) —
158
+ /// this signals "filtering disabled" to the event pipeline.
159
+ ///
160
+ /// An empty array compiles to an empty `GlobSet` that matches nothing, so
161
+ /// `allowed_link_hosts: []` acts as a deny-all allowlist. Pattern compile
162
+ /// failures surface as a Ruby `ArgumentError` with the bad pattern quoted
163
+ /// so the user can find and fix it.
164
+ fn build_host_globset(
165
+ ruby: &Ruby,
166
+ hash: &RHash,
167
+ key_id: Id,
168
+ key_name: &str,
169
+ ) -> Result<Option<GlobSet>, Error> {
170
+ let patterns: Option<Vec<String>> = hash.lookup(key_id)?;
171
+ let Some(patterns) = patterns else {
172
+ return Ok(None);
173
+ };
174
+
175
+ let mut builder = GlobSetBuilder::new();
176
+ for pattern in &patterns {
177
+ let glob = Glob::new(pattern).map_err(|e| {
178
+ Error::new(
179
+ ruby.exception_arg_error(),
180
+ format!("invalid glob pattern in {key_name}: {pattern:?}—{e}"),
181
+ )
182
+ })?;
183
+ builder.add(glob);
184
+ }
185
+ let set = builder.build().map_err(|e| {
186
+ Error::new(
187
+ ruby.exception_arg_error(),
188
+ format!("failed to compile {key_name} globset: {e}"),
189
+ )
190
+ })?;
191
+ Ok(Some(set))
192
+ }
193
+
194
+ /// Read an optional `Array<String>` scheme allowlist and normalize to
195
+ /// lowercase. Returns `Ok(None)` when the option is `nil`, signalling
196
+ /// "filtering disabled" to the pipeline. An empty array compiles to an
197
+ /// empty `Vec` that matches nothing, which blocks every absolute URL
198
+ /// (relative URLs still pass through
199
+ /// [`crate::url_match::is_scheme_allowed`]).
200
+ fn build_scheme_set(hash: &RHash, key_id: Id) -> Result<Option<Vec<String>>, Error> {
201
+ let schemes: Option<Vec<String>> = hash.lookup(key_id)?;
202
+ Ok(schemes.map(|list| list.into_iter().map(|s| s.to_ascii_lowercase()).collect()))
203
+ }
204
+
205
+ /// Read the nested `extract: { images: true, ... }` hash and compile to
206
+ /// an `ExtractFlags`. Nil / missing option → all flags off.
207
+ ///
208
+ /// Ruby-side validation (`Inkmark::Options`) enforces the key set and
209
+ /// boolean value type, so by the time we get here an unknown key or
210
+ /// non-boolean value has already raised `ArgumentError`. We still read
211
+ /// defensively using `Option<bool>` + `unwrap_or(false)` so that a
212
+ /// missing sub-key is treated as "off".
213
+ fn build_extract_flags(ruby: &Ruby, hash: &RHash) -> Result<ExtractFlags, Error> {
214
+ let nested: Option<RHash> = hash.lookup(sym_id!(ruby, "extract"))?;
215
+ let Some(nested) = nested else {
216
+ return Ok(ExtractFlags::default());
217
+ };
218
+
219
+ let read = |id: Id| -> Result<bool, Error> {
220
+ let v: Option<bool> = nested.lookup(id)?;
221
+ Ok(v.unwrap_or(false))
222
+ };
223
+
224
+ Ok(ExtractFlags {
225
+ images: read(sym_id!(ruby, "images"))?,
226
+ links: read(sym_id!(ruby, "links"))?,
227
+ code_blocks: read(sym_id!(ruby, "code_blocks"))?,
228
+ headings: read(sym_id!(ruby, "headings"))?,
229
+ footnote_definitions: read(sym_id!(ruby, "footnote_definitions"))?,
230
+ })
231
+ }
@@ -0,0 +1,445 @@
1
+ //! Serialize pulldown-cmark events to plain text.
2
+ //!
3
+ //! Designed for embedding models, token counting, and any pipeline
4
+ //! where Markdown syntax is noise. Runs after the normal filter
5
+ //! pipeline (emoji replacement, autolink, host/scheme allowlists), so
6
+ //! the caller already sees resolved emoji, unwrapped disallowed links,
7
+ //! and so on.
8
+ //!
9
+ //! Core idea: **buffer stack**. Most writes go to the top-of-stack
10
+ //! buffer. Contexts that need post-processing (blockquote line
11
+ //! prefixing, link `text (url)` formatting, image alt capture,
12
+ //! footnote body capture) open a fresh buffer at the Start event and
13
+ //! pop + format at End. Nested contexts fall out for free because the
14
+ //! stack naturally tracks nesting depth.
15
+
16
+ use pulldown_cmark::{Event, Tag, TagEnd};
17
+
18
+ /// Write plain-text output into `buf` from a pulldown-cmark event stream.
19
+ pub fn write_plain_text<'a, I: IntoIterator<Item = Event<'a>>>(events: I, buf: &mut String) {
20
+ let mut w = Writer::new();
21
+ for event in events {
22
+ w.handle(event);
23
+ }
24
+ let out = w.finalize();
25
+ buf.push_str(&out);
26
+ }
27
+
28
/// Mutable state for one plain-text rendering pass. Created by
/// `write_plain_text`, fed one event at a time via `handle`, and consumed
/// by `finalize`.
struct Writer {
    /// Stack of write targets. Always non-empty; top is the current
    /// target. `open()` pushes, `close()` pops.
    buffers: Vec<String>,
    /// One entry per list currently open, innermost last. Its length is
    /// the current list nesting depth.
    list_stack: Vec<ListCtx>,
    /// Destination URL recorded at `Start(Link)` and read back at
    /// `End(Link)`.
    link_dest: String,
    /// Destination URL recorded at `Start(Image)` and read back at
    /// `End(Image)`.
    image_dest: String,
    /// Label recorded at `Start(FootnoteDefinition)` and paired with the
    /// captured body at the matching End event.
    footnote_label: String,
    /// Accumulated definitions, emitted at `finalize` in document order.
    footnote_bodies: Vec<(String, String)>,
    /// Current row's cells, tab-joined at TableRow/TableHead End.
    current_row: Vec<String>,
}
41
+
42
/// Per-list state pushed at `Start(List)` and popped at `End(List)`.
struct ListCtx {
    /// `true` when the list start event carried a starting number
    /// (ordered list); `false` for bullet lists.
    ordered: bool,
    /// Number to print for the next item of an ordered list; starts at the
    /// list's starting number (or 1) and is incremented per ordered item.
    counter: u64,
    /// Number of spaces written before each item's bullet: twice the
    /// nesting depth at which the list was opened.
    indent: usize,
}
47
+
48
+ impl Writer {
49
+ fn new() -> Self {
50
+ Self {
51
+ buffers: vec![String::new()],
52
+ list_stack: Vec::new(),
53
+ link_dest: String::new(),
54
+ image_dest: String::new(),
55
+ footnote_label: String::new(),
56
+ footnote_bodies: Vec::new(),
57
+ current_row: Vec::new(),
58
+ }
59
+ }
60
+
61
+ fn write(&mut self, s: &str) {
62
+ self.buffers
63
+ .last_mut()
64
+ .expect("buffer stack is never empty")
65
+ .push_str(s);
66
+ }
67
+
68
+ fn open(&mut self) {
69
+ self.buffers.push(String::new());
70
+ }
71
+
72
+ fn close(&mut self) -> String {
73
+ self.buffers.pop().expect("close() without matching open()")
74
+ }
75
+
76
+ /// Ensure the current buffer ends with exactly one blank line
77
+ /// (i.e. `"\n\n"`), except when the buffer is empty (no leading
78
+ /// newlines at document or subtree start).
79
+ fn ensure_blank_line(&mut self) {
80
+ let buf = self.buffers.last().expect("buffer stack is never empty");
81
+ if buf.is_empty() || buf.ends_with("\n\n") {
82
+ return;
83
+ }
84
+ if buf.ends_with('\n') {
85
+ self.write("\n");
86
+ } else {
87
+ self.write("\n\n");
88
+ }
89
+ }
90
+
91
+ /// Ensure the current buffer ends with `\n`. Used for transitions
92
+ /// that should just break the current line without introducing
93
+ /// paragraph-style separation (e.g. a nested list inside a list
94
+ /// item: `- outer\n - inner`, not a blank line between them).
95
+ fn ensure_newline(&mut self) {
96
+ let buf = self.buffers.last().expect("buffer stack is never empty");
97
+ if buf.is_empty() || buf.ends_with('\n') {
98
+ return;
99
+ }
100
+ self.write("\n");
101
+ }
102
+
103
+ fn handle(&mut self, event: Event<'_>) {
104
+ match event {
105
+ Event::Start(tag) => self.start(tag),
106
+ Event::End(end) => self.end(end),
107
+ Event::Text(t) | Event::Code(t) => self.write(&t),
108
+ Event::SoftBreak => self.write(" "),
109
+ Event::HardBreak => self.write("\n"),
110
+ Event::Rule => {
111
+ self.ensure_blank_line();
112
+ self.write("---\n\n");
113
+ }
114
+ // Raw HTML reaches us only when raw_html: true (the
115
+ // suppress_raw_html filter rewrites it to Event::Text
116
+ // otherwise). Emit it verbatim to mirror the to_html /
117
+ // to_markdown contract.
118
+ Event::Html(h) | Event::InlineHtml(h) => self.write(&h),
119
+ Event::FootnoteReference(label) => {
120
+ self.write("[");
121
+ self.write(&label);
122
+ self.write("]");
123
+ }
124
+ // Task-list markers are dropped; the item bullet remains.
125
+ Event::TaskListMarker(_) => {}
126
+ Event::InlineMath(t) | Event::DisplayMath(t) => self.write(&t),
127
+ }
128
+ }
129
+
130
+ fn start(&mut self, tag: Tag<'_>) {
131
+ match tag {
132
+ Tag::Paragraph => {}
133
+ Tag::Heading { .. } => self.ensure_blank_line(),
134
+ Tag::BlockQuote(_) => {
135
+ self.ensure_blank_line();
136
+ self.open();
137
+ }
138
+ Tag::CodeBlock(_) => {
139
+ self.ensure_blank_line();
140
+ self.open();
141
+ }
142
+ Tag::List(first) => {
143
+ // Nested lists separate with a single newline (appear
144
+ // as the next line of their parent item); top-level
145
+ // lists get paragraph-style blank-line separation.
146
+ if self.list_stack.is_empty() {
147
+ self.ensure_blank_line();
148
+ } else {
149
+ self.ensure_newline();
150
+ }
151
+ let indent = self.list_stack.len() * 2;
152
+ self.list_stack.push(ListCtx {
153
+ ordered: first.is_some(),
154
+ counter: first.unwrap_or(1),
155
+ indent,
156
+ });
157
+ }
158
+ Tag::Item => {
159
+ let ctx = self.list_stack.last_mut().expect("item outside list");
160
+ let indent = " ".repeat(ctx.indent);
161
+ let bullet = if ctx.ordered {
162
+ let n = ctx.counter;
163
+ ctx.counter += 1;
164
+ format!("{}. ", n)
165
+ } else {
166
+ "- ".to_string()
167
+ };
168
+ self.write(&indent);
169
+ self.write(&bullet);
170
+ }
171
+ Tag::Table(_) => self.ensure_blank_line(),
172
+ Tag::TableHead | Tag::TableRow => {}
173
+ Tag::TableCell => self.open(),
174
+ Tag::Link { dest_url, .. } => {
175
+ self.link_dest = dest_url.to_string();
176
+ self.open();
177
+ }
178
+ Tag::Image { dest_url, .. } => {
179
+ self.image_dest = dest_url.to_string();
180
+ self.open();
181
+ }
182
+ Tag::Emphasis | Tag::Strong | Tag::Strikethrough => {}
183
+ Tag::FootnoteDefinition(label) => {
184
+ self.footnote_label = label.to_string();
185
+ self.open();
186
+ }
187
+ // YAML metadata: buffer + discard on End so the raw
188
+ // frontmatter never reaches plain-text output (the Ruby
189
+ // side consumes it separately via `frontmatter`).
190
+ Tag::MetadataBlock(_) => self.open(),
191
+ // Pass-through structural tags—inner content writes to
192
+ // the current buffer unchanged.
193
+ Tag::HtmlBlock
194
+ | Tag::DefinitionList
195
+ | Tag::DefinitionListTitle
196
+ | Tag::DefinitionListDefinition
197
+ | Tag::Subscript
198
+ | Tag::Superscript => {}
199
+ }
200
+ }
201
+
202
+ fn end(&mut self, end: TagEnd) {
203
+ match end {
204
+ TagEnd::Paragraph | TagEnd::Heading(_) => self.write("\n\n"),
205
+ TagEnd::BlockQuote(_) => {
206
+ let inner = self.close();
207
+ let prefixed = prefix_lines(inner.trim_end_matches('\n'), "> ");
208
+ self.write(&prefixed);
209
+ self.write("\n\n");
210
+ }
211
+ TagEnd::CodeBlock => {
212
+ let inner = self.close();
213
+ self.write(&inner);
214
+ self.ensure_blank_line();
215
+ }
216
+ TagEnd::List(_) => {
217
+ self.list_stack.pop();
218
+ // Only paragraph-separate after top-level lists; inside
219
+ // a parent item we're about to hit End(Item), which
220
+ // writes its own `\n`.
221
+ if self.list_stack.is_empty() {
222
+ self.ensure_blank_line();
223
+ } else {
224
+ self.ensure_newline();
225
+ }
226
+ }
227
+ TagEnd::Item => self.write("\n"),
228
+ TagEnd::Table => self.write("\n"),
229
+ TagEnd::TableHead => {
230
+ let row = std::mem::take(&mut self.current_row).join("\t");
231
+ self.write(&row);
232
+ // Blank line between header and body for readability.
233
+ self.write("\n\n");
234
+ }
235
+ TagEnd::TableRow => {
236
+ let row = std::mem::take(&mut self.current_row).join("\t");
237
+ self.write(&row);
238
+ self.write("\n");
239
+ }
240
+ TagEnd::TableCell => {
241
+ let cell = self.close();
242
+ self.current_row.push(cell);
243
+ }
244
+ TagEnd::Link => {
245
+ let text = self.close();
246
+ // Collapse when link text equals its URL (autolinks
247
+ // like `<https://x>` or linkify-produced links).
248
+ if text == self.link_dest {
249
+ self.write(&text);
250
+ } else {
251
+ self.write(&text);
252
+ self.write(" (");
253
+ let dest = std::mem::take(&mut self.link_dest);
254
+ self.write(&dest);
255
+ self.write(")");
256
+ }
257
+ }
258
+ TagEnd::Image => {
259
+ let alt = self.close();
260
+ self.write(&alt);
261
+ self.write(" (");
262
+ let dest = std::mem::take(&mut self.image_dest);
263
+ self.write(&dest);
264
+ self.write(")");
265
+ }
266
+ TagEnd::FootnoteDefinition => {
267
+ let body = self.close();
268
+ let label = std::mem::take(&mut self.footnote_label);
269
+ self.footnote_bodies.push((label, body.trim().to_string()));
270
+ }
271
+ TagEnd::MetadataBlock(_) => {
272
+ let _ = self.close();
273
+ }
274
+ _ => {}
275
+ }
276
+ }
277
+
278
+ fn finalize(mut self) -> String {
279
+ if !self.footnote_bodies.is_empty() {
280
+ self.ensure_blank_line();
281
+ let defs = std::mem::take(&mut self.footnote_bodies);
282
+ for (i, (label, body)) in defs.iter().enumerate() {
283
+ if i > 0 {
284
+ self.write("\n");
285
+ }
286
+ self.write("[");
287
+ self.write(label);
288
+ self.write("]: ");
289
+ self.write(body);
290
+ }
291
+ self.write("\n");
292
+ }
293
+ let mut out = self.buffers.pop().expect("buffer stack is never empty");
294
+ // Trim trailing blank lines down to one final newline.
295
+ while out.ends_with("\n\n") {
296
+ out.pop();
297
+ }
298
+ if !out.is_empty() && !out.ends_with('\n') {
299
+ out.push('\n');
300
+ }
301
+ out
302
+ }
303
+ }
304
+
305
/// Return `s` with `prefix` prepended to each of its `\n`-separated lines.
///
/// A line that is empty gets the prefix minus its trailing whitespace, so
/// quoting with `"> "` turns a blank line into a bare `>` (the usual
/// email-quoting convention). Line breaks are preserved one-for-one; an
/// empty `s` counts as a single empty line.
fn prefix_lines(s: &str, prefix: &str) -> String {
    let bare_prefix = prefix.trim_end();
    let mut quoted = String::with_capacity(s.len() + prefix.len() * 4);
    let mut lines = s.split('\n').peekable();
    while let Some(line) = lines.next() {
        match line {
            "" => quoted.push_str(bare_prefix),
            text => {
                quoted.push_str(prefix);
                quoted.push_str(text);
            }
        }
        // Re-insert the separator between lines, never after the last one.
        if lines.peek().is_some() {
            quoted.push('\n');
        }
    }
    quoted
}
324
+
325
#[cfg(test)]
mod tests {
    use super::*;
    use pulldown_cmark::{Options, Parser};

    /// Parse `md` with the GFM-style extensions enabled and render it
    /// through `write_plain_text`.
    fn plain(md: &str) -> String {
        let mut buf = String::new();
        let opts = Options::ENABLE_GFM
            | Options::ENABLE_TABLES
            | Options::ENABLE_STRIKETHROUGH
            | Options::ENABLE_TASKLISTS
            | Options::ENABLE_FOOTNOTES;
        write_plain_text(Parser::new_ext(md, opts), &mut buf);
        buf
    }

    #[test]
    fn paragraph_strips_emphasis() {
        assert_eq!(
            plain("**bold** and *italic* and ~~strike~~"),
            "bold and italic and strike\n"
        );
    }

    #[test]
    fn link_expands_to_text_with_url() {
        assert_eq!(
            plain("[example](https://example.net)"),
            "example (https://example.net)\n"
        );
    }

    #[test]
    fn autolink_collapses_text_equals_url() {
        assert_eq!(plain("<https://example.net>"), "https://example.net\n");
    }

    #[test]
    fn image_emits_alt_and_src() {
        assert_eq!(plain("![cat](cat.png)"), "cat (cat.png)\n");
    }

    #[test]
    fn heading_is_plain_text_with_blank_line() {
        assert_eq!(plain("# Title\n\nBody"), "Title\n\nBody\n");
    }

    #[test]
    fn blockquote_prefixes_lines() {
        let out = plain("> hello\n> world");
        assert_eq!(out, "> hello world\n");
    }

    #[test]
    fn nested_blockquote_double_prefix() {
        let out = plain("> > nested");
        assert_eq!(out, "> > nested\n");
    }

    #[test]
    fn blockquote_with_blank_line_uses_bare_marker() {
        let out = plain("> first\n>\n> second");
        assert_eq!(out, "> first\n>\n> second\n");
    }

    #[test]
    fn unordered_list_dash_bullet() {
        assert_eq!(plain("- a\n- b"), "- a\n- b\n");
    }

    #[test]
    fn ordered_list_numbers() {
        assert_eq!(plain("1. first\n2. second"), "1. first\n2. second\n");
    }

    #[test]
    fn nested_list_indented_two_spaces() {
        // The inner item needs a two-space indent to nest under `outer`
        // (one space would make it a sibling), and the writer emits two
        // spaces per nesting level on output.
        let out = plain("- outer\n  - inner");
        assert_eq!(out, "- outer\n  - inner\n");
    }

    #[test]
    fn tasklist_drops_checkbox() {
        assert_eq!(plain("- [x] done\n- [ ] todo"), "- done\n- todo\n");
    }

    #[test]
    fn table_header_blank_line_then_body() {
        let md = "| a | b |\n|---|---|\n| 1 | 2 |\n| 3 | 4 |";
        let out = plain(md);
        assert_eq!(out, "a\tb\n\n1\t2\n3\t4\n");
    }

    #[test]
    fn code_block_preserved_verbatim() {
        let out = plain("```ruby\nputs \"hi\"\n```");
        assert_eq!(out, "puts \"hi\"\n");
    }

    #[test]
    fn horizontal_rule_emits_dashes() {
        assert_eq!(plain("before\n\n---\n\nafter"), "before\n\n---\n\nafter\n");
    }

    #[test]
    fn footnote_reference_and_definition() {
        let md = "See[^x].\n\n[^x]: body text";
        let out = plain(md);
        assert_eq!(out, "See[x].\n\n[x]: body text\n");
    }

    #[test]
    fn inline_code_strips_backticks() {
        assert_eq!(plain("use `puts` please"), "use puts please\n");
    }

    #[test]
    fn hard_break_is_newline() {
        // A Markdown hard break requires at least two trailing spaces; a
        // single trailing space is only a soft break (rendered as " ").
        assert_eq!(plain("line1  \nline2"), "line1\nline2\n");
    }
}