html-to-markdown 2.29.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +18 -41
  3. data/README.md +37 -50
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +17 -705
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -4
  6. data/ext/html-to-markdown-rb/native/README.md +4 -13
  7. data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +2 -73
  8. data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +5 -49
  9. data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -6
  10. data/ext/html-to-markdown-rb/native/src/lib.rs +76 -213
  11. data/ext/html-to-markdown-rb/native/src/options.rs +0 -3
  12. data/lib/html_to_markdown/version.rb +1 -1
  13. data/lib/html_to_markdown.rb +13 -194
  14. data/sig/html_to_markdown.rbs +12 -373
  15. data/vendor/Cargo.toml +7 -4
  16. data/vendor/html-to-markdown-rs/Cargo.toml +4 -10
  17. data/vendor/html-to-markdown-rs/README.md +127 -51
  18. data/vendor/html-to-markdown-rs/examples/basic.rs +6 -1
  19. data/vendor/html-to-markdown-rs/examples/table.rs +6 -1
  20. data/vendor/html-to-markdown-rs/examples/test_escape.rs +6 -1
  21. data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +8 -2
  22. data/vendor/html-to-markdown-rs/examples/test_lists.rs +6 -1
  23. data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +6 -1
  24. data/vendor/html-to-markdown-rs/examples/test_tables.rs +6 -1
  25. data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +6 -1
  26. data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +6 -1
  27. data/vendor/html-to-markdown-rs/src/convert_api.rs +151 -745
  28. data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +3 -5
  29. data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -7
  30. data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +18 -5
  31. data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +10 -0
  32. data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +3 -5
  33. data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +16 -11
  34. data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +20 -0
  35. data/vendor/html-to-markdown-rs/src/converter/block/table/cells.rs +4 -17
  36. data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +140 -0
  37. data/vendor/html-to-markdown-rs/src/converter/block/table/scanner.rs +4 -18
  38. data/vendor/html-to-markdown-rs/src/converter/block/table/utils.rs +2 -18
  39. data/vendor/html-to-markdown-rs/src/converter/context.rs +8 -0
  40. data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -6
  41. data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
  42. data/vendor/html-to-markdown-rs/src/converter/handlers/blockquote.rs +4 -5
  43. data/vendor/html-to-markdown-rs/src/converter/handlers/code_block.rs +5 -10
  44. data/vendor/html-to-markdown-rs/src/converter/handlers/graphic.rs +3 -5
  45. data/vendor/html-to-markdown-rs/src/converter/handlers/image.rs +3 -5
  46. data/vendor/html-to-markdown-rs/src/converter/handlers/link.rs +3 -5
  47. data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +3 -5
  48. data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +4 -10
  49. data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +4 -170
  50. data/vendor/html-to-markdown-rs/src/converter/inline/semantic/marks.rs +7 -19
  51. data/vendor/html-to-markdown-rs/src/converter/list/item.rs +3 -5
  52. data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +4 -10
  53. data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +6 -12
  54. data/vendor/html-to-markdown-rs/src/converter/list/utils.rs +1 -12
  55. data/vendor/html-to-markdown-rs/src/converter/main.rs +85 -56
  56. data/vendor/html-to-markdown-rs/src/converter/main_helpers.rs +4 -67
  57. data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +1 -5
  58. data/vendor/html-to-markdown-rs/src/converter/media/graphic.rs +3 -40
  59. data/vendor/html-to-markdown-rs/src/converter/media/image.rs +0 -8
  60. data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +3 -13
  61. data/vendor/html-to-markdown-rs/src/converter/metadata.rs +1 -1
  62. data/vendor/html-to-markdown-rs/src/converter/mod.rs +0 -8
  63. data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +37 -12
  64. data/vendor/html-to-markdown-rs/src/converter/semantic/attributes.rs +5 -30
  65. data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +29 -0
  66. data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +1 -36
  67. data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +1 -3
  68. data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -53
  69. data/vendor/html-to-markdown-rs/src/converter/text_node.rs +1 -1
  70. data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +0 -41
  71. data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +2 -1
  72. data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +15 -98
  73. data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +113 -4
  74. data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +3 -0
  75. data/vendor/html-to-markdown-rs/src/converter/visitor_hooks.rs +4 -10
  76. data/vendor/html-to-markdown-rs/src/exports.rs +1 -4
  77. data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
  78. data/vendor/html-to-markdown-rs/src/lib.rs +13 -133
  79. data/vendor/html-to-markdown-rs/src/metadata/collector.rs +4 -4
  80. data/vendor/html-to-markdown-rs/src/metadata/mod.rs +22 -22
  81. data/vendor/html-to-markdown-rs/src/metadata/types.rs +3 -3
  82. data/vendor/html-to-markdown-rs/src/options/conversion.rs +351 -319
  83. data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +8 -2
  84. data/vendor/html-to-markdown-rs/src/prelude.rs +1 -15
  85. data/vendor/html-to-markdown-rs/src/rcdom.rs +7 -1
  86. data/vendor/html-to-markdown-rs/src/text.rs +25 -14
  87. data/vendor/html-to-markdown-rs/src/types/document.rs +175 -0
  88. data/vendor/html-to-markdown-rs/src/types/mod.rs +17 -0
  89. data/vendor/html-to-markdown-rs/src/types/result.rs +49 -0
  90. data/vendor/html-to-markdown-rs/src/types/structure_builder.rs +790 -0
  91. data/vendor/html-to-markdown-rs/src/types/structure_collector.rs +442 -0
  92. data/vendor/html-to-markdown-rs/src/types/tables.rs +47 -0
  93. data/vendor/html-to-markdown-rs/src/types/warnings.rs +28 -0
  94. data/vendor/html-to-markdown-rs/src/visitor/mod.rs +0 -6
  95. data/vendor/html-to-markdown-rs/src/visitor/traits.rs +0 -1
  96. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/mod.rs +1 -21
  97. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/mod.rs +0 -5
  98. data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +1 -845
  99. data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +8 -1
  100. data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +8 -8
  101. data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +8 -2
  102. data/vendor/html-to-markdown-rs/tests/integration_test.rs +23 -6
  103. data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +8 -1
  104. data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +8 -2
  105. data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +6 -1
  106. data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +8 -1
  107. data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +8 -1
  108. data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +8 -1
  109. data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -1
  110. data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +8 -1
  111. data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +8 -7
  112. data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +8 -7
  113. data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +12 -2
  114. data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +8 -1
  115. data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +6 -1
  116. data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +6 -1
  117. data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +6 -1
  118. data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +6 -1
  119. data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +4 -6
  120. data/vendor/html-to-markdown-rs/tests/lists_test.rs +8 -1
  121. data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +8 -2
  122. data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +8 -1
  123. data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +8 -11
  124. data/vendor/html-to-markdown-rs/tests/tables_test.rs +12 -2
  125. data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +8 -1
  126. data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +8 -1
  127. data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +17 -28
  128. data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +8 -1
  129. data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +29 -33
  130. data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +8 -1
  131. metadata +9 -37
  132. data/bin/benchmark.rb +0 -232
  133. data/ext/html-to-markdown-rb/native/src/conversion/tables.rs +0 -71
  134. data/ext/html-to-markdown-rb/native/src/profiling.rs +0 -215
  135. data/ext/html-to-markdown-rb/native/src/visitor/bridge.rs +0 -252
  136. data/ext/html-to-markdown-rb/native/src/visitor/callbacks.rs +0 -640
  137. data/ext/html-to-markdown-rb/native/src/visitor/mod.rs +0 -12
  138. data/spec/convert_spec.rb +0 -77
  139. data/spec/convert_with_tables_spec.rb +0 -194
  140. data/spec/metadata_extraction_spec.rb +0 -437
  141. data/spec/visitor_issue_187_spec.rb +0 -605
  142. data/spec/visitor_spec.rb +0 -1149
  143. data/vendor/html-to-markdown-rs/src/hocr/converter/code_analysis.rs +0 -254
  144. data/vendor/html-to-markdown-rs/src/hocr/converter/core.rs +0 -249
  145. data/vendor/html-to-markdown-rs/src/hocr/converter/elements.rs +0 -382
  146. data/vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +0 -379
  147. data/vendor/html-to-markdown-rs/src/hocr/converter/keywords.rs +0 -55
  148. data/vendor/html-to-markdown-rs/src/hocr/converter/layout.rs +0 -313
  149. data/vendor/html-to-markdown-rs/src/hocr/converter/mod.rs +0 -26
  150. data/vendor/html-to-markdown-rs/src/hocr/converter/output.rs +0 -78
  151. data/vendor/html-to-markdown-rs/src/hocr/extractor.rs +0 -232
  152. data/vendor/html-to-markdown-rs/src/hocr/mod.rs +0 -31
  153. data/vendor/html-to-markdown-rs/src/hocr/parser.rs +0 -333
  154. data/vendor/html-to-markdown-rs/src/hocr/spatial/coords.rs +0 -129
  155. data/vendor/html-to-markdown-rs/src/hocr/spatial/grouping.rs +0 -165
  156. data/vendor/html-to-markdown-rs/src/hocr/spatial/layout.rs +0 -335
  157. data/vendor/html-to-markdown-rs/src/hocr/spatial/mod.rs +0 -15
  158. data/vendor/html-to-markdown-rs/src/hocr/spatial/output.rs +0 -63
  159. data/vendor/html-to-markdown-rs/src/hocr/types.rs +0 -269
  160. data/vendor/html-to-markdown-rs/src/visitor/async_traits.rs +0 -249
  161. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge.rs +0 -189
  162. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge_visitor.rs +0 -343
  163. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/macros.rs +0 -217
  164. data/vendor/html-to-markdown-rs/tests/async_visitor_test.rs +0 -57
  165. data/vendor/html-to-markdown-rs/tests/convert_with_metadata_no_frontmatter.rs +0 -100
  166. data/vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +0 -509
@@ -1,18 +1,27 @@
1
1
  #![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]
2
2
 
3
- //! Main conversion options and their builder utilities.
4
- //!
5
- //! This module provides the primary `ConversionOptions` struct with all configuration
6
- //! settings for HTML to Markdown conversion, along with partial update support for
7
- //! selective option modifications.
3
+ //! Main conversion options with builder pattern.
8
4
 
9
5
  use crate::options::preprocessing::PreprocessingOptions;
10
- use crate::options::preprocessing::PreprocessingOptionsUpdate;
11
6
  use crate::options::validation::{
12
7
  CodeBlockStyle, HeadingStyle, HighlightStyle, ListIndentType, NewlineStyle, OutputFormat, WhitespaceMode,
13
8
  };
14
9
 
15
10
  /// Main conversion options for HTML to Markdown conversion.
11
+ ///
12
+ /// Use [`ConversionOptions::builder()`] to construct, or [`Default::default()`] for defaults.
13
+ ///
14
+ /// # Example
15
+ ///
16
+ /// ```rust,ignore
17
+ /// use html_to_markdown_rs::ConversionOptions;
18
+ ///
19
+ /// let options = ConversionOptions::builder()
20
+ /// .heading_style(HeadingStyle::Atx)
21
+ /// .wrap(true)
22
+ /// .wrap_width(100)
23
+ /// .build();
24
+ /// ```
16
25
  #[derive(Debug, Clone)]
17
26
  #[cfg_attr(
18
27
  any(feature = "serde", feature = "metadata"),
@@ -20,386 +29,399 @@ use crate::options::validation::{
20
29
  )]
21
30
  #[cfg_attr(
22
31
  any(feature = "serde", feature = "metadata"),
23
- serde(rename_all = "camelCase", default)
32
+ serde(rename_all = "camelCase", default, deny_unknown_fields)
24
33
  )]
25
34
  pub struct ConversionOptions {
26
- /// Heading style (Underlined, Atx, `AtxClosed`)
35
+ /// Heading style to use in Markdown output (ATX `#` or Setext underline).
27
36
  pub heading_style: HeadingStyle,
28
-
29
- /// List indentation type (Spaces or Tabs)
37
+ /// How to indent nested list items (spaces or tab).
30
38
  pub list_indent_type: ListIndentType,
31
-
32
- /// List indentation width in spaces (applied if using spaces indentation)
39
+ /// Number of spaces (or tabs) to use for each level of list indentation.
33
40
  pub list_indent_width: usize,
34
-
35
- /// Bullet characters for unordered lists (e.g., "-", "*", "+")
41
+ /// Bullet character(s) to use for unordered list items (e.g. `"-"`, `"*"`).
36
42
  pub bullets: String,
37
-
38
- /// Symbol for strong/emphasis emphasis rendering (* or _)
43
+ /// Character used for bold/italic emphasis markers (`*` or `_`).
39
44
  pub strong_em_symbol: char,
40
-
41
- /// Escape asterisks (*) in text to prevent accidental formatting
45
+ /// Escape `*` characters in plain text to avoid unintended bold/italic.
42
46
  pub escape_asterisks: bool,
43
-
44
- /// Escape underscores (_) in text to prevent accidental formatting
47
+ /// Escape `_` characters in plain text to avoid unintended bold/italic.
45
48
  pub escape_underscores: bool,
46
-
47
- /// Escape miscellaneous markdown characters (\ & < ` [ > ~ # = + | -)
49
+ /// Escape miscellaneous Markdown metacharacters (`[]()#` etc.) in plain text.
48
50
  pub escape_misc: bool,
49
-
50
- /// Escape all ASCII punctuation characters (for `CommonMark` spec compliance tests)
51
+ /// Escape ASCII characters that have special meaning in certain Markdown dialects.
51
52
  pub escape_ascii: bool,
52
-
53
- /// Default code language for fenced code blocks when not specified
53
+ /// Default language annotation for fenced code blocks that have no language hint.
54
54
  pub code_language: String,
55
-
56
- /// Use autolinks syntax for bare URLs (<http://example.com>)
55
+ /// Automatically convert bare URLs into Markdown autolinks.
57
56
  pub autolinks: bool,
58
-
59
- /// Add default title element to HTML if none exists before conversion
57
+ /// Emit a default title when no `<title>` tag is present.
60
58
  pub default_title: bool,
61
-
62
- /// Use HTML <br> elements in tables instead of spaces for line breaks
59
+ /// Render `<br>` elements inside table cells as literal line breaks.
63
60
  pub br_in_tables: bool,
64
-
65
- /// Enable spatial table reconstruction in hOCR documents (via spatial positioning analysis)
66
- pub hocr_spatial_tables: bool,
67
-
68
- /// Highlight style for <mark> elements (`DoubleEqual`, Html, Bold, None)
61
+ /// Style used for `<mark>` / highlighted text (e.g. `==text==`).
69
62
  pub highlight_style: HighlightStyle,
70
-
71
- /// Extract metadata from HTML (title, description, images, links, etc.)
63
+ /// Extract `<meta>` and `<head>` information into the result metadata.
72
64
  pub extract_metadata: bool,
73
-
74
- /// Whitespace handling mode (Normalized collapses multiple spaces, Strict preserves)
65
+ /// Controls how whitespace is normalised during conversion.
75
66
  pub whitespace_mode: WhitespaceMode,
76
-
77
- /// Strip newline characters from HTML before processing
67
+ /// Strip all newlines from the output, producing a single-line result.
78
68
  pub strip_newlines: bool,
79
-
80
- /// Enable automatic text wrapping at `wrap_width`
69
+ /// Wrap long lines at [`wrap_width`](Self::wrap_width) characters.
81
70
  pub wrap: bool,
82
-
83
- /// Text wrapping width in characters (default 80)
71
+ /// Maximum line width when [`wrap`](Self::wrap) is enabled (default `80`).
84
72
  pub wrap_width: usize,
85
-
86
- /// Treat block-level elements as inline during conversion
73
+ /// Treat the entire document as inline content (no block-level wrappers).
87
74
  pub convert_as_inline: bool,
88
-
89
- /// Custom symbol for subscript content (e.g., "~")
75
+ /// Markdown notation for subscript text (e.g. `"~"`).
90
76
  pub sub_symbol: String,
91
-
92
- /// Custom symbol for superscript content (e.g., "^")
77
+ /// Markdown notation for superscript text (e.g. `"^"`).
93
78
  pub sup_symbol: String,
94
-
95
- /// Newline style in markdown output (Spaces adds two spaces, Backslash adds \)
79
+ /// How to encode hard line breaks (`<br>`) in Markdown.
96
80
  pub newline_style: NewlineStyle,
97
-
98
- /// Code block fence style (Indented, Backticks, Tildes)
81
+ /// Style used for fenced code blocks (backticks or tilde).
99
82
  pub code_block_style: CodeBlockStyle,
100
-
101
- /// HTML elements where images should remain as markdown links (not converted to alt text)
83
+ /// HTML tag names whose `<img>` children are kept inline instead of block.
102
84
  pub keep_inline_images_in: Vec<String>,
103
-
104
- /// HTML preprocessing options (remove nav, forms, etc.)
85
+ /// Pre-processing options applied to the HTML before conversion.
105
86
  pub preprocessing: PreprocessingOptions,
106
-
107
- /// Source document encoding (informational, typically "utf-8")
87
+ /// Expected character encoding of the input HTML (default `"utf-8"`).
108
88
  pub encoding: String,
109
-
110
- /// Enable debug mode with diagnostic warnings on conversion issues
89
+ /// Emit debug information during conversion.
111
90
  pub debug: bool,
112
-
113
- /// HTML tags to strip (extract text content, no markdown conversion)
91
+ /// HTML tag names whose content is stripped from the output entirely.
114
92
  pub strip_tags: Vec<String>,
115
-
116
- /// HTML tags to preserve as-is in output (keep original HTML, useful for complex tables)
93
+ /// HTML tag names that are preserved verbatim in the output.
117
94
  pub preserve_tags: Vec<String>,
118
-
119
- /// Skip all images during conversion.
120
- /// When enabled, all `<img>` elements are completely omitted from output.
121
- /// Useful for text-only extraction or filtering out visual content.
95
+ /// Skip conversion of `<img>` elements (omit images from output).
122
96
  pub skip_images: bool,
123
-
124
- /// Output format for conversion (Markdown, Djot, or Plain)
97
+ /// Target output format (Markdown, plain text, etc.).
125
98
  pub output_format: OutputFormat,
99
+ /// Include structured document tree in result.
100
+ pub include_document_structure: bool,
101
+ /// Extract inline images from data URIs and SVGs.
102
+ pub extract_images: bool,
103
+ /// Maximum decoded image size in bytes (default 5MB).
104
+ pub max_image_size: u64,
105
+ /// Capture SVG elements as images.
106
+ pub capture_svg: bool,
107
+ /// Infer image dimensions from data.
108
+ pub infer_dimensions: bool,
109
+ }
110
+
111
+ impl Default for ConversionOptions {
112
+ fn default() -> Self {
113
+ Self {
114
+ heading_style: HeadingStyle::default(),
115
+ list_indent_type: ListIndentType::default(),
116
+ list_indent_width: 2,
117
+ bullets: "-".to_string(),
118
+ strong_em_symbol: '*',
119
+ escape_asterisks: false,
120
+ escape_underscores: false,
121
+ escape_misc: false,
122
+ escape_ascii: false,
123
+ code_language: String::new(),
124
+ autolinks: true,
125
+ default_title: false,
126
+ br_in_tables: false,
127
+ highlight_style: HighlightStyle::default(),
128
+ extract_metadata: true,
129
+ whitespace_mode: WhitespaceMode::default(),
130
+ strip_newlines: false,
131
+ wrap: false,
132
+ wrap_width: 80,
133
+ convert_as_inline: false,
134
+ sub_symbol: String::new(),
135
+ sup_symbol: String::new(),
136
+ newline_style: NewlineStyle::Spaces,
137
+ code_block_style: CodeBlockStyle::default(),
138
+ keep_inline_images_in: Vec::new(),
139
+ preprocessing: PreprocessingOptions::default(),
140
+ encoding: "utf-8".to_string(),
141
+ debug: false,
142
+ strip_tags: Vec::new(),
143
+ preserve_tags: Vec::new(),
144
+ skip_images: false,
145
+ output_format: OutputFormat::default(),
146
+ include_document_structure: false,
147
+ extract_images: false,
148
+ max_image_size: 5_242_880,
149
+ capture_svg: false,
150
+ infer_dimensions: true,
151
+ }
152
+ }
153
+ }
154
+
155
+ impl ConversionOptions {
156
+ /// Create a new builder with default values.
157
+ #[must_use]
158
+ pub fn builder() -> ConversionOptionsBuilder {
159
+ ConversionOptionsBuilder(Self::default())
160
+ }
161
+ }
162
+
163
+ // ── Builder ─────────────────────────────────────────────────────────────────
164
+
165
+ /// Builder for [`ConversionOptions`].
166
+ ///
167
+ /// All fields start with default values. Call `.build()` to produce the final options.
168
+ #[derive(Debug, Clone)]
169
+ pub struct ConversionOptionsBuilder(ConversionOptions);
170
+
171
+ macro_rules! builder_setter {
172
+ ($name:ident, $ty:ty) => {
173
+ /// Set the value.
174
+ #[must_use]
175
+ pub fn $name(mut self, value: $ty) -> Self {
176
+ self.0.$name = value;
177
+ self
178
+ }
179
+ };
180
+ }
181
+
182
+ macro_rules! builder_setter_into {
183
+ ($name:ident, $ty:ty) => {
184
+ /// Set the value.
185
+ #[must_use]
186
+ pub fn $name(mut self, value: impl Into<$ty>) -> Self {
187
+ self.0.$name = value.into();
188
+ self
189
+ }
190
+ };
191
+ }
192
+
193
+ impl ConversionOptionsBuilder {
194
+ // Output control
195
+ builder_setter!(output_format, OutputFormat);
196
+ builder_setter!(include_document_structure, bool);
197
+ builder_setter!(extract_metadata, bool);
198
+ builder_setter!(extract_images, bool);
199
+
200
+ // Markdown formatting
201
+ builder_setter!(heading_style, HeadingStyle);
202
+ builder_setter!(list_indent_type, ListIndentType);
203
+ builder_setter!(list_indent_width, usize);
204
+ builder_setter_into!(bullets, String);
205
+ builder_setter!(strong_em_symbol, char);
206
+ builder_setter!(code_block_style, CodeBlockStyle);
207
+ builder_setter!(newline_style, NewlineStyle);
208
+ builder_setter!(highlight_style, HighlightStyle);
209
+ builder_setter_into!(code_language, String);
210
+ builder_setter!(autolinks, bool);
211
+ builder_setter!(default_title, bool);
212
+ builder_setter!(br_in_tables, bool);
213
+ builder_setter_into!(sub_symbol, String);
214
+ builder_setter_into!(sup_symbol, String);
215
+
216
+ // Escaping
217
+ builder_setter!(escape_asterisks, bool);
218
+ builder_setter!(escape_underscores, bool);
219
+ builder_setter!(escape_misc, bool);
220
+ builder_setter!(escape_ascii, bool);
221
+
222
+ // Whitespace / wrapping
223
+ builder_setter!(whitespace_mode, WhitespaceMode);
224
+ builder_setter!(strip_newlines, bool);
225
+ builder_setter!(wrap, bool);
226
+ builder_setter!(wrap_width, usize);
227
+
228
+ // Element handling
229
+ builder_setter!(convert_as_inline, bool);
230
+ builder_setter!(skip_images, bool);
231
+
232
+ /// Set the list of HTML tag names whose content is stripped from output.
233
+ #[must_use]
234
+ pub fn strip_tags(mut self, tags: Vec<String>) -> Self {
235
+ self.0.strip_tags = tags;
236
+ self
237
+ }
238
+
239
+ /// Set the list of HTML tag names that are preserved verbatim in output.
240
+ #[must_use]
241
+ pub fn preserve_tags(mut self, tags: Vec<String>) -> Self {
242
+ self.0.preserve_tags = tags;
243
+ self
244
+ }
245
+
246
+ /// Set the list of HTML tag names whose `<img>` children are kept inline.
247
+ #[must_use]
248
+ pub fn keep_inline_images_in(mut self, tags: Vec<String>) -> Self {
249
+ self.0.keep_inline_images_in = tags;
250
+ self
251
+ }
252
+
253
+ // Image extraction config
254
+ builder_setter!(max_image_size, u64);
255
+ builder_setter!(capture_svg, bool);
256
+ builder_setter!(infer_dimensions, bool);
257
+
258
+ // Preprocessing
259
+ /// Set the pre-processing options applied to the HTML before conversion.
260
+ #[must_use]
261
+ pub fn preprocessing(mut self, preprocessing: PreprocessingOptions) -> Self {
262
+ self.0.preprocessing = preprocessing;
263
+ self
264
+ }
265
+
266
+ // Encoding
267
+ builder_setter_into!(encoding, String);
268
+
269
+ // Debug
270
+ builder_setter!(debug, bool);
271
+
272
+ /// Build the final [`ConversionOptions`].
273
+ #[must_use]
274
+ pub fn build(self) -> ConversionOptions {
275
+ self.0
276
+ }
126
277
  }
127
278
 
279
+ // ── ConversionOptionsUpdate (for binding crate compatibility) ────────────
280
+
281
+ use crate::options::preprocessing::PreprocessingOptionsUpdate;
282
+
128
283
  /// Partial update for `ConversionOptions`.
129
284
  ///
130
- /// This struct uses `Option<T>` to represent optional fields that can be selectively updated.
131
- /// Only specified fields (Some values) will override existing options; None values leave the
132
- /// corresponding fields unchanged when applied via [`ConversionOptions::apply_update`].
285
+ /// Uses `Option<T>` fields for selective updates. Bindings use this to construct
286
+ /// options from language-native types. Prefer [`ConversionOptionsBuilder`] for Rust code.
133
287
  #[derive(Debug, Clone, Default)]
134
288
  #[cfg_attr(
135
289
  any(feature = "serde", feature = "metadata"),
136
290
  derive(serde::Serialize, serde::Deserialize)
137
291
  )]
138
- #[cfg_attr(any(feature = "serde", feature = "metadata"), serde(rename_all = "camelCase"))]
292
+ #[cfg_attr(
293
+ any(feature = "serde", feature = "metadata"),
294
+ serde(rename_all = "camelCase", deny_unknown_fields)
295
+ )]
139
296
  pub struct ConversionOptionsUpdate {
140
- /// Optional heading style override (Underlined, Atx, `AtxClosed`)
297
+ /// Optional override for [`ConversionOptions::heading_style`].
141
298
  pub heading_style: Option<HeadingStyle>,
142
-
143
- /// Optional list indentation type override (Spaces or Tabs)
299
+ /// Optional override for [`ConversionOptions::list_indent_type`].
144
300
  pub list_indent_type: Option<ListIndentType>,
145
-
146
- /// Optional list indentation width override in spaces
301
+ /// Optional override for [`ConversionOptions::list_indent_width`].
147
302
  pub list_indent_width: Option<usize>,
148
-
149
- /// Optional bullet characters override for unordered lists
303
+ /// Optional override for [`ConversionOptions::bullets`].
150
304
  pub bullets: Option<String>,
151
-
152
- /// Optional strong/emphasis symbol override (* or _)
305
+ /// Optional override for [`ConversionOptions::strong_em_symbol`].
153
306
  pub strong_em_symbol: Option<char>,
154
-
155
- /// Optional asterisk escaping override in text content
307
+ /// Optional override for [`ConversionOptions::escape_asterisks`].
156
308
  pub escape_asterisks: Option<bool>,
157
-
158
- /// Optional underscore escaping override in text content
309
+ /// Optional override for [`ConversionOptions::escape_underscores`].
159
310
  pub escape_underscores: Option<bool>,
160
-
161
- /// Optional miscellaneous character escaping override (\ & < ` [ > ~ # = + | -)
311
+ /// Optional override for [`ConversionOptions::escape_misc`].
162
312
  pub escape_misc: Option<bool>,
163
-
164
- /// Optional ASCII punctuation escaping override (for spec compliance testing)
313
+ /// Optional override for [`ConversionOptions::escape_ascii`].
165
314
  pub escape_ascii: Option<bool>,
166
-
167
- /// Optional default code language override for fenced code blocks
315
+ /// Optional override for [`ConversionOptions::code_language`].
168
316
  pub code_language: Option<String>,
169
-
170
- /// Optional autolinks syntax override for bare URLs
317
+ /// Optional override for [`ConversionOptions::autolinks`].
171
318
  pub autolinks: Option<bool>,
172
-
173
- /// Optional default title element injection override
319
+ /// Optional override for [`ConversionOptions::default_title`].
174
320
  pub default_title: Option<bool>,
175
-
176
- /// Optional HTML <br> usage in tables override
321
+ /// Optional override for [`ConversionOptions::br_in_tables`].
177
322
  pub br_in_tables: Option<bool>,
178
-
179
- /// Optional spatial table reconstruction for hOCR documents override
180
- pub hocr_spatial_tables: Option<bool>,
181
-
182
- /// Optional highlight style override for <mark> elements
323
+ /// Optional override for [`ConversionOptions::highlight_style`].
183
324
  pub highlight_style: Option<HighlightStyle>,
184
-
185
- /// Optional metadata extraction override (title, description, images, links)
325
+ /// Optional override for [`ConversionOptions::extract_metadata`].
186
326
  pub extract_metadata: Option<bool>,
187
-
188
- /// Optional whitespace handling mode override (Normalized or Strict)
327
+ /// Optional override for [`ConversionOptions::whitespace_mode`].
189
328
  pub whitespace_mode: Option<WhitespaceMode>,
190
-
191
- /// Optional newline stripping override before processing
329
+ /// Optional override for [`ConversionOptions::strip_newlines`].
192
330
  pub strip_newlines: Option<bool>,
193
-
194
- /// Optional automatic text wrapping override
331
+ /// Optional override for [`ConversionOptions::wrap`].
195
332
  pub wrap: Option<bool>,
196
-
197
- /// Optional text wrapping width override in characters
333
+ /// Optional override for [`ConversionOptions::wrap_width`].
198
334
  pub wrap_width: Option<usize>,
199
-
200
- /// Optional block-level to inline conversion override
335
+ /// Optional override for [`ConversionOptions::convert_as_inline`].
201
336
  pub convert_as_inline: Option<bool>,
202
-
203
- /// Optional subscript symbol override
337
+ /// Optional override for [`ConversionOptions::sub_symbol`].
204
338
  pub sub_symbol: Option<String>,
205
-
206
- /// Optional superscript symbol override
339
+ /// Optional override for [`ConversionOptions::sup_symbol`].
207
340
  pub sup_symbol: Option<String>,
208
-
209
- /// Optional newline style override for markdown output
341
+ /// Optional override for [`ConversionOptions::newline_style`].
210
342
  pub newline_style: Option<NewlineStyle>,
211
-
212
- /// Optional code block fence style override (Indented, Backticks, Tildes)
343
+ /// Optional override for [`ConversionOptions::code_block_style`].
213
344
  pub code_block_style: Option<CodeBlockStyle>,
214
-
215
- /// Optional context elements where images remain as markdown links override
345
+ /// Optional override for [`ConversionOptions::keep_inline_images_in`].
216
346
  pub keep_inline_images_in: Option<Vec<String>>,
217
-
218
- /// Optional preprocessing options partial update
347
+ /// Optional override for [`ConversionOptions::preprocessing`].
219
348
  pub preprocessing: Option<PreprocessingOptionsUpdate>,
220
-
221
- /// Optional source document encoding override
349
+ /// Optional override for [`ConversionOptions::encoding`].
222
350
  pub encoding: Option<String>,
223
-
224
- /// Optional debug mode override for diagnostic warnings
351
+ /// Optional override for [`ConversionOptions::debug`].
225
352
  pub debug: Option<bool>,
226
-
227
- /// Optional HTML tags to strip override (extract text, no conversion)
353
+ /// Optional override for [`ConversionOptions::strip_tags`].
228
354
  pub strip_tags: Option<Vec<String>>,
229
-
230
- /// Optional HTML tags to preserve as-is override in output
355
+ /// Optional override for [`ConversionOptions::preserve_tags`].
231
356
  pub preserve_tags: Option<Vec<String>>,
232
-
233
- /// Optional skip images override
357
+ /// Optional override for [`ConversionOptions::skip_images`].
234
358
  pub skip_images: Option<bool>,
235
-
236
- /// Optional output format override (Markdown, Djot, or Plain)
359
+ /// Optional override for [`ConversionOptions::output_format`].
237
360
  pub output_format: Option<OutputFormat>,
238
- }
239
-
240
- impl Default for ConversionOptions {
241
- fn default() -> Self {
242
- Self {
243
- heading_style: HeadingStyle::default(),
244
- list_indent_type: ListIndentType::default(),
245
- list_indent_width: 2,
246
- bullets: "-".to_string(),
247
- strong_em_symbol: '*',
248
- escape_asterisks: false,
249
- escape_underscores: false,
250
- escape_misc: false,
251
- escape_ascii: false,
252
- code_language: String::new(),
253
- autolinks: true,
254
- default_title: false,
255
- br_in_tables: false,
256
- hocr_spatial_tables: true,
257
- highlight_style: HighlightStyle::default(),
258
- extract_metadata: true,
259
- whitespace_mode: WhitespaceMode::default(),
260
- strip_newlines: false,
261
- wrap: false,
262
- wrap_width: 80,
263
- convert_as_inline: false,
264
- sub_symbol: String::new(),
265
- sup_symbol: String::new(),
266
- newline_style: NewlineStyle::Spaces,
267
- code_block_style: CodeBlockStyle::default(),
268
- keep_inline_images_in: Vec::new(),
269
- preprocessing: PreprocessingOptions::default(),
270
- encoding: "utf-8".to_string(),
271
- debug: false,
272
- strip_tags: Vec::new(),
273
- preserve_tags: Vec::new(),
274
- skip_images: false,
275
- output_format: OutputFormat::default(),
276
- }
277
- }
361
+ /// Optional override for [`ConversionOptions::include_document_structure`].
362
+ pub include_document_structure: Option<bool>,
363
+ /// Optional override for [`ConversionOptions::extract_images`].
364
+ pub extract_images: Option<bool>,
365
+ /// Optional override for [`ConversionOptions::max_image_size`].
366
+ pub max_image_size: Option<u64>,
367
+ /// Optional override for [`ConversionOptions::capture_svg`].
368
+ pub capture_svg: Option<bool>,
369
+ /// Optional override for [`ConversionOptions::infer_dimensions`].
370
+ pub infer_dimensions: Option<bool>,
278
371
  }
279
372
 
280
373
  impl ConversionOptions {
281
374
  /// Apply a partial update to these conversion options.
282
- ///
283
- /// Any specified fields in the update will override the current values.
284
- /// Unspecified fields (None) are left unchanged.
285
- ///
286
- /// # Arguments
287
- ///
288
- /// * `update` - Partial options update with fields to override
289
375
  pub fn apply_update(&mut self, update: ConversionOptionsUpdate) {
290
- if let Some(heading_style) = update.heading_style {
291
- self.heading_style = heading_style;
292
- }
293
- if let Some(list_indent_type) = update.list_indent_type {
294
- self.list_indent_type = list_indent_type;
295
- }
296
- if let Some(list_indent_width) = update.list_indent_width {
297
- self.list_indent_width = list_indent_width;
298
- }
299
- if let Some(bullets) = update.bullets {
300
- self.bullets = bullets;
301
- }
302
- if let Some(strong_em_symbol) = update.strong_em_symbol {
303
- self.strong_em_symbol = strong_em_symbol;
304
- }
305
- if let Some(escape_asterisks) = update.escape_asterisks {
306
- self.escape_asterisks = escape_asterisks;
307
- }
308
- if let Some(escape_underscores) = update.escape_underscores {
309
- self.escape_underscores = escape_underscores;
310
- }
311
- if let Some(escape_misc) = update.escape_misc {
312
- self.escape_misc = escape_misc;
313
- }
314
- if let Some(escape_ascii) = update.escape_ascii {
315
- self.escape_ascii = escape_ascii;
316
- }
317
- if let Some(code_language) = update.code_language {
318
- self.code_language = code_language;
319
- }
320
- if let Some(autolinks) = update.autolinks {
321
- self.autolinks = autolinks;
322
- }
323
- if let Some(default_title) = update.default_title {
324
- self.default_title = default_title;
325
- }
326
- if let Some(br_in_tables) = update.br_in_tables {
327
- self.br_in_tables = br_in_tables;
328
- }
329
- if let Some(hocr_spatial_tables) = update.hocr_spatial_tables {
330
- self.hocr_spatial_tables = hocr_spatial_tables;
331
- }
332
- if let Some(highlight_style) = update.highlight_style {
333
- self.highlight_style = highlight_style;
334
- }
335
- if let Some(extract_metadata) = update.extract_metadata {
336
- self.extract_metadata = extract_metadata;
337
- }
338
- if let Some(whitespace_mode) = update.whitespace_mode {
339
- self.whitespace_mode = whitespace_mode;
340
- }
341
- if let Some(strip_newlines) = update.strip_newlines {
342
- self.strip_newlines = strip_newlines;
343
- }
344
- if let Some(wrap) = update.wrap {
345
- self.wrap = wrap;
346
- }
347
- if let Some(wrap_width) = update.wrap_width {
348
- self.wrap_width = wrap_width;
349
- }
350
- if let Some(convert_as_inline) = update.convert_as_inline {
351
- self.convert_as_inline = convert_as_inline;
352
- }
353
- if let Some(sub_symbol) = update.sub_symbol {
354
- self.sub_symbol = sub_symbol;
355
- }
356
- if let Some(sup_symbol) = update.sup_symbol {
357
- self.sup_symbol = sup_symbol;
358
- }
359
- if let Some(newline_style) = update.newline_style {
360
- self.newline_style = newline_style;
361
- }
362
- if let Some(code_block_style) = update.code_block_style {
363
- self.code_block_style = code_block_style;
364
- }
365
- if let Some(keep_inline_images_in) = update.keep_inline_images_in {
366
- self.keep_inline_images_in = keep_inline_images_in;
367
- }
376
+ macro_rules! apply {
377
+ ($field:ident) => {
378
+ if let Some(v) = update.$field {
379
+ self.$field = v;
380
+ }
381
+ };
382
+ }
383
+ apply!(heading_style);
384
+ apply!(list_indent_type);
385
+ apply!(list_indent_width);
386
+ apply!(bullets);
387
+ apply!(strong_em_symbol);
388
+ apply!(escape_asterisks);
389
+ apply!(escape_underscores);
390
+ apply!(escape_misc);
391
+ apply!(escape_ascii);
392
+ apply!(code_language);
393
+ apply!(autolinks);
394
+ apply!(default_title);
395
+ apply!(br_in_tables);
396
+ apply!(highlight_style);
397
+ apply!(extract_metadata);
398
+ apply!(whitespace_mode);
399
+ apply!(strip_newlines);
400
+ apply!(wrap);
401
+ apply!(wrap_width);
402
+ apply!(convert_as_inline);
403
+ apply!(sub_symbol);
404
+ apply!(sup_symbol);
405
+ apply!(newline_style);
406
+ apply!(code_block_style);
407
+ apply!(keep_inline_images_in);
408
+ apply!(encoding);
409
+ apply!(debug);
410
+ apply!(strip_tags);
411
+ apply!(preserve_tags);
412
+ apply!(skip_images);
413
+ apply!(output_format);
414
+ apply!(include_document_structure);
415
+ apply!(extract_images);
416
+ apply!(max_image_size);
417
+ apply!(capture_svg);
418
+ apply!(infer_dimensions);
368
419
  if let Some(preprocessing) = update.preprocessing {
369
420
  self.preprocessing.apply_update(preprocessing);
370
421
  }
371
- if let Some(encoding) = update.encoding {
372
- self.encoding = encoding;
373
- }
374
- if let Some(debug) = update.debug {
375
- self.debug = debug;
376
- }
377
- if let Some(strip_tags) = update.strip_tags {
378
- self.strip_tags = strip_tags;
379
- }
380
- if let Some(preserve_tags) = update.preserve_tags {
381
- self.preserve_tags = preserve_tags;
382
- }
383
- if let Some(skip_images) = update.skip_images {
384
- self.skip_images = skip_images;
385
- }
386
- if let Some(output_format) = update.output_format {
387
- self.output_format = output_format;
388
- }
389
422
  }
390
423
 
391
- /// Create new conversion options from a partial update.
392
- ///
393
- /// Creates a new `ConversionOptions` struct with defaults, then applies the update.
394
- /// Fields not specified in the update keep their default values.
395
- ///
396
- /// # Arguments
397
- ///
398
- /// * `update` - Partial options update with fields to set
399
- ///
400
- /// # Returns
401
- ///
402
- /// New `ConversionOptions` with specified updates applied to defaults
424
+ /// Create from a partial update, applying to defaults.
403
425
  #[must_use]
404
426
  pub fn from_update(update: ConversionOptionsUpdate) -> Self {
405
427
  let mut options = Self::default();
@@ -414,28 +436,25 @@ impl From<ConversionOptionsUpdate> for ConversionOptions {
414
436
  }
415
437
  }
416
438
 
439
+ // ── Tests ───────────────────────────────────────────────────────────────────
440
+
417
441
  #[cfg(all(test, any(feature = "serde", feature = "metadata")))]
418
442
  mod tests {
419
443
  use super::*;
420
444
 
421
445
  #[test]
422
446
  fn test_conversion_options_serde() {
423
- let options = ConversionOptions {
424
- heading_style: HeadingStyle::AtxClosed,
425
- list_indent_width: 4,
426
- bullets: "*".to_string(),
427
- escape_asterisks: true,
428
- whitespace_mode: WhitespaceMode::Strict,
429
- ..Default::default()
430
- };
431
-
432
- // Serialize to JSON
433
- let json = serde_json::to_string(&options).expect("Failed to serialize");
447
+ let options = ConversionOptions::builder()
448
+ .heading_style(HeadingStyle::AtxClosed)
449
+ .list_indent_width(4)
450
+ .bullets("*")
451
+ .escape_asterisks(true)
452
+ .whitespace_mode(WhitespaceMode::Strict)
453
+ .build();
434
454
 
435
- // Deserialize back
455
+ let json = serde_json::to_string(&options).expect("Failed to serialize");
436
456
  let deserialized: ConversionOptions = serde_json::from_str(&json).expect("Failed to deserialize");
437
457
 
438
- // Verify values
439
458
  assert_eq!(deserialized.list_indent_width, 4);
440
459
  assert_eq!(deserialized.bullets, "*");
441
460
  assert!(deserialized.escape_asterisks);
@@ -445,7 +464,6 @@ mod tests {
445
464
 
446
465
  #[test]
447
466
  fn test_conversion_options_partial_deserialization() {
448
- // Test that partial JSON can be deserialized using defaults for missing fields
449
467
  let partial_json = r#"{
450
468
  "headingStyle": "atxClosed",
451
469
  "listIndentWidth": 4,
@@ -455,14 +473,28 @@ mod tests {
455
473
  let deserialized: ConversionOptions =
456
474
  serde_json::from_str(partial_json).expect("Failed to deserialize partial JSON");
457
475
 
458
- // Verify specified values
459
476
  assert_eq!(deserialized.heading_style, HeadingStyle::AtxClosed);
460
477
  assert_eq!(deserialized.list_indent_width, 4);
461
478
  assert_eq!(deserialized.bullets, "*");
479
+ assert!(!deserialized.escape_asterisks);
480
+ assert!(!deserialized.escape_underscores);
481
+ assert_eq!(deserialized.list_indent_type, ListIndentType::Spaces);
482
+ }
462
483
 
463
- // Verify missing fields use defaults
464
- assert!(!deserialized.escape_asterisks); // default
465
- assert!(!deserialized.escape_underscores); // default
466
- assert_eq!(deserialized.list_indent_type, ListIndentType::Spaces); // default
484
+ #[test]
485
+ fn test_builder_pattern() {
486
+ let options = ConversionOptions::builder()
487
+ .heading_style(HeadingStyle::Underlined)
488
+ .wrap(true)
489
+ .wrap_width(100)
490
+ .include_document_structure(true)
491
+ .extract_images(true)
492
+ .build();
493
+
494
+ assert_eq!(options.heading_style, HeadingStyle::Underlined);
495
+ assert!(options.wrap);
496
+ assert_eq!(options.wrap_width, 100);
497
+ assert!(options.include_document_structure);
498
+ assert!(options.extract_images);
467
499
  }
468
500
  }