html-to-markdown 2.30.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +4 -14
  3. data/README.md +37 -50
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +13 -701
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -4
  6. data/ext/html-to-markdown-rb/native/README.md +4 -13
  7. data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +2 -73
  8. data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +5 -49
  9. data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -6
  10. data/ext/html-to-markdown-rb/native/src/lib.rs +76 -213
  11. data/ext/html-to-markdown-rb/native/src/options.rs +0 -3
  12. data/lib/html_to_markdown/version.rb +1 -1
  13. data/lib/html_to_markdown.rb +13 -194
  14. data/sig/html_to_markdown.rbs +12 -373
  15. data/vendor/Cargo.toml +5 -2
  16. data/vendor/html-to-markdown-rs/Cargo.toml +4 -10
  17. data/vendor/html-to-markdown-rs/README.md +126 -52
  18. data/vendor/html-to-markdown-rs/examples/basic.rs +6 -1
  19. data/vendor/html-to-markdown-rs/examples/table.rs +6 -1
  20. data/vendor/html-to-markdown-rs/examples/test_escape.rs +6 -1
  21. data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +8 -2
  22. data/vendor/html-to-markdown-rs/examples/test_lists.rs +6 -1
  23. data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +6 -1
  24. data/vendor/html-to-markdown-rs/examples/test_tables.rs +6 -1
  25. data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +6 -1
  26. data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +6 -1
  27. data/vendor/html-to-markdown-rs/src/convert_api.rs +151 -745
  28. data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +3 -5
  29. data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -7
  30. data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +18 -5
  31. data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +10 -0
  32. data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +3 -5
  33. data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +16 -11
  34. data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +20 -0
  35. data/vendor/html-to-markdown-rs/src/converter/block/table/cells.rs +4 -17
  36. data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +140 -0
  37. data/vendor/html-to-markdown-rs/src/converter/block/table/scanner.rs +4 -18
  38. data/vendor/html-to-markdown-rs/src/converter/block/table/utils.rs +2 -18
  39. data/vendor/html-to-markdown-rs/src/converter/context.rs +8 -0
  40. data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -6
  41. data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
  42. data/vendor/html-to-markdown-rs/src/converter/handlers/blockquote.rs +4 -5
  43. data/vendor/html-to-markdown-rs/src/converter/handlers/code_block.rs +5 -10
  44. data/vendor/html-to-markdown-rs/src/converter/handlers/graphic.rs +3 -5
  45. data/vendor/html-to-markdown-rs/src/converter/handlers/image.rs +3 -5
  46. data/vendor/html-to-markdown-rs/src/converter/handlers/link.rs +3 -5
  47. data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +3 -5
  48. data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +4 -10
  49. data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +4 -170
  50. data/vendor/html-to-markdown-rs/src/converter/inline/semantic/marks.rs +7 -19
  51. data/vendor/html-to-markdown-rs/src/converter/list/item.rs +3 -5
  52. data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +4 -10
  53. data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +6 -12
  54. data/vendor/html-to-markdown-rs/src/converter/list/utils.rs +1 -12
  55. data/vendor/html-to-markdown-rs/src/converter/main.rs +85 -56
  56. data/vendor/html-to-markdown-rs/src/converter/main_helpers.rs +4 -68
  57. data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +1 -5
  58. data/vendor/html-to-markdown-rs/src/converter/media/graphic.rs +3 -40
  59. data/vendor/html-to-markdown-rs/src/converter/media/image.rs +0 -8
  60. data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +3 -13
  61. data/vendor/html-to-markdown-rs/src/converter/metadata.rs +1 -1
  62. data/vendor/html-to-markdown-rs/src/converter/mod.rs +0 -8
  63. data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +37 -12
  64. data/vendor/html-to-markdown-rs/src/converter/semantic/attributes.rs +5 -30
  65. data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +29 -0
  66. data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +1 -36
  67. data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +1 -3
  68. data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -53
  69. data/vendor/html-to-markdown-rs/src/converter/text_node.rs +1 -1
  70. data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +0 -41
  71. data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +2 -1
  72. data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +15 -98
  73. data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +113 -4
  74. data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +3 -0
  75. data/vendor/html-to-markdown-rs/src/converter/visitor_hooks.rs +4 -10
  76. data/vendor/html-to-markdown-rs/src/exports.rs +1 -4
  77. data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
  78. data/vendor/html-to-markdown-rs/src/lib.rs +13 -133
  79. data/vendor/html-to-markdown-rs/src/metadata/collector.rs +4 -4
  80. data/vendor/html-to-markdown-rs/src/metadata/mod.rs +22 -22
  81. data/vendor/html-to-markdown-rs/src/metadata/types.rs +3 -3
  82. data/vendor/html-to-markdown-rs/src/options/conversion.rs +351 -323
  83. data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +8 -2
  84. data/vendor/html-to-markdown-rs/src/prelude.rs +1 -15
  85. data/vendor/html-to-markdown-rs/src/rcdom.rs +7 -1
  86. data/vendor/html-to-markdown-rs/src/text.rs +25 -14
  87. data/vendor/html-to-markdown-rs/src/types/document.rs +175 -0
  88. data/vendor/html-to-markdown-rs/src/types/mod.rs +17 -0
  89. data/vendor/html-to-markdown-rs/src/types/result.rs +49 -0
  90. data/vendor/html-to-markdown-rs/src/types/structure_builder.rs +790 -0
  91. data/vendor/html-to-markdown-rs/src/types/structure_collector.rs +442 -0
  92. data/vendor/html-to-markdown-rs/src/types/tables.rs +47 -0
  93. data/vendor/html-to-markdown-rs/src/types/warnings.rs +28 -0
  94. data/vendor/html-to-markdown-rs/src/visitor/mod.rs +0 -6
  95. data/vendor/html-to-markdown-rs/src/visitor/traits.rs +0 -1
  96. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/mod.rs +1 -21
  97. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/mod.rs +0 -5
  98. data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +1 -845
  99. data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +8 -1
  100. data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +8 -8
  101. data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +8 -2
  102. data/vendor/html-to-markdown-rs/tests/integration_test.rs +23 -6
  103. data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +8 -1
  104. data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +8 -2
  105. data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +6 -1
  106. data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +8 -1
  107. data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +8 -1
  108. data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +8 -1
  109. data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -1
  110. data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +8 -1
  111. data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +8 -7
  112. data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +8 -7
  113. data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +12 -2
  114. data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +8 -1
  115. data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +6 -1
  116. data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +6 -1
  117. data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +6 -1
  118. data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +6 -1
  119. data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +4 -6
  120. data/vendor/html-to-markdown-rs/tests/lists_test.rs +8 -1
  121. data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +8 -2
  122. data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +8 -1
  123. data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +8 -11
  124. data/vendor/html-to-markdown-rs/tests/tables_test.rs +12 -2
  125. data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +8 -1
  126. data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +8 -1
  127. data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +17 -28
  128. data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +8 -1
  129. data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +29 -33
  130. data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +8 -1
  131. metadata +9 -37
  132. data/bin/benchmark.rb +0 -232
  133. data/ext/html-to-markdown-rb/native/src/conversion/tables.rs +0 -71
  134. data/ext/html-to-markdown-rb/native/src/profiling.rs +0 -215
  135. data/ext/html-to-markdown-rb/native/src/visitor/bridge.rs +0 -252
  136. data/ext/html-to-markdown-rb/native/src/visitor/callbacks.rs +0 -640
  137. data/ext/html-to-markdown-rb/native/src/visitor/mod.rs +0 -12
  138. data/spec/convert_spec.rb +0 -77
  139. data/spec/convert_with_tables_spec.rb +0 -194
  140. data/spec/metadata_extraction_spec.rb +0 -437
  141. data/spec/visitor_issue_187_spec.rb +0 -605
  142. data/spec/visitor_spec.rb +0 -1149
  143. data/vendor/html-to-markdown-rs/src/hocr/converter/code_analysis.rs +0 -254
  144. data/vendor/html-to-markdown-rs/src/hocr/converter/core.rs +0 -249
  145. data/vendor/html-to-markdown-rs/src/hocr/converter/elements.rs +0 -382
  146. data/vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +0 -379
  147. data/vendor/html-to-markdown-rs/src/hocr/converter/keywords.rs +0 -55
  148. data/vendor/html-to-markdown-rs/src/hocr/converter/layout.rs +0 -313
  149. data/vendor/html-to-markdown-rs/src/hocr/converter/mod.rs +0 -26
  150. data/vendor/html-to-markdown-rs/src/hocr/converter/output.rs +0 -78
  151. data/vendor/html-to-markdown-rs/src/hocr/extractor.rs +0 -232
  152. data/vendor/html-to-markdown-rs/src/hocr/mod.rs +0 -42
  153. data/vendor/html-to-markdown-rs/src/hocr/parser.rs +0 -333
  154. data/vendor/html-to-markdown-rs/src/hocr/spatial/coords.rs +0 -129
  155. data/vendor/html-to-markdown-rs/src/hocr/spatial/grouping.rs +0 -165
  156. data/vendor/html-to-markdown-rs/src/hocr/spatial/layout.rs +0 -335
  157. data/vendor/html-to-markdown-rs/src/hocr/spatial/mod.rs +0 -15
  158. data/vendor/html-to-markdown-rs/src/hocr/spatial/output.rs +0 -63
  159. data/vendor/html-to-markdown-rs/src/hocr/types.rs +0 -269
  160. data/vendor/html-to-markdown-rs/src/visitor/async_traits.rs +0 -249
  161. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge.rs +0 -189
  162. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge_visitor.rs +0 -343
  163. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/macros.rs +0 -217
  164. data/vendor/html-to-markdown-rs/tests/async_visitor_test.rs +0 -57
  165. data/vendor/html-to-markdown-rs/tests/convert_with_metadata_no_frontmatter.rs +0 -100
  166. data/vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +0 -509
@@ -1,18 +1,27 @@
1
1
  #![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]
2
2
 
3
- //! Main conversion options and their builder utilities.
4
- //!
5
- //! This module provides the primary `ConversionOptions` struct with all configuration
6
- //! settings for HTML to Markdown conversion, along with partial update support for
7
- //! selective option modifications.
3
+ //! Main conversion options with builder pattern.
8
4
 
9
5
  use crate::options::preprocessing::PreprocessingOptions;
10
- use crate::options::preprocessing::PreprocessingOptionsUpdate;
11
6
  use crate::options::validation::{
12
7
  CodeBlockStyle, HeadingStyle, HighlightStyle, ListIndentType, NewlineStyle, OutputFormat, WhitespaceMode,
13
8
  };
14
9
 
15
10
  /// Main conversion options for HTML to Markdown conversion.
11
+ ///
12
+ /// Use [`ConversionOptions::builder()`] to construct, or [`Default::default()`] for defaults.
13
+ ///
14
+ /// # Example
15
+ ///
16
+ /// ```rust,ignore
17
+ /// use html_to_markdown_rs::ConversionOptions;
18
+ ///
19
+ /// let options = ConversionOptions::builder()
20
+ /// .heading_style(HeadingStyle::Atx)
21
+ /// .wrap(true)
22
+ /// .wrap_width(100)
23
+ /// .build();
24
+ /// ```
16
25
  #[derive(Debug, Clone)]
17
26
  #[cfg_attr(
18
27
  any(feature = "serde", feature = "metadata"),
@@ -20,390 +29,399 @@ use crate::options::validation::{
20
29
  )]
21
30
  #[cfg_attr(
22
31
  any(feature = "serde", feature = "metadata"),
23
- serde(rename_all = "camelCase", default)
32
+ serde(rename_all = "camelCase", default, deny_unknown_fields)
24
33
  )]
25
34
  pub struct ConversionOptions {
26
- /// Heading style (Underlined, Atx, `AtxClosed`)
35
+ /// Heading style to use in Markdown output (ATX `#` or Setext underline).
27
36
  pub heading_style: HeadingStyle,
28
-
29
- /// List indentation type (Spaces or Tabs)
37
+ /// How to indent nested list items (spaces or tab).
30
38
  pub list_indent_type: ListIndentType,
31
-
32
- /// List indentation width in spaces (applied if using spaces indentation)
39
+ /// Number of spaces (or tabs) to use for each level of list indentation.
33
40
  pub list_indent_width: usize,
34
-
35
- /// Bullet characters for unordered lists (e.g., "-", "*", "+")
41
+ /// Bullet character(s) to use for unordered list items (e.g. `"-"`, `"*"`).
36
42
  pub bullets: String,
37
-
38
- /// Symbol for strong/emphasis emphasis rendering (* or _)
43
+ /// Character used for bold/italic emphasis markers (`*` or `_`).
39
44
  pub strong_em_symbol: char,
40
-
41
- /// Escape asterisks (*) in text to prevent accidental formatting
45
+ /// Escape `*` characters in plain text to avoid unintended bold/italic.
42
46
  pub escape_asterisks: bool,
43
-
44
- /// Escape underscores (_) in text to prevent accidental formatting
47
+ /// Escape `_` characters in plain text to avoid unintended bold/italic.
45
48
  pub escape_underscores: bool,
46
-
47
- /// Escape miscellaneous markdown characters (\ & < ` [ > ~ # = + | -)
49
+ /// Escape miscellaneous Markdown metacharacters (`[]()#` etc.) in plain text.
48
50
  pub escape_misc: bool,
49
-
50
- /// Escape all ASCII punctuation characters (for `CommonMark` spec compliance tests)
51
+ /// Escape ASCII characters that have special meaning in certain Markdown dialects.
51
52
  pub escape_ascii: bool,
52
-
53
- /// Default code language for fenced code blocks when not specified
53
+ /// Default language annotation for fenced code blocks that have no language hint.
54
54
  pub code_language: String,
55
-
56
- /// Use autolinks syntax for bare URLs (<http://example.com>)
55
+ /// Automatically convert bare URLs into Markdown autolinks.
57
56
  pub autolinks: bool,
58
-
59
- /// Add default title element to HTML if none exists before conversion
57
+ /// Emit a default title when no `<title>` tag is present.
60
58
  pub default_title: bool,
61
-
62
- /// Use HTML <br> elements in tables instead of spaces for line breaks
59
+ /// Render `<br>` elements inside table cells as literal line breaks.
63
60
  pub br_in_tables: bool,
64
-
65
- /// Enable spatial table reconstruction in hOCR documents (via spatial positioning analysis).
66
- ///
67
- /// **Deprecated since 2.30.0**: hOCR support will be removed in v3.
68
- pub hocr_spatial_tables: bool,
69
-
70
- /// Highlight style for <mark> elements (`DoubleEqual`, Html, Bold, None)
61
+ /// Style used for `<mark>` / highlighted text (e.g. `==text==`).
71
62
  pub highlight_style: HighlightStyle,
72
-
73
- /// Extract metadata from HTML (title, description, images, links, etc.)
63
+ /// Extract `<meta>` and `<head>` information into the result metadata.
74
64
  pub extract_metadata: bool,
75
-
76
- /// Whitespace handling mode (Normalized collapses multiple spaces, Strict preserves)
65
+ /// Controls how whitespace is normalised during conversion.
77
66
  pub whitespace_mode: WhitespaceMode,
78
-
79
- /// Strip newline characters from HTML before processing
67
+ /// Strip all newlines from the output, producing a single-line result.
80
68
  pub strip_newlines: bool,
81
-
82
- /// Enable automatic text wrapping at `wrap_width`
69
+ /// Wrap long lines at [`wrap_width`](Self::wrap_width) characters.
83
70
  pub wrap: bool,
84
-
85
- /// Text wrapping width in characters (default 80)
71
+ /// Maximum line width when [`wrap`](Self::wrap) is enabled (default `80`).
86
72
  pub wrap_width: usize,
87
-
88
- /// Treat block-level elements as inline during conversion
73
+ /// Treat the entire document as inline content (no block-level wrappers).
89
74
  pub convert_as_inline: bool,
90
-
91
- /// Custom symbol for subscript content (e.g., "~")
75
+ /// Markdown notation for subscript text (e.g. `"~"`).
92
76
  pub sub_symbol: String,
93
-
94
- /// Custom symbol for superscript content (e.g., "^")
77
+ /// Markdown notation for superscript text (e.g. `"^"`).
95
78
  pub sup_symbol: String,
96
-
97
- /// Newline style in markdown output (Spaces adds two spaces, Backslash adds \)
79
+ /// How to encode hard line breaks (`<br>`) in Markdown.
98
80
  pub newline_style: NewlineStyle,
99
-
100
- /// Code block fence style (Indented, Backticks, Tildes)
81
+ /// Style used for fenced code blocks (backticks or tilde).
101
82
  pub code_block_style: CodeBlockStyle,
102
-
103
- /// HTML elements where images should remain as markdown links (not converted to alt text)
83
+ /// HTML tag names whose `<img>` children are kept inline instead of block.
104
84
  pub keep_inline_images_in: Vec<String>,
105
-
106
- /// HTML preprocessing options (remove nav, forms, etc.)
85
+ /// Pre-processing options applied to the HTML before conversion.
107
86
  pub preprocessing: PreprocessingOptions,
108
-
109
- /// Source document encoding (informational, typically "utf-8")
87
+ /// Expected character encoding of the input HTML (default `"utf-8"`).
110
88
  pub encoding: String,
111
-
112
- /// Enable debug mode with diagnostic warnings on conversion issues
89
+ /// Emit debug information during conversion.
113
90
  pub debug: bool,
114
-
115
- /// HTML tags to strip (extract text content, no markdown conversion)
91
+ /// HTML tag names whose content is stripped from the output entirely.
116
92
  pub strip_tags: Vec<String>,
117
-
118
- /// HTML tags to preserve as-is in output (keep original HTML, useful for complex tables)
93
+ /// HTML tag names that are preserved verbatim in the output.
119
94
  pub preserve_tags: Vec<String>,
120
-
121
- /// Skip all images during conversion.
122
- /// When enabled, all `<img>` elements are completely omitted from output.
123
- /// Useful for text-only extraction or filtering out visual content.
95
+ /// Skip conversion of `<img>` elements (omit images from output).
124
96
  pub skip_images: bool,
125
-
126
- /// Output format for conversion (Markdown, Djot, or Plain)
97
+ /// Target output format (Markdown, plain text, etc.).
127
98
  pub output_format: OutputFormat,
99
+ /// Include structured document tree in result.
100
+ pub include_document_structure: bool,
101
+ /// Extract inline images from data URIs and SVGs.
102
+ pub extract_images: bool,
103
+ /// Maximum decoded image size in bytes (default 5MB).
104
+ pub max_image_size: u64,
105
+ /// Capture SVG elements as images.
106
+ pub capture_svg: bool,
107
+ /// Infer image dimensions from data.
108
+ pub infer_dimensions: bool,
109
+ }
110
+
111
+ impl Default for ConversionOptions {
112
+ fn default() -> Self {
113
+ Self {
114
+ heading_style: HeadingStyle::default(),
115
+ list_indent_type: ListIndentType::default(),
116
+ list_indent_width: 2,
117
+ bullets: "-".to_string(),
118
+ strong_em_symbol: '*',
119
+ escape_asterisks: false,
120
+ escape_underscores: false,
121
+ escape_misc: false,
122
+ escape_ascii: false,
123
+ code_language: String::new(),
124
+ autolinks: true,
125
+ default_title: false,
126
+ br_in_tables: false,
127
+ highlight_style: HighlightStyle::default(),
128
+ extract_metadata: true,
129
+ whitespace_mode: WhitespaceMode::default(),
130
+ strip_newlines: false,
131
+ wrap: false,
132
+ wrap_width: 80,
133
+ convert_as_inline: false,
134
+ sub_symbol: String::new(),
135
+ sup_symbol: String::new(),
136
+ newline_style: NewlineStyle::Spaces,
137
+ code_block_style: CodeBlockStyle::default(),
138
+ keep_inline_images_in: Vec::new(),
139
+ preprocessing: PreprocessingOptions::default(),
140
+ encoding: "utf-8".to_string(),
141
+ debug: false,
142
+ strip_tags: Vec::new(),
143
+ preserve_tags: Vec::new(),
144
+ skip_images: false,
145
+ output_format: OutputFormat::default(),
146
+ include_document_structure: false,
147
+ extract_images: false,
148
+ max_image_size: 5_242_880,
149
+ capture_svg: false,
150
+ infer_dimensions: true,
151
+ }
152
+ }
153
+ }
154
+
155
+ impl ConversionOptions {
156
+ /// Create a new builder with default values.
157
+ #[must_use]
158
+ pub fn builder() -> ConversionOptionsBuilder {
159
+ ConversionOptionsBuilder(Self::default())
160
+ }
161
+ }
162
+
163
+ // ── Builder ─────────────────────────────────────────────────────────────────
164
+
165
+ /// Builder for [`ConversionOptions`].
166
+ ///
167
+ /// All fields start with default values. Call `.build()` to produce the final options.
168
+ #[derive(Debug, Clone)]
169
+ pub struct ConversionOptionsBuilder(ConversionOptions);
170
+
171
+ macro_rules! builder_setter {
172
+ ($name:ident, $ty:ty) => {
173
+ /// Set the value.
174
+ #[must_use]
175
+ pub fn $name(mut self, value: $ty) -> Self {
176
+ self.0.$name = value;
177
+ self
178
+ }
179
+ };
180
+ }
181
+
182
+ macro_rules! builder_setter_into {
183
+ ($name:ident, $ty:ty) => {
184
+ /// Set the value.
185
+ #[must_use]
186
+ pub fn $name(mut self, value: impl Into<$ty>) -> Self {
187
+ self.0.$name = value.into();
188
+ self
189
+ }
190
+ };
191
+ }
192
+
193
+ impl ConversionOptionsBuilder {
194
+ // Output control
195
+ builder_setter!(output_format, OutputFormat);
196
+ builder_setter!(include_document_structure, bool);
197
+ builder_setter!(extract_metadata, bool);
198
+ builder_setter!(extract_images, bool);
199
+
200
+ // Markdown formatting
201
+ builder_setter!(heading_style, HeadingStyle);
202
+ builder_setter!(list_indent_type, ListIndentType);
203
+ builder_setter!(list_indent_width, usize);
204
+ builder_setter_into!(bullets, String);
205
+ builder_setter!(strong_em_symbol, char);
206
+ builder_setter!(code_block_style, CodeBlockStyle);
207
+ builder_setter!(newline_style, NewlineStyle);
208
+ builder_setter!(highlight_style, HighlightStyle);
209
+ builder_setter_into!(code_language, String);
210
+ builder_setter!(autolinks, bool);
211
+ builder_setter!(default_title, bool);
212
+ builder_setter!(br_in_tables, bool);
213
+ builder_setter_into!(sub_symbol, String);
214
+ builder_setter_into!(sup_symbol, String);
215
+
216
+ // Escaping
217
+ builder_setter!(escape_asterisks, bool);
218
+ builder_setter!(escape_underscores, bool);
219
+ builder_setter!(escape_misc, bool);
220
+ builder_setter!(escape_ascii, bool);
221
+
222
+ // Whitespace / wrapping
223
+ builder_setter!(whitespace_mode, WhitespaceMode);
224
+ builder_setter!(strip_newlines, bool);
225
+ builder_setter!(wrap, bool);
226
+ builder_setter!(wrap_width, usize);
227
+
228
+ // Element handling
229
+ builder_setter!(convert_as_inline, bool);
230
+ builder_setter!(skip_images, bool);
231
+
232
+ /// Set the list of HTML tag names whose content is stripped from output.
233
+ #[must_use]
234
+ pub fn strip_tags(mut self, tags: Vec<String>) -> Self {
235
+ self.0.strip_tags = tags;
236
+ self
237
+ }
238
+
239
+ /// Set the list of HTML tag names that are preserved verbatim in output.
240
+ #[must_use]
241
+ pub fn preserve_tags(mut self, tags: Vec<String>) -> Self {
242
+ self.0.preserve_tags = tags;
243
+ self
244
+ }
245
+
246
+ /// Set the list of HTML tag names whose `<img>` children are kept inline.
247
+ #[must_use]
248
+ pub fn keep_inline_images_in(mut self, tags: Vec<String>) -> Self {
249
+ self.0.keep_inline_images_in = tags;
250
+ self
251
+ }
252
+
253
+ // Image extraction config
254
+ builder_setter!(max_image_size, u64);
255
+ builder_setter!(capture_svg, bool);
256
+ builder_setter!(infer_dimensions, bool);
257
+
258
+ // Preprocessing
259
+ /// Set the pre-processing options applied to the HTML before conversion.
260
+ #[must_use]
261
+ pub fn preprocessing(mut self, preprocessing: PreprocessingOptions) -> Self {
262
+ self.0.preprocessing = preprocessing;
263
+ self
264
+ }
265
+
266
+ // Encoding
267
+ builder_setter_into!(encoding, String);
268
+
269
+ // Debug
270
+ builder_setter!(debug, bool);
271
+
272
+ /// Build the final [`ConversionOptions`].
273
+ #[must_use]
274
+ pub fn build(self) -> ConversionOptions {
275
+ self.0
276
+ }
128
277
  }
129
278
 
279
+ // ── ConversionOptionsUpdate (for binding crate compatibility) ────────────
280
+
281
+ use crate::options::preprocessing::PreprocessingOptionsUpdate;
282
+
130
283
  /// Partial update for `ConversionOptions`.
131
284
  ///
132
- /// This struct uses `Option<T>` to represent optional fields that can be selectively updated.
133
- /// Only specified fields (Some values) will override existing options; None values leave the
134
- /// corresponding fields unchanged when applied via [`ConversionOptions::apply_update`].
285
+ /// Uses `Option<T>` fields for selective updates. Bindings use this to construct
286
+ /// options from language-native types. Prefer [`ConversionOptionsBuilder`] for Rust code.
135
287
  #[derive(Debug, Clone, Default)]
136
288
  #[cfg_attr(
137
289
  any(feature = "serde", feature = "metadata"),
138
290
  derive(serde::Serialize, serde::Deserialize)
139
291
  )]
140
- #[cfg_attr(any(feature = "serde", feature = "metadata"), serde(rename_all = "camelCase"))]
292
+ #[cfg_attr(
293
+ any(feature = "serde", feature = "metadata"),
294
+ serde(rename_all = "camelCase", deny_unknown_fields)
295
+ )]
141
296
  pub struct ConversionOptionsUpdate {
142
- /// Optional heading style override (Underlined, Atx, `AtxClosed`)
297
+ /// Optional override for [`ConversionOptions::heading_style`].
143
298
  pub heading_style: Option<HeadingStyle>,
144
-
145
- /// Optional list indentation type override (Spaces or Tabs)
299
+ /// Optional override for [`ConversionOptions::list_indent_type`].
146
300
  pub list_indent_type: Option<ListIndentType>,
147
-
148
- /// Optional list indentation width override in spaces
301
+ /// Optional override for [`ConversionOptions::list_indent_width`].
149
302
  pub list_indent_width: Option<usize>,
150
-
151
- /// Optional bullet characters override for unordered lists
303
+ /// Optional override for [`ConversionOptions::bullets`].
152
304
  pub bullets: Option<String>,
153
-
154
- /// Optional strong/emphasis symbol override (* or _)
305
+ /// Optional override for [`ConversionOptions::strong_em_symbol`].
155
306
  pub strong_em_symbol: Option<char>,
156
-
157
- /// Optional asterisk escaping override in text content
307
+ /// Optional override for [`ConversionOptions::escape_asterisks`].
158
308
  pub escape_asterisks: Option<bool>,
159
-
160
- /// Optional underscore escaping override in text content
309
+ /// Optional override for [`ConversionOptions::escape_underscores`].
161
310
  pub escape_underscores: Option<bool>,
162
-
163
- /// Optional miscellaneous character escaping override (\ & < ` [ > ~ # = + | -)
311
+ /// Optional override for [`ConversionOptions::escape_misc`].
164
312
  pub escape_misc: Option<bool>,
165
-
166
- /// Optional ASCII punctuation escaping override (for spec compliance testing)
313
+ /// Optional override for [`ConversionOptions::escape_ascii`].
167
314
  pub escape_ascii: Option<bool>,
168
-
169
- /// Optional default code language override for fenced code blocks
315
+ /// Optional override for [`ConversionOptions::code_language`].
170
316
  pub code_language: Option<String>,
171
-
172
- /// Optional autolinks syntax override for bare URLs
317
+ /// Optional override for [`ConversionOptions::autolinks`].
173
318
  pub autolinks: Option<bool>,
174
-
175
- /// Optional default title element injection override
319
+ /// Optional override for [`ConversionOptions::default_title`].
176
320
  pub default_title: Option<bool>,
177
-
178
- /// Optional HTML <br> usage in tables override
321
+ /// Optional override for [`ConversionOptions::br_in_tables`].
179
322
  pub br_in_tables: Option<bool>,
180
-
181
- /// Optional spatial table reconstruction for hOCR documents override.
182
- ///
183
- /// **Deprecated since 2.30.0**: hOCR support will be removed in v3.
184
- pub hocr_spatial_tables: Option<bool>,
185
-
186
- /// Optional highlight style override for <mark> elements
323
+ /// Optional override for [`ConversionOptions::highlight_style`].
187
324
  pub highlight_style: Option<HighlightStyle>,
188
-
189
- /// Optional metadata extraction override (title, description, images, links)
325
+ /// Optional override for [`ConversionOptions::extract_metadata`].
190
326
  pub extract_metadata: Option<bool>,
191
-
192
- /// Optional whitespace handling mode override (Normalized or Strict)
327
+ /// Optional override for [`ConversionOptions::whitespace_mode`].
193
328
  pub whitespace_mode: Option<WhitespaceMode>,
194
-
195
- /// Optional newline stripping override before processing
329
+ /// Optional override for [`ConversionOptions::strip_newlines`].
196
330
  pub strip_newlines: Option<bool>,
197
-
198
- /// Optional automatic text wrapping override
331
+ /// Optional override for [`ConversionOptions::wrap`].
199
332
  pub wrap: Option<bool>,
200
-
201
- /// Optional text wrapping width override in characters
333
+ /// Optional override for [`ConversionOptions::wrap_width`].
202
334
  pub wrap_width: Option<usize>,
203
-
204
- /// Optional block-level to inline conversion override
335
+ /// Optional override for [`ConversionOptions::convert_as_inline`].
205
336
  pub convert_as_inline: Option<bool>,
206
-
207
- /// Optional subscript symbol override
337
+ /// Optional override for [`ConversionOptions::sub_symbol`].
208
338
  pub sub_symbol: Option<String>,
209
-
210
- /// Optional superscript symbol override
339
+ /// Optional override for [`ConversionOptions::sup_symbol`].
211
340
  pub sup_symbol: Option<String>,
212
-
213
- /// Optional newline style override for markdown output
341
+ /// Optional override for [`ConversionOptions::newline_style`].
214
342
  pub newline_style: Option<NewlineStyle>,
215
-
216
- /// Optional code block fence style override (Indented, Backticks, Tildes)
343
+ /// Optional override for [`ConversionOptions::code_block_style`].
217
344
  pub code_block_style: Option<CodeBlockStyle>,
218
-
219
- /// Optional context elements where images remain as markdown links override
345
+ /// Optional override for [`ConversionOptions::keep_inline_images_in`].
220
346
  pub keep_inline_images_in: Option<Vec<String>>,
221
-
222
- /// Optional preprocessing options partial update
347
+ /// Optional override for [`ConversionOptions::preprocessing`].
223
348
  pub preprocessing: Option<PreprocessingOptionsUpdate>,
224
-
225
- /// Optional source document encoding override
349
+ /// Optional override for [`ConversionOptions::encoding`].
226
350
  pub encoding: Option<String>,
227
-
228
- /// Optional debug mode override for diagnostic warnings
351
+ /// Optional override for [`ConversionOptions::debug`].
229
352
  pub debug: Option<bool>,
230
-
231
- /// Optional HTML tags to strip override (extract text, no conversion)
353
+ /// Optional override for [`ConversionOptions::strip_tags`].
232
354
  pub strip_tags: Option<Vec<String>>,
233
-
234
- /// Optional HTML tags to preserve as-is override in output
355
+ /// Optional override for [`ConversionOptions::preserve_tags`].
235
356
  pub preserve_tags: Option<Vec<String>>,
236
-
237
- /// Optional skip images override
357
+ /// Optional override for [`ConversionOptions::skip_images`].
238
358
  pub skip_images: Option<bool>,
239
-
240
- /// Optional output format override (Markdown, Djot, or Plain)
359
+ /// Optional override for [`ConversionOptions::output_format`].
241
360
  pub output_format: Option<OutputFormat>,
242
- }
243
-
244
- impl Default for ConversionOptions {
245
- fn default() -> Self {
246
- Self {
247
- heading_style: HeadingStyle::default(),
248
- list_indent_type: ListIndentType::default(),
249
- list_indent_width: 2,
250
- bullets: "-".to_string(),
251
- strong_em_symbol: '*',
252
- escape_asterisks: false,
253
- escape_underscores: false,
254
- escape_misc: false,
255
- escape_ascii: false,
256
- code_language: String::new(),
257
- autolinks: true,
258
- default_title: false,
259
- br_in_tables: false,
260
- hocr_spatial_tables: true,
261
- highlight_style: HighlightStyle::default(),
262
- extract_metadata: true,
263
- whitespace_mode: WhitespaceMode::default(),
264
- strip_newlines: false,
265
- wrap: false,
266
- wrap_width: 80,
267
- convert_as_inline: false,
268
- sub_symbol: String::new(),
269
- sup_symbol: String::new(),
270
- newline_style: NewlineStyle::Spaces,
271
- code_block_style: CodeBlockStyle::default(),
272
- keep_inline_images_in: Vec::new(),
273
- preprocessing: PreprocessingOptions::default(),
274
- encoding: "utf-8".to_string(),
275
- debug: false,
276
- strip_tags: Vec::new(),
277
- preserve_tags: Vec::new(),
278
- skip_images: false,
279
- output_format: OutputFormat::default(),
280
- }
281
- }
361
+ /// Optional override for [`ConversionOptions::include_document_structure`].
362
+ pub include_document_structure: Option<bool>,
363
+ /// Optional override for [`ConversionOptions::extract_images`].
364
+ pub extract_images: Option<bool>,
365
+ /// Optional override for [`ConversionOptions::max_image_size`].
366
+ pub max_image_size: Option<u64>,
367
+ /// Optional override for [`ConversionOptions::capture_svg`].
368
+ pub capture_svg: Option<bool>,
369
+ /// Optional override for [`ConversionOptions::infer_dimensions`].
370
+ pub infer_dimensions: Option<bool>,
282
371
  }
283
372
 
284
373
  impl ConversionOptions {
285
374
  /// Apply a partial update to these conversion options.
286
- ///
287
- /// Any specified fields in the update will override the current values.
288
- /// Unspecified fields (None) are left unchanged.
289
- ///
290
- /// # Arguments
291
- ///
292
- /// * `update` - Partial options update with fields to override
293
375
  pub fn apply_update(&mut self, update: ConversionOptionsUpdate) {
294
- if let Some(heading_style) = update.heading_style {
295
- self.heading_style = heading_style;
296
- }
297
- if let Some(list_indent_type) = update.list_indent_type {
298
- self.list_indent_type = list_indent_type;
299
- }
300
- if let Some(list_indent_width) = update.list_indent_width {
301
- self.list_indent_width = list_indent_width;
302
- }
303
- if let Some(bullets) = update.bullets {
304
- self.bullets = bullets;
305
- }
306
- if let Some(strong_em_symbol) = update.strong_em_symbol {
307
- self.strong_em_symbol = strong_em_symbol;
308
- }
309
- if let Some(escape_asterisks) = update.escape_asterisks {
310
- self.escape_asterisks = escape_asterisks;
311
- }
312
- if let Some(escape_underscores) = update.escape_underscores {
313
- self.escape_underscores = escape_underscores;
314
- }
315
- if let Some(escape_misc) = update.escape_misc {
316
- self.escape_misc = escape_misc;
317
- }
318
- if let Some(escape_ascii) = update.escape_ascii {
319
- self.escape_ascii = escape_ascii;
320
- }
321
- if let Some(code_language) = update.code_language {
322
- self.code_language = code_language;
323
- }
324
- if let Some(autolinks) = update.autolinks {
325
- self.autolinks = autolinks;
326
- }
327
- if let Some(default_title) = update.default_title {
328
- self.default_title = default_title;
329
- }
330
- if let Some(br_in_tables) = update.br_in_tables {
331
- self.br_in_tables = br_in_tables;
332
- }
333
- if let Some(hocr_spatial_tables) = update.hocr_spatial_tables {
334
- self.hocr_spatial_tables = hocr_spatial_tables;
335
- }
336
- if let Some(highlight_style) = update.highlight_style {
337
- self.highlight_style = highlight_style;
338
- }
339
- if let Some(extract_metadata) = update.extract_metadata {
340
- self.extract_metadata = extract_metadata;
341
- }
342
- if let Some(whitespace_mode) = update.whitespace_mode {
343
- self.whitespace_mode = whitespace_mode;
344
- }
345
- if let Some(strip_newlines) = update.strip_newlines {
346
- self.strip_newlines = strip_newlines;
347
- }
348
- if let Some(wrap) = update.wrap {
349
- self.wrap = wrap;
350
- }
351
- if let Some(wrap_width) = update.wrap_width {
352
- self.wrap_width = wrap_width;
353
- }
354
- if let Some(convert_as_inline) = update.convert_as_inline {
355
- self.convert_as_inline = convert_as_inline;
356
- }
357
- if let Some(sub_symbol) = update.sub_symbol {
358
- self.sub_symbol = sub_symbol;
359
- }
360
- if let Some(sup_symbol) = update.sup_symbol {
361
- self.sup_symbol = sup_symbol;
362
- }
363
- if let Some(newline_style) = update.newline_style {
364
- self.newline_style = newline_style;
365
- }
366
- if let Some(code_block_style) = update.code_block_style {
367
- self.code_block_style = code_block_style;
368
- }
369
- if let Some(keep_inline_images_in) = update.keep_inline_images_in {
370
- self.keep_inline_images_in = keep_inline_images_in;
371
- }
376
+ macro_rules! apply {
377
+ ($field:ident) => {
378
+ if let Some(v) = update.$field {
379
+ self.$field = v;
380
+ }
381
+ };
382
+ }
383
+ apply!(heading_style);
384
+ apply!(list_indent_type);
385
+ apply!(list_indent_width);
386
+ apply!(bullets);
387
+ apply!(strong_em_symbol);
388
+ apply!(escape_asterisks);
389
+ apply!(escape_underscores);
390
+ apply!(escape_misc);
391
+ apply!(escape_ascii);
392
+ apply!(code_language);
393
+ apply!(autolinks);
394
+ apply!(default_title);
395
+ apply!(br_in_tables);
396
+ apply!(highlight_style);
397
+ apply!(extract_metadata);
398
+ apply!(whitespace_mode);
399
+ apply!(strip_newlines);
400
+ apply!(wrap);
401
+ apply!(wrap_width);
402
+ apply!(convert_as_inline);
403
+ apply!(sub_symbol);
404
+ apply!(sup_symbol);
405
+ apply!(newline_style);
406
+ apply!(code_block_style);
407
+ apply!(keep_inline_images_in);
408
+ apply!(encoding);
409
+ apply!(debug);
410
+ apply!(strip_tags);
411
+ apply!(preserve_tags);
412
+ apply!(skip_images);
413
+ apply!(output_format);
414
+ apply!(include_document_structure);
415
+ apply!(extract_images);
416
+ apply!(max_image_size);
417
+ apply!(capture_svg);
418
+ apply!(infer_dimensions);
372
419
  if let Some(preprocessing) = update.preprocessing {
373
420
  self.preprocessing.apply_update(preprocessing);
374
421
  }
375
- if let Some(encoding) = update.encoding {
376
- self.encoding = encoding;
377
- }
378
- if let Some(debug) = update.debug {
379
- self.debug = debug;
380
- }
381
- if let Some(strip_tags) = update.strip_tags {
382
- self.strip_tags = strip_tags;
383
- }
384
- if let Some(preserve_tags) = update.preserve_tags {
385
- self.preserve_tags = preserve_tags;
386
- }
387
- if let Some(skip_images) = update.skip_images {
388
- self.skip_images = skip_images;
389
- }
390
- if let Some(output_format) = update.output_format {
391
- self.output_format = output_format;
392
- }
393
422
  }
394
423
 
395
- /// Create new conversion options from a partial update.
396
- ///
397
- /// Creates a new `ConversionOptions` struct with defaults, then applies the update.
398
- /// Fields not specified in the update keep their default values.
399
- ///
400
- /// # Arguments
401
- ///
402
- /// * `update` - Partial options update with fields to set
403
- ///
404
- /// # Returns
405
- ///
406
- /// New `ConversionOptions` with specified updates applied to defaults
424
+ /// Create from a partial update, applying to defaults.
407
425
  #[must_use]
408
426
  pub fn from_update(update: ConversionOptionsUpdate) -> Self {
409
427
  let mut options = Self::default();
@@ -418,28 +436,25 @@ impl From<ConversionOptionsUpdate> for ConversionOptions {
418
436
  }
419
437
  }
420
438
 
439
+ // ── Tests ───────────────────────────────────────────────────────────────────
440
+
421
441
  #[cfg(all(test, any(feature = "serde", feature = "metadata")))]
422
442
  mod tests {
423
443
  use super::*;
424
444
 
425
445
  #[test]
426
446
  fn test_conversion_options_serde() {
427
- let options = ConversionOptions {
428
- heading_style: HeadingStyle::AtxClosed,
429
- list_indent_width: 4,
430
- bullets: "*".to_string(),
431
- escape_asterisks: true,
432
- whitespace_mode: WhitespaceMode::Strict,
433
- ..Default::default()
434
- };
435
-
436
- // Serialize to JSON
437
- let json = serde_json::to_string(&options).expect("Failed to serialize");
447
+ let options = ConversionOptions::builder()
448
+ .heading_style(HeadingStyle::AtxClosed)
449
+ .list_indent_width(4)
450
+ .bullets("*")
451
+ .escape_asterisks(true)
452
+ .whitespace_mode(WhitespaceMode::Strict)
453
+ .build();
438
454
 
439
- // Deserialize back
455
+ let json = serde_json::to_string(&options).expect("Failed to serialize");
440
456
  let deserialized: ConversionOptions = serde_json::from_str(&json).expect("Failed to deserialize");
441
457
 
442
- // Verify values
443
458
  assert_eq!(deserialized.list_indent_width, 4);
444
459
  assert_eq!(deserialized.bullets, "*");
445
460
  assert!(deserialized.escape_asterisks);
@@ -449,7 +464,6 @@ mod tests {
449
464
 
450
465
  #[test]
451
466
  fn test_conversion_options_partial_deserialization() {
452
- // Test that partial JSON can be deserialized using defaults for missing fields
453
467
  let partial_json = r#"{
454
468
  "headingStyle": "atxClosed",
455
469
  "listIndentWidth": 4,
@@ -459,14 +473,28 @@ mod tests {
459
473
  let deserialized: ConversionOptions =
460
474
  serde_json::from_str(partial_json).expect("Failed to deserialize partial JSON");
461
475
 
462
- // Verify specified values
463
476
  assert_eq!(deserialized.heading_style, HeadingStyle::AtxClosed);
464
477
  assert_eq!(deserialized.list_indent_width, 4);
465
478
  assert_eq!(deserialized.bullets, "*");
479
+ assert!(!deserialized.escape_asterisks);
480
+ assert!(!deserialized.escape_underscores);
481
+ assert_eq!(deserialized.list_indent_type, ListIndentType::Spaces);
482
+ }
466
483
 
467
- // Verify missing fields use defaults
468
- assert!(!deserialized.escape_asterisks); // default
469
- assert!(!deserialized.escape_underscores); // default
470
- assert_eq!(deserialized.list_indent_type, ListIndentType::Spaces); // default
484
+ #[test]
485
+ fn test_builder_pattern() {
486
+ let options = ConversionOptions::builder()
487
+ .heading_style(HeadingStyle::Underlined)
488
+ .wrap(true)
489
+ .wrap_width(100)
490
+ .include_document_structure(true)
491
+ .extract_images(true)
492
+ .build();
493
+
494
+ assert_eq!(options.heading_style, HeadingStyle::Underlined);
495
+ assert!(options.wrap);
496
+ assert_eq!(options.wrap_width, 100);
497
+ assert!(options.include_document_structure);
498
+ assert!(options.extract_images);
471
499
  }
472
500
  }