html-to-markdown 2.29.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +18 -41
  3. data/README.md +37 -50
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +17 -705
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -4
  6. data/ext/html-to-markdown-rb/native/README.md +4 -13
  7. data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +2 -73
  8. data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +5 -49
  9. data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -6
  10. data/ext/html-to-markdown-rb/native/src/lib.rs +76 -213
  11. data/ext/html-to-markdown-rb/native/src/options.rs +0 -3
  12. data/lib/html_to_markdown/version.rb +1 -1
  13. data/lib/html_to_markdown.rb +13 -194
  14. data/sig/html_to_markdown.rbs +12 -373
  15. data/vendor/Cargo.toml +7 -4
  16. data/vendor/html-to-markdown-rs/Cargo.toml +4 -10
  17. data/vendor/html-to-markdown-rs/README.md +127 -51
  18. data/vendor/html-to-markdown-rs/examples/basic.rs +6 -1
  19. data/vendor/html-to-markdown-rs/examples/table.rs +6 -1
  20. data/vendor/html-to-markdown-rs/examples/test_escape.rs +6 -1
  21. data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +8 -2
  22. data/vendor/html-to-markdown-rs/examples/test_lists.rs +6 -1
  23. data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +6 -1
  24. data/vendor/html-to-markdown-rs/examples/test_tables.rs +6 -1
  25. data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +6 -1
  26. data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +6 -1
  27. data/vendor/html-to-markdown-rs/src/convert_api.rs +151 -745
  28. data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +3 -5
  29. data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -7
  30. data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +18 -5
  31. data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +10 -0
  32. data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +3 -5
  33. data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +16 -11
  34. data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +20 -0
  35. data/vendor/html-to-markdown-rs/src/converter/block/table/cells.rs +4 -17
  36. data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +140 -0
  37. data/vendor/html-to-markdown-rs/src/converter/block/table/scanner.rs +4 -18
  38. data/vendor/html-to-markdown-rs/src/converter/block/table/utils.rs +2 -18
  39. data/vendor/html-to-markdown-rs/src/converter/context.rs +8 -0
  40. data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -6
  41. data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
  42. data/vendor/html-to-markdown-rs/src/converter/handlers/blockquote.rs +4 -5
  43. data/vendor/html-to-markdown-rs/src/converter/handlers/code_block.rs +5 -10
  44. data/vendor/html-to-markdown-rs/src/converter/handlers/graphic.rs +3 -5
  45. data/vendor/html-to-markdown-rs/src/converter/handlers/image.rs +3 -5
  46. data/vendor/html-to-markdown-rs/src/converter/handlers/link.rs +3 -5
  47. data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +3 -5
  48. data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +4 -10
  49. data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +4 -170
  50. data/vendor/html-to-markdown-rs/src/converter/inline/semantic/marks.rs +7 -19
  51. data/vendor/html-to-markdown-rs/src/converter/list/item.rs +3 -5
  52. data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +4 -10
  53. data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +6 -12
  54. data/vendor/html-to-markdown-rs/src/converter/list/utils.rs +1 -12
  55. data/vendor/html-to-markdown-rs/src/converter/main.rs +85 -56
  56. data/vendor/html-to-markdown-rs/src/converter/main_helpers.rs +4 -67
  57. data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +1 -5
  58. data/vendor/html-to-markdown-rs/src/converter/media/graphic.rs +3 -40
  59. data/vendor/html-to-markdown-rs/src/converter/media/image.rs +0 -8
  60. data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +3 -13
  61. data/vendor/html-to-markdown-rs/src/converter/metadata.rs +1 -1
  62. data/vendor/html-to-markdown-rs/src/converter/mod.rs +0 -8
  63. data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +37 -12
  64. data/vendor/html-to-markdown-rs/src/converter/semantic/attributes.rs +5 -30
  65. data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +29 -0
  66. data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +1 -36
  67. data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +1 -3
  68. data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -53
  69. data/vendor/html-to-markdown-rs/src/converter/text_node.rs +1 -1
  70. data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +0 -41
  71. data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +2 -1
  72. data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +15 -98
  73. data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +113 -4
  74. data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +3 -0
  75. data/vendor/html-to-markdown-rs/src/converter/visitor_hooks.rs +4 -10
  76. data/vendor/html-to-markdown-rs/src/exports.rs +1 -4
  77. data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
  78. data/vendor/html-to-markdown-rs/src/lib.rs +13 -133
  79. data/vendor/html-to-markdown-rs/src/metadata/collector.rs +4 -4
  80. data/vendor/html-to-markdown-rs/src/metadata/mod.rs +22 -22
  81. data/vendor/html-to-markdown-rs/src/metadata/types.rs +3 -3
  82. data/vendor/html-to-markdown-rs/src/options/conversion.rs +351 -319
  83. data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +8 -2
  84. data/vendor/html-to-markdown-rs/src/prelude.rs +1 -15
  85. data/vendor/html-to-markdown-rs/src/rcdom.rs +7 -1
  86. data/vendor/html-to-markdown-rs/src/text.rs +25 -14
  87. data/vendor/html-to-markdown-rs/src/types/document.rs +175 -0
  88. data/vendor/html-to-markdown-rs/src/types/mod.rs +17 -0
  89. data/vendor/html-to-markdown-rs/src/types/result.rs +49 -0
  90. data/vendor/html-to-markdown-rs/src/types/structure_builder.rs +790 -0
  91. data/vendor/html-to-markdown-rs/src/types/structure_collector.rs +442 -0
  92. data/vendor/html-to-markdown-rs/src/types/tables.rs +47 -0
  93. data/vendor/html-to-markdown-rs/src/types/warnings.rs +28 -0
  94. data/vendor/html-to-markdown-rs/src/visitor/mod.rs +0 -6
  95. data/vendor/html-to-markdown-rs/src/visitor/traits.rs +0 -1
  96. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/mod.rs +1 -21
  97. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/mod.rs +0 -5
  98. data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +1 -845
  99. data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +8 -1
  100. data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +8 -8
  101. data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +8 -2
  102. data/vendor/html-to-markdown-rs/tests/integration_test.rs +23 -6
  103. data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +8 -1
  104. data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +8 -2
  105. data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +6 -1
  106. data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +8 -1
  107. data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +8 -1
  108. data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +8 -1
  109. data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -1
  110. data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +8 -1
  111. data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +8 -7
  112. data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +8 -7
  113. data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +12 -2
  114. data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +8 -1
  115. data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +6 -1
  116. data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +6 -1
  117. data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +6 -1
  118. data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +6 -1
  119. data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +4 -6
  120. data/vendor/html-to-markdown-rs/tests/lists_test.rs +8 -1
  121. data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +8 -2
  122. data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +8 -1
  123. data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +8 -11
  124. data/vendor/html-to-markdown-rs/tests/tables_test.rs +12 -2
  125. data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +8 -1
  126. data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +8 -1
  127. data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +17 -28
  128. data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +8 -1
  129. data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +29 -33
  130. data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +8 -1
  131. metadata +9 -37
  132. data/bin/benchmark.rb +0 -232
  133. data/ext/html-to-markdown-rb/native/src/conversion/tables.rs +0 -71
  134. data/ext/html-to-markdown-rb/native/src/profiling.rs +0 -215
  135. data/ext/html-to-markdown-rb/native/src/visitor/bridge.rs +0 -252
  136. data/ext/html-to-markdown-rb/native/src/visitor/callbacks.rs +0 -640
  137. data/ext/html-to-markdown-rb/native/src/visitor/mod.rs +0 -12
  138. data/spec/convert_spec.rb +0 -77
  139. data/spec/convert_with_tables_spec.rb +0 -194
  140. data/spec/metadata_extraction_spec.rb +0 -437
  141. data/spec/visitor_issue_187_spec.rb +0 -605
  142. data/spec/visitor_spec.rb +0 -1149
  143. data/vendor/html-to-markdown-rs/src/hocr/converter/code_analysis.rs +0 -254
  144. data/vendor/html-to-markdown-rs/src/hocr/converter/core.rs +0 -249
  145. data/vendor/html-to-markdown-rs/src/hocr/converter/elements.rs +0 -382
  146. data/vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +0 -379
  147. data/vendor/html-to-markdown-rs/src/hocr/converter/keywords.rs +0 -55
  148. data/vendor/html-to-markdown-rs/src/hocr/converter/layout.rs +0 -313
  149. data/vendor/html-to-markdown-rs/src/hocr/converter/mod.rs +0 -26
  150. data/vendor/html-to-markdown-rs/src/hocr/converter/output.rs +0 -78
  151. data/vendor/html-to-markdown-rs/src/hocr/extractor.rs +0 -232
  152. data/vendor/html-to-markdown-rs/src/hocr/mod.rs +0 -31
  153. data/vendor/html-to-markdown-rs/src/hocr/parser.rs +0 -333
  154. data/vendor/html-to-markdown-rs/src/hocr/spatial/coords.rs +0 -129
  155. data/vendor/html-to-markdown-rs/src/hocr/spatial/grouping.rs +0 -165
  156. data/vendor/html-to-markdown-rs/src/hocr/spatial/layout.rs +0 -335
  157. data/vendor/html-to-markdown-rs/src/hocr/spatial/mod.rs +0 -15
  158. data/vendor/html-to-markdown-rs/src/hocr/spatial/output.rs +0 -63
  159. data/vendor/html-to-markdown-rs/src/hocr/types.rs +0 -269
  160. data/vendor/html-to-markdown-rs/src/visitor/async_traits.rs +0 -249
  161. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge.rs +0 -189
  162. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge_visitor.rs +0 -343
  163. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/macros.rs +0 -217
  164. data/vendor/html-to-markdown-rs/tests/async_visitor_test.rs +0 -57
  165. data/vendor/html-to-markdown-rs/tests/convert_with_metadata_no_frontmatter.rs +0 -100
  166. data/vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +0 -509
@@ -1,215 +0,0 @@
1
- use html_to_markdown_rs::{ConversionError, Result};
2
- use std::path::PathBuf;
3
-
4
#[cfg(all(not(target_os = "windows"), feature = "profiling"))]
mod enabled {
    //! pprof-backed profiling, compiled only on non-Windows targets when the
    //! `profiling` feature is enabled.
    //!
    //! Two entry paths exist: explicit [`start`] / [`stop`] calls, and
    //! [`maybe_profile`], which is driven purely by environment variables.

    use super::{ConversionError, PathBuf, Result};
    use std::path::Path;
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::{Mutex, MutexGuard, OnceLock};

    /// Environment variable naming the flamegraph file written by [`maybe_profile`].
    const ENV_OUTPUT: &str = "HTML_TO_MARKDOWN_PROFILE_OUTPUT";
    /// Environment variable overriding the sampling frequency handed to pprof.
    const ENV_FREQUENCY: &str = "HTML_TO_MARKDOWN_PROFILE_FREQUENCY";
    /// Environment variable controlling whether only the first eligible call is profiled.
    const ENV_ONCE: &str = "HTML_TO_MARKDOWN_PROFILE_ONCE";

    /// Sampling frequency used when `ENV_FREQUENCY` is unset or unusable.
    const DEFAULT_FREQUENCY: i32 = 1000;
    /// Library name fragments passed to pprof's blocklist.
    const BLOCKLIST: [&str; 4] = ["libc", "libpthread", "libgcc", "libm"];

    /// Set once an environment-driven profile has been attempted.
    static PROFILED_ONCE: AtomicBool = AtomicBool::new(false);
    /// Set while either profiling path owns a running profiler.
    static PROFILE_ACTIVE: AtomicBool = AtomicBool::new(false);

    /// Snapshot of the profiling-related environment variables.
    struct EnvProfileConfig {
        /// Flamegraph destination; `None` disables environment-driven profiling.
        output: Option<PathBuf>,
        /// When true, only the first eligible call is profiled.
        profile_once: bool,
        /// Sampling frequency handed to pprof; always positive.
        frequency: i32,
    }

    /// Interprets the `ENV_ONCE` value: `0`, `false` and `no` (any ASCII case,
    /// surrounding whitespace ignored) turn the flag off; anything else keeps it on.
    fn parse_once_flag(raw: &str) -> bool {
        let value = raw.trim();
        !["0", "false", "no"]
            .iter()
            .any(|off| value.eq_ignore_ascii_case(off))
    }

    /// Parses a sampling frequency, accepting only strictly positive integers.
    fn parse_frequency(raw: &str) -> Option<i32> {
        raw.trim().parse::<i32>().ok().filter(|hz| *hz > 0)
    }

    /// Reads the environment once and caches the result for the process lifetime.
    fn env_profile_config() -> &'static EnvProfileConfig {
        static ENV_CONFIG: OnceLock<EnvProfileConfig> = OnceLock::new();
        ENV_CONFIG.get_or_init(|| EnvProfileConfig {
            // A blank value is treated the same as an unset variable.
            output: std::env::var(ENV_OUTPUT)
                .ok()
                .filter(|value| !value.trim().is_empty())
                .map(PathBuf::from),
            // Unset (or unreadable) means "profile once".
            profile_once: std::env::var(ENV_ONCE).map_or(true, |value| parse_once_flag(&value)),
            frequency: std::env::var(ENV_FREQUENCY)
                .ok()
                .and_then(|value| parse_frequency(&value))
                .unwrap_or(DEFAULT_FREQUENCY),
        })
    }

    /// State owned by an explicit `start` / `stop` session.
    struct ProfileState {
        guard: Option<pprof::ProfilerGuard<'static>>,
        output: Option<PathBuf>,
    }

    /// Returns the process-wide session state, creating it on first use.
    fn state() -> &'static Mutex<ProfileState> {
        static STATE: OnceLock<Mutex<ProfileState>> = OnceLock::new();
        STATE.get_or_init(|| {
            Mutex::new(ProfileState {
                guard: None,
                output: None,
            })
        })
    }

    /// Locks the session state, mapping a poisoned lock to a `ConversionError`.
    fn lock_state() -> Result<MutexGuard<'static, ProfileState>> {
        state()
            .lock()
            .map_err(|_| ConversionError::Other("profiling state lock poisoned".to_string()))
    }

    /// Builds a profiler guard sampling at `frequency`.
    ///
    /// A non-positive frequency is rejected here instead of being handed to the
    /// profiler, since it cannot describe a sampling rate.
    fn build_guard(frequency: i32) -> Result<pprof::ProfilerGuard<'static>> {
        if frequency <= 0 {
            return Err(ConversionError::Other(format!(
                "Profiling frequency must be positive, got {frequency}"
            )));
        }

        pprof::ProfilerGuardBuilder::default()
            .frequency(frequency)
            .blocklist(&BLOCKLIST)
            .build()
            .map_err(|err| ConversionError::Other(format!("Profiling init failed: {err}")))
    }

    /// Builds the report for `guard` and writes it as a flamegraph to `output_path`,
    /// creating the parent directory first when the path has one.
    fn write_flamegraph(guard: &pprof::ProfilerGuard<'static>, output_path: &Path) -> Result<()> {
        if let Some(parent) = output_path.parent() {
            std::fs::create_dir_all(parent).map_err(ConversionError::IoError)?;
        }

        let report = guard
            .report()
            .build()
            .map_err(|err| ConversionError::Other(format!("Profiling report failed: {err}")))?;

        let file = std::fs::File::create(output_path).map_err(ConversionError::IoError)?;
        report
            .flamegraph(file)
            .map_err(|err| ConversionError::Other(format!("Flamegraph write failed: {err}")))
    }

    /// Starts an explicit profiling session whose flamegraph will be written to
    /// `output_path` by [`stop`].
    ///
    /// # Errors
    /// Fails when the state lock is poisoned, a session is already active,
    /// `frequency` is not positive, or the profiler cannot be initialised.
    pub fn start(output_path: PathBuf, frequency: i32) -> Result<()> {
        let mut state = lock_state()?;

        if state.guard.is_some() {
            return Err(ConversionError::Other("profiling already active".to_string()));
        }

        state.guard = Some(build_guard(frequency)?);
        state.output = Some(output_path);
        PROFILE_ACTIVE.store(true, Ordering::Release);
        Ok(())
    }

    /// Ends the session opened by [`start`] and writes its flamegraph.
    ///
    /// # Errors
    /// Fails when the state lock is poisoned, no session is active, or the
    /// report cannot be built or written.
    pub fn stop() -> Result<()> {
        // Take both pieces out while holding the lock, then release it before any I/O.
        let (guard, output) = {
            let mut state = lock_state()?;
            (state.guard.take(), state.output.take())
        };

        let Some(guard) = guard else {
            // No `start` session exists, so the active flag is not ours to clear:
            // it may belong to an in-flight `maybe_profile` run.
            return Err(ConversionError::Other("profiling not active".to_string()));
        };
        PROFILE_ACTIVE.store(false, Ordering::Release);

        let Some(output_path) = output else {
            return Err(ConversionError::Other("profiling output path missing".to_string()));
        };

        write_flamegraph(&guard, &output_path)
    }

    /// Runs `f`, profiling it when the environment asks for it.
    ///
    /// `f` runs unprofiled when no output path is configured, when another
    /// profile is already active, or when "profile once" is set and a profile
    /// has already been attempted. Otherwise `f` runs under a profiler and, if
    /// it succeeds, a flamegraph is written to the configured path.
    ///
    /// # Errors
    /// Returns whatever `f` returns, or a profiling error when the profiler
    /// cannot be initialised (in which case `f` is not run) or the flamegraph
    /// cannot be written after a successful `f`.
    pub fn maybe_profile<T, F>(f: F) -> Result<T>
    where
        F: FnOnce() -> Result<T>,
    {
        /// Clears the active flag when dropped, including on early return or unwind.
        struct ActiveGuard;
        impl Drop for ActiveGuard {
            fn drop(&mut self) {
                PROFILE_ACTIVE.store(false, Ordering::Release);
            }
        }

        let config = env_profile_config();
        let Some(output_path) = config.output.as_deref() else {
            return f();
        };

        // Claim the flag in a single atomic step so that two concurrent callers
        // cannot both decide to start a profiler.
        if PROFILE_ACTIVE
            .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
            .is_err()
        {
            return f();
        }
        let active = ActiveGuard;

        if config.profile_once && PROFILED_ONCE.swap(true, Ordering::SeqCst) {
            // Already profiled once: release the claim before running unprofiled.
            drop(active);
            return f();
        }

        let guard = build_guard(config.frequency)?;
        let result = f();

        if result.is_ok() {
            write_flamegraph(&guard, output_path)?;
        }

        result
    }
}
173
-
174
- #[cfg(all(not(target_os = "windows"), feature = "profiling"))]
175
- pub use enabled::{maybe_profile, start, stop};
176
-
177
/// Windows stand-in for `start`: never starts anything and always reports
/// that profiling is unsupported on this platform.
///
/// # Errors
/// Always returns `ConversionError::Other`.
#[cfg(target_os = "windows")]
#[allow(dead_code)]
pub fn start(_output_path: PathBuf, _frequency: i32) -> Result<()> {
    let reason = String::from("Profiling is not supported on Windows");
    Err(ConversionError::Other(reason))
}
184
-
185
- #[cfg(all(not(target_os = "windows"), not(feature = "profiling")))]
186
- #[allow(dead_code)]
187
- pub fn start(_output_path: PathBuf, _frequency: i32) -> Result<()> {
188
- Err(ConversionError::Other(
189
- "Profiling is disabled; rebuild with the profiling feature".to_string(),
190
- ))
191
- }
192
-
193
/// Windows stand-in for `stop`: always reports that profiling is
/// unsupported on this platform.
///
/// # Errors
/// Always returns `ConversionError::Other`.
#[cfg(target_os = "windows")]
#[allow(dead_code)]
pub fn stop() -> Result<()> {
    let reason = String::from("Profiling is not supported on Windows");
    Err(ConversionError::Other(reason))
}
200
-
201
- #[cfg(all(not(target_os = "windows"), not(feature = "profiling")))]
202
- #[allow(dead_code)]
203
- pub fn stop() -> Result<()> {
204
- Err(ConversionError::Other(
205
- "Profiling is disabled; rebuild with the profiling feature".to_string(),
206
- ))
207
- }
208
-
209
- #[cfg(any(target_os = "windows", not(feature = "profiling")))]
210
- pub fn maybe_profile<T, F>(f: F) -> Result<T>
211
- where
212
- F: FnOnce() -> Result<T>,
213
- {
214
- f()
215
- }
@@ -1,252 +0,0 @@
1
- //! Ruby visitor wrapper and bridge utilities.
2
-
3
- use crate::types::{arg_error, symbol_to_string};
4
- use html_to_markdown_rs::visitor::{NodeContext, NodeType, VisitResult};
5
- use magnus::prelude::*;
6
- use magnus::{Error, RHash, Ruby, TryConvert, Value};
7
-
8
- /// Wrapper for a Ruby visitor object that implements the HtmlVisitor trait.
9
- #[derive(Clone)]
10
- pub struct RubyVisitorWrapper {
11
- pub ruby_visitor: Value,
12
- pub last_error: std::rc::Rc<std::cell::RefCell<Option<String>>>,
13
- }
14
-
15
- impl RubyVisitorWrapper {
16
- pub fn new(ruby_visitor: Value) -> Self {
17
- Self {
18
- ruby_visitor,
19
- last_error: std::rc::Rc::new(std::cell::RefCell::new(None)),
20
- }
21
- }
22
-
23
- pub fn utf8_str(&self, ruby: &Ruby, s: &str) -> Value {
24
- if let Ok(val) = ruby.eval::<Value>(&format!("String.new({s:?}, encoding: 'UTF-8')")) {
25
- val
26
- } else {
27
- let str_val = ruby.str_from_slice(s.as_bytes());
28
- str_val.as_value()
29
- }
30
- }
31
-
32
- pub fn call_visitor_method(&self, method_name: &str, args: &[Value]) -> Result<VisitResult, Error> {
33
- let ruby = Ruby::get().expect("Ruby not initialized");
34
-
35
- let result: Value = match args.len() {
36
- 0 => match self.ruby_visitor.funcall::<&str, (), Value>(method_name, ()) {
37
- Ok(val) => val,
38
- Err(e) => {
39
- *self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
40
- return Err(e);
41
- }
42
- },
43
- 1 => match self
44
- .ruby_visitor
45
- .funcall::<&str, (Value,), Value>(method_name, (args[0],))
46
- {
47
- Ok(val) => val,
48
- Err(e) => {
49
- *self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
50
- return Err(e);
51
- }
52
- },
53
- 2 => match self
54
- .ruby_visitor
55
- .funcall::<&str, (Value, Value), Value>(method_name, (args[0], args[1]))
56
- {
57
- Ok(val) => val,
58
- Err(e) => {
59
- *self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
60
- return Err(e);
61
- }
62
- },
63
- 3 => match self
64
- .ruby_visitor
65
- .funcall::<&str, (Value, Value, Value), Value>(method_name, (args[0], args[1], args[2]))
66
- {
67
- Ok(val) => val,
68
- Err(e) => {
69
- *self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
70
- return Err(e);
71
- }
72
- },
73
- 4 => match self
74
- .ruby_visitor
75
- .funcall::<&str, (Value, Value, Value, Value), Value>(method_name, (args[0], args[1], args[2], args[3]))
76
- {
77
- Ok(val) => val,
78
- Err(e) => {
79
- *self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
80
- return Err(e);
81
- }
82
- },
83
- _ => {
84
- return Err(arg_error(format!(
85
- "Unsupported number of visitor method arguments: {}",
86
- args.len()
87
- )));
88
- }
89
- };
90
-
91
- let hash = RHash::from_value(result)
92
- .ok_or_else(|| arg_error(format!("visitor method {method_name} must return a Hash")))?;
93
-
94
- let type_value: Value = hash
95
- .get(ruby.intern("type"))
96
- .ok_or_else(|| arg_error(format!("visitor method {method_name} result Hash must have :type key")))?;
97
-
98
- let type_str = symbol_to_string(type_value)?;
99
-
100
- match type_str.as_str() {
101
- "continue" => Ok(VisitResult::Continue),
102
- "custom" => {
103
- let output_value: Value = hash.get(ruby.intern("output")).ok_or_else(|| {
104
- arg_error(format!(
105
- "visitor method {method_name} with type :custom must provide :output string"
106
- ))
107
- })?;
108
- let output = String::try_convert(output_value)?;
109
- Ok(VisitResult::Custom(output))
110
- }
111
- "skip" => Ok(VisitResult::Skip),
112
- "preserve_html" => Ok(VisitResult::PreserveHtml),
113
- "error" => {
114
- let message_value: Value = hash.get(ruby.intern("message")).ok_or_else(|| {
115
- arg_error(format!(
116
- "visitor method {method_name} with type :error must provide :message string"
117
- ))
118
- })?;
119
- let message = String::try_convert(message_value)?;
120
- Ok(VisitResult::Error(message))
121
- }
122
- other => Err(arg_error(format!(
123
- "visitor method {method_name} returned invalid type: {other}"
124
- ))),
125
- }
126
- }
127
-
128
- pub fn ruby_to_node_context(&self, ctx: &NodeContext, ruby: &Ruby) -> Result<Value, Error> {
129
- let hash = ruby.hash_new();
130
-
131
- let node_type_str = match ctx.node_type {
132
- NodeType::Text => "text",
133
- NodeType::Element => "element",
134
- NodeType::Heading => "heading",
135
- NodeType::Paragraph => "paragraph",
136
- NodeType::Div => "div",
137
- NodeType::Blockquote => "blockquote",
138
- NodeType::Pre => "pre",
139
- NodeType::Hr => "hr",
140
- NodeType::List => "list",
141
- NodeType::ListItem => "list_item",
142
- NodeType::DefinitionList => "definition_list",
143
- NodeType::DefinitionTerm => "definition_term",
144
- NodeType::DefinitionDescription => "definition_description",
145
- NodeType::Table => "table",
146
- NodeType::TableRow => "table_row",
147
- NodeType::TableCell => "table_cell",
148
- NodeType::TableHeader => "table_header",
149
- NodeType::TableBody => "table_body",
150
- NodeType::TableHead => "table_head",
151
- NodeType::TableFoot => "table_foot",
152
- NodeType::Link => "link",
153
- NodeType::Image => "image",
154
- NodeType::Strong => "strong",
155
- NodeType::Em => "em",
156
- NodeType::Code => "code",
157
- NodeType::Strikethrough => "strikethrough",
158
- NodeType::Underline => "underline",
159
- NodeType::Subscript => "subscript",
160
- NodeType::Superscript => "superscript",
161
- NodeType::Mark => "mark",
162
- NodeType::Small => "small",
163
- NodeType::Br => "br",
164
- NodeType::Span => "span",
165
- NodeType::Article => "article",
166
- NodeType::Section => "section",
167
- NodeType::Nav => "nav",
168
- NodeType::Aside => "aside",
169
- NodeType::Header => "header",
170
- NodeType::Footer => "footer",
171
- NodeType::Main => "main",
172
- NodeType::Figure => "figure",
173
- NodeType::Figcaption => "figcaption",
174
- NodeType::Time => "time",
175
- NodeType::Details => "details",
176
- NodeType::Summary => "summary",
177
- NodeType::Form => "form",
178
- NodeType::Input => "input",
179
- NodeType::Select => "select",
180
- NodeType::Option => "option",
181
- NodeType::Button => "button",
182
- NodeType::Textarea => "textarea",
183
- NodeType::Label => "label",
184
- NodeType::Fieldset => "fieldset",
185
- NodeType::Legend => "legend",
186
- NodeType::Audio => "audio",
187
- NodeType::Video => "video",
188
- NodeType::Picture => "picture",
189
- NodeType::Source => "source",
190
- NodeType::Iframe => "iframe",
191
- NodeType::Svg => "svg",
192
- NodeType::Canvas => "canvas",
193
- NodeType::Ruby => "ruby",
194
- NodeType::Rt => "rt",
195
- NodeType::Rp => "rp",
196
- NodeType::Abbr => "abbr",
197
- NodeType::Kbd => "kbd",
198
- NodeType::Samp => "samp",
199
- NodeType::Var => "var",
200
- NodeType::Cite => "cite",
201
- NodeType::Q => "q",
202
- NodeType::Del => "del",
203
- NodeType::Ins => "ins",
204
- NodeType::Data => "data",
205
- NodeType::Meter => "meter",
206
- NodeType::Progress => "progress",
207
- NodeType::Output => "output",
208
- NodeType::Template => "template",
209
- NodeType::Slot => "slot",
210
- NodeType::Html => "html",
211
- NodeType::Head => "head",
212
- NodeType::Body => "body",
213
- NodeType::Title => "title",
214
- NodeType::Meta => "meta",
215
- NodeType::LinkTag => "link_tag",
216
- NodeType::Style => "style",
217
- NodeType::Script => "script",
218
- NodeType::Base => "base",
219
- NodeType::Custom => "custom",
220
- };
221
- hash.aset(ruby.intern("node_type"), ruby.intern(node_type_str))?;
222
-
223
- hash.aset(ruby.intern("tag_name"), ctx.tag_name.as_str())?;
224
-
225
- let attrs_hash = ruby.hash_new();
226
- for (key, value) in &ctx.attributes {
227
- attrs_hash.aset(key.as_str(), value.as_str())?;
228
- }
229
- hash.aset(ruby.intern("attributes"), attrs_hash)?;
230
-
231
- hash.aset(ruby.intern("depth"), ctx.depth as i64)?;
232
-
233
- hash.aset(ruby.intern("index_in_parent"), ctx.index_in_parent as i64)?;
234
-
235
- match &ctx.parent_tag {
236
- Some(tag) => hash.aset(ruby.intern("parent_tag"), tag.as_str())?,
237
- None => hash.aset(ruby.intern("parent_tag"), ruby.qnil())?,
238
- }
239
-
240
- hash.aset(ruby.intern("is_inline"), ctx.is_inline)?;
241
-
242
- Ok(hash.as_value())
243
- }
244
- }
245
-
246
- impl std::fmt::Debug for RubyVisitorWrapper {
247
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
248
- f.debug_struct("RubyVisitorWrapper")
249
- .field("ruby_visitor", &self.ruby_visitor)
250
- .finish()
251
- }
252
- }