html-to-markdown 2.16.0 → 2.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,12 @@ use html_to_markdown_rs::{
6
6
  safety::guard_panic,
7
7
  };
8
8
 
9
+ #[cfg(feature = "visitor")]
10
+ use html_to_markdown_rs::{
11
+ convert_with_visitor as convert_with_visitor_inner,
12
+ visitor::{HtmlVisitor, NodeContext, NodeType, VisitResult},
13
+ };
14
+
9
15
  #[cfg(feature = "metadata")]
10
16
  use html_to_markdown_rs::convert_with_metadata as convert_with_metadata_inner;
11
17
  mod profiling;
@@ -17,13 +23,907 @@ use html_to_markdown_rs::metadata::{
17
23
  };
18
24
  use magnus::prelude::*;
19
25
  use magnus::r_hash::ForEach;
26
+ use magnus::value::ReprValue;
20
27
  use magnus::{Error, RArray, RHash, Ruby, Symbol, TryConvert, Value, function, scan_args::scan_args};
28
+ #[cfg(feature = "visitor")]
29
+ use std::panic::AssertUnwindSafe;
30
+ #[cfg(feature = "profiling")]
21
31
  use std::path::PathBuf;
22
32
 
23
33
  /// Handle that carries a `ConversionOptions` value and is exposed to Ruby as the
  /// `HtmlToMarkdown::Options` class (see the `magnus::wrap` attribute below).
  #[derive(Clone)]
24
34
  #[magnus::wrap(class = "HtmlToMarkdown::Options", free_immediately)]
25
35
  struct OptionsHandle(ConversionOptions);
26
36
 
37
/// Adapter that forwards visitor callbacks to a Ruby object.
///
/// Cloning is cheap: the Ruby handle is copied and `last_error` is shared between clones,
/// so an error recorded through one clone is visible through every other clone.
#[cfg(feature = "visitor")]
#[derive(Clone)]
struct RubyVisitorWrapper {
    /// Ruby object whose `visit_*` methods are invoked.
    ruby_visitor: Value,
    /// Message describing the most recent Ruby exception raised by a visitor method, if any.
    last_error: std::rc::Rc<std::cell::RefCell<Option<String>>>,
}

#[cfg(feature = "visitor")]
impl RubyVisitorWrapper {
    /// Wraps `ruby_visitor` with an empty error slot.
    fn new(ruby_visitor: Value) -> Self {
        Self {
            ruby_visitor,
            last_error: std::rc::Rc::default(),
        }
    }

    /// Converts a Rust string into a UTF-8 encoded Ruby `String`.
    ///
    /// The string is built directly through the Ruby C API. It must never be spliced into
    /// Ruby source and evaluated: `s` comes from the HTML being converted, and a sequence such
    /// as `#{...}` would be interpolated (and executed) by the Ruby parser.
    fn utf8_str(&self, ruby: &Ruby, s: &str) -> Value {
        ruby.str_new(s).as_value()
    }

    /// Calls `method_name` on the Ruby visitor with `args` and decodes the returned Hash.
    ///
    /// Between zero and four arguments are supported. The Ruby method must return a Hash with
    /// a `:type` key holding one of `continue`, `custom` (requires `:output`), `skip`,
    /// `preserve_html` or `error` (requires `:message`).
    ///
    /// # Errors
    ///
    /// Returns the Ruby exception raised by the method (its message is also stored in
    /// `last_error`), or an argument error when the arity is unsupported or the returned value
    /// does not have the expected shape. Shape errors are not stored in `last_error`.
    ///
    /// # Panics
    ///
    /// Panics if no Ruby handle is available on the current thread.
    fn call_visitor_method(&self, method_name: &str, args: &[Value]) -> Result<VisitResult, Error> {
        let ruby = Ruby::get().expect("Ruby not initialized");

        let visitor = self.ruby_visitor;
        // `funcall` takes its arguments as a tuple, so dispatch on the slice length.
        let outcome: Result<Value, Error> = match *args {
            [] => visitor.funcall(method_name, ()),
            [a] => visitor.funcall(method_name, (a,)),
            [a, b] => visitor.funcall(method_name, (a, b)),
            [a, b, c] => visitor.funcall(method_name, (a, b, c)),
            [a, b, c, d] => visitor.funcall(method_name, (a, b, c, d)),
            _ => {
                return Err(arg_error(format!(
                    "Unsupported number of visitor method arguments: {}",
                    args.len()
                )));
            }
        };

        // Remember the exception text so the caller can surface it after the conversion.
        let returned = outcome.map_err(|err| {
            *self.last_error.borrow_mut() = Some(format!("Visitor error in {}: {}", method_name, err));
            err
        })?;

        Self::parse_visit_result(&ruby, method_name, returned)
    }

    /// Decodes the Hash returned by a Ruby visitor method into a `VisitResult`.
    fn parse_visit_result(ruby: &Ruby, method_name: &str, returned: Value) -> Result<VisitResult, Error> {
        let hash = RHash::from_value(returned)
            .ok_or_else(|| arg_error(format!("visitor method {} must return a Hash", method_name)))?;

        let type_value: Value = hash.get(ruby.intern("type")).ok_or_else(|| {
            arg_error(format!(
                "visitor method {} result Hash must have :type key",
                method_name
            ))
        })?;
        let type_str = symbol_to_string(type_value)?;

        match type_str.as_str() {
            "continue" => Ok(VisitResult::Continue),
            "skip" => Ok(VisitResult::Skip),
            "preserve_html" => Ok(VisitResult::PreserveHtml),
            "custom" => {
                let output_value: Value = hash.get(ruby.intern("output")).ok_or_else(|| {
                    arg_error(format!(
                        "visitor method {} with type :custom must provide :output string",
                        method_name
                    ))
                })?;
                String::try_convert(output_value).map(VisitResult::Custom)
            }
            "error" => {
                let message_value: Value = hash.get(ruby.intern("message")).ok_or_else(|| {
                    arg_error(format!(
                        "visitor method {} with type :error must provide :message string",
                        method_name
                    ))
                })?;
                String::try_convert(message_value).map(VisitResult::Error)
            }
            other => Err(arg_error(format!(
                "visitor method {} returned invalid type: {}",
                method_name, other
            ))),
        }
    }

    /// Returns the snake_case name under which a node type is reported to Ruby.
    fn node_type_name(node_type: &NodeType) -> &'static str {
        match node_type {
            NodeType::Text => "text",
            NodeType::Element => "element",
            NodeType::Heading => "heading",
            NodeType::Paragraph => "paragraph",
            NodeType::Div => "div",
            NodeType::Blockquote => "blockquote",
            NodeType::Pre => "pre",
            NodeType::Hr => "hr",
            NodeType::List => "list",
            NodeType::ListItem => "list_item",
            NodeType::DefinitionList => "definition_list",
            NodeType::DefinitionTerm => "definition_term",
            NodeType::DefinitionDescription => "definition_description",
            NodeType::Table => "table",
            NodeType::TableRow => "table_row",
            NodeType::TableCell => "table_cell",
            NodeType::TableHeader => "table_header",
            NodeType::TableBody => "table_body",
            NodeType::TableHead => "table_head",
            NodeType::TableFoot => "table_foot",
            NodeType::Link => "link",
            NodeType::Image => "image",
            NodeType::Strong => "strong",
            NodeType::Em => "em",
            NodeType::Code => "code",
            NodeType::Strikethrough => "strikethrough",
            NodeType::Underline => "underline",
            NodeType::Subscript => "subscript",
            NodeType::Superscript => "superscript",
            NodeType::Mark => "mark",
            NodeType::Small => "small",
            NodeType::Br => "br",
            NodeType::Span => "span",
            NodeType::Article => "article",
            NodeType::Section => "section",
            NodeType::Nav => "nav",
            NodeType::Aside => "aside",
            NodeType::Header => "header",
            NodeType::Footer => "footer",
            NodeType::Main => "main",
            NodeType::Figure => "figure",
            NodeType::Figcaption => "figcaption",
            NodeType::Time => "time",
            NodeType::Details => "details",
            NodeType::Summary => "summary",
            NodeType::Form => "form",
            NodeType::Input => "input",
            NodeType::Select => "select",
            NodeType::Option => "option",
            NodeType::Button => "button",
            NodeType::Textarea => "textarea",
            NodeType::Label => "label",
            NodeType::Fieldset => "fieldset",
            NodeType::Legend => "legend",
            NodeType::Audio => "audio",
            NodeType::Video => "video",
            NodeType::Picture => "picture",
            NodeType::Source => "source",
            NodeType::Iframe => "iframe",
            NodeType::Svg => "svg",
            NodeType::Canvas => "canvas",
            NodeType::Ruby => "ruby",
            NodeType::Rt => "rt",
            NodeType::Rp => "rp",
            NodeType::Abbr => "abbr",
            NodeType::Kbd => "kbd",
            NodeType::Samp => "samp",
            NodeType::Var => "var",
            NodeType::Cite => "cite",
            NodeType::Q => "q",
            NodeType::Del => "del",
            NodeType::Ins => "ins",
            NodeType::Data => "data",
            NodeType::Meter => "meter",
            NodeType::Progress => "progress",
            NodeType::Output => "output",
            NodeType::Template => "template",
            NodeType::Slot => "slot",
            NodeType::Html => "html",
            NodeType::Head => "head",
            NodeType::Body => "body",
            NodeType::Title => "title",
            NodeType::Meta => "meta",
            NodeType::LinkTag => "link_tag",
            NodeType::Style => "style",
            NodeType::Script => "script",
            NodeType::Base => "base",
            NodeType::Custom => "custom",
        }
    }

    /// Builds the Ruby Hash describing `ctx` that is passed as the first argument of every
    /// visitor method.
    ///
    /// Keys are symbols: `:node_type` (symbol), `:tag_name`, `:attributes` (Hash with string
    /// keys and values), `:depth`, `:index_in_parent`, `:parent_tag` (string or `nil`) and
    /// `:is_inline`.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while storing an entry in the Hash.
    fn ruby_to_node_context(&self, ctx: &NodeContext, ruby: &Ruby) -> Result<Value, Error> {
        let hash = ruby.hash_new();

        let node_type_str = Self::node_type_name(&ctx.node_type);
        hash.aset(ruby.intern("node_type"), ruby.intern(node_type_str))?;

        hash.aset(ruby.intern("tag_name"), ctx.tag_name.as_str())?;

        let attrs_hash = ruby.hash_new();
        for (key, value) in &ctx.attributes {
            attrs_hash.aset(key.as_str(), value.as_str())?;
        }
        hash.aset(ruby.intern("attributes"), attrs_hash)?;

        hash.aset(ruby.intern("depth"), ctx.depth as i64)?;
        hash.aset(ruby.intern("index_in_parent"), ctx.index_in_parent as i64)?;

        // A missing parent is reported as an explicit `nil` rather than an absent key.
        if let Some(tag) = &ctx.parent_tag {
            hash.aset(ruby.intern("parent_tag"), tag.as_str())?;
        } else {
            hash.aset(ruby.intern("parent_tag"), ruby.qnil())?;
        }

        hash.aset(ruby.intern("is_inline"), ctx.is_inline)?;

        Ok(hash.as_value())
    }
}
282
+
283
/// Debug output shows only the wrapped Ruby object; the shared error slot is omitted.
#[cfg(feature = "visitor")]
impl std::fmt::Debug for RubyVisitorWrapper {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("RubyVisitorWrapper");
        builder.field("ruby_visitor", &self.ruby_visitor);
        builder.finish()
    }
}
291
+
292
/// Private plumbing shared by every `HtmlVisitor` callback below.
#[cfg(feature = "visitor")]
impl RubyVisitorWrapper {
    /// Builds a UTF-8 encoded Ruby string argument.
    fn text_arg(ruby: &Ruby, text: &str) -> Value {
        ruby.str_new(text).as_value()
    }

    /// Builds a UTF-8 encoded Ruby string argument, or `nil` when the value is absent.
    fn optional_text_arg(ruby: &Ruby, text: Option<&str>) -> Value {
        match text {
            Some(text) => Self::text_arg(ruby, text),
            None => ruby.qnil().as_value(),
        }
    }

    /// Builds a Ruby `true`/`false` argument.
    fn flag_arg(ruby: &Ruby, flag: bool) -> Value {
        if flag {
            ruby.qtrue().as_value()
        } else {
            ruby.qfalse().as_value()
        }
    }

    /// Runs one callback, returning `None` when any step fails.
    ///
    /// `build_args` receives the Ruby handle and the already-built context Hash and returns
    /// the complete argument list as a fixed-size array. The array lives on the stack for the
    /// duration of the call, exactly like the argument slices it replaces.
    fn try_dispatch<const N: usize, F>(&self, method_name: &str, ctx: &NodeContext, build_args: F) -> Option<VisitResult>
    where
        F: FnOnce(&Ruby, Value) -> [Value; N],
    {
        let ruby = Ruby::get().ok()?;
        let node_ctx = self.ruby_to_node_context(ctx, &ruby).ok()?;
        let argv = build_args(&ruby, node_ctx);
        self.call_visitor_method(method_name, &argv).ok()
    }

    /// Runs one callback, falling back to `VisitResult::Continue` on any failure.
    ///
    /// Failures are deliberately not propagated here: the trait methods cannot return an
    /// error, and exceptions raised by the Ruby method are recorded by
    /// `call_visitor_method` in `last_error`.
    fn dispatch<const N: usize, F>(&self, method_name: &str, ctx: &NodeContext, build_args: F) -> VisitResult
    where
        F: FnOnce(&Ruby, Value) -> [Value; N],
    {
        self.try_dispatch(method_name, ctx, build_args)
            .unwrap_or(VisitResult::Continue)
    }

    /// Callback whose only argument is the node context.
    fn notify(&self, method_name: &str, ctx: &NodeContext) -> VisitResult {
        self.dispatch(method_name, ctx, |_, node| [node])
    }

    /// Callback taking the node context and one string.
    fn notify_text(&self, method_name: &str, ctx: &NodeContext, text: &str) -> VisitResult {
        self.dispatch(method_name, ctx, |ruby, node| [node, Self::text_arg(ruby, text)])
    }

    /// Callback taking the node context and one optional string.
    fn notify_optional_text(&self, method_name: &str, ctx: &NodeContext, text: Option<&str>) -> VisitResult {
        self.dispatch(method_name, ctx, |ruby, node| {
            [node, Self::optional_text_arg(ruby, text)]
        })
    }

    /// Callback taking the node context and one boolean.
    fn notify_flag(&self, method_name: &str, ctx: &NodeContext, flag: bool) -> VisitResult {
        self.dispatch(method_name, ctx, |ruby, node| [node, Self::flag_arg(ruby, flag)])
    }
}

/// Forwards each visitor callback to the same-named method on the wrapped Ruby object.
///
/// Every callback passes the node context Hash first, followed by its own arguments in
/// declaration order. String arguments are UTF-8 encoded Ruby strings; absent optional
/// values are passed as `nil`. If the call cannot be made or its result cannot be decoded,
/// the callback returns `VisitResult::Continue`.
#[cfg(feature = "visitor")]
impl HtmlVisitor for RubyVisitorWrapper {
    fn visit_element_start(&mut self, ctx: &NodeContext) -> VisitResult {
        self.notify("visit_element_start", ctx)
    }

    fn visit_element_end(&mut self, ctx: &NodeContext, output: &str) -> VisitResult {
        self.notify_text("visit_element_end", ctx, output)
    }

    fn visit_text(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.dispatch("visit_text", ctx, |ruby, node| [node, self.utf8_str(ruby, text)])
    }

    fn visit_link(&mut self, ctx: &NodeContext, href: &str, text: &str, title: Option<&str>) -> VisitResult {
        self.dispatch("visit_link", ctx, |ruby, node| {
            [
                node,
                Self::text_arg(ruby, href),
                Self::text_arg(ruby, text),
                Self::optional_text_arg(ruby, title),
            ]
        })
    }

    fn visit_image(&mut self, ctx: &NodeContext, src: &str, alt: &str, title: Option<&str>) -> VisitResult {
        self.dispatch("visit_image", ctx, |ruby, node| {
            [
                node,
                Self::text_arg(ruby, src),
                Self::text_arg(ruby, alt),
                Self::optional_text_arg(ruby, title),
            ]
        })
    }

    fn visit_heading(&mut self, ctx: &NodeContext, level: u32, text: &str, id: Option<&str>) -> VisitResult {
        self.dispatch("visit_heading", ctx, |ruby, node| {
            [
                node,
                ruby.integer_from_i64(i64::from(level)).as_value(),
                Self::text_arg(ruby, text),
                Self::optional_text_arg(ruby, id),
            ]
        })
    }

    fn visit_code_block(&mut self, ctx: &NodeContext, lang: Option<&str>, code: &str) -> VisitResult {
        self.dispatch("visit_code_block", ctx, |ruby, node| {
            [
                node,
                Self::optional_text_arg(ruby, lang),
                Self::text_arg(ruby, code),
            ]
        })
    }

    fn visit_code_inline(&mut self, ctx: &NodeContext, code: &str) -> VisitResult {
        self.notify_text("visit_code_inline", ctx, code)
    }

    fn visit_list_item(&mut self, ctx: &NodeContext, ordered: bool, marker: &str, text: &str) -> VisitResult {
        self.dispatch("visit_list_item", ctx, |ruby, node| {
            [
                node,
                Self::flag_arg(ruby, ordered),
                Self::text_arg(ruby, marker),
                Self::text_arg(ruby, text),
            ]
        })
    }

    fn visit_list_start(&mut self, ctx: &NodeContext, ordered: bool) -> VisitResult {
        self.notify_flag("visit_list_start", ctx, ordered)
    }

    fn visit_list_end(&mut self, ctx: &NodeContext, ordered: bool, output: &str) -> VisitResult {
        self.dispatch("visit_list_end", ctx, |ruby, node| {
            [
                node,
                Self::flag_arg(ruby, ordered),
                Self::text_arg(ruby, output),
            ]
        })
    }

    fn visit_table_start(&mut self, ctx: &NodeContext) -> VisitResult {
        self.notify("visit_table_start", ctx)
    }

    fn visit_table_row(&mut self, ctx: &NodeContext, cells: &[String], is_header: bool) -> VisitResult {
        self.dispatch("visit_table_row", ctx, |ruby, node| {
            let cells_array = ruby.ary_new();
            for cell in cells {
                // Best effort, as before: a failed push leaves the cell out of the array.
                let _ = cells_array.push(Self::text_arg(ruby, cell));
            }
            [
                node,
                cells_array.as_value(),
                Self::flag_arg(ruby, is_header),
            ]
        })
    }

    fn visit_table_end(&mut self, ctx: &NodeContext, output: &str) -> VisitResult {
        self.notify_text("visit_table_end", ctx, output)
    }

    fn visit_blockquote(&mut self, ctx: &NodeContext, content: &str, depth: usize) -> VisitResult {
        self.dispatch("visit_blockquote", ctx, |ruby, node| {
            [
                node,
                Self::text_arg(ruby, content),
                ruby.integer_from_i64(depth as i64).as_value(),
            ]
        })
    }

    fn visit_strong(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.notify_text("visit_strong", ctx, text)
    }

    fn visit_emphasis(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.notify_text("visit_emphasis", ctx, text)
    }

    fn visit_strikethrough(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.notify_text("visit_strikethrough", ctx, text)
    }

    fn visit_underline(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.notify_text("visit_underline", ctx, text)
    }

    fn visit_subscript(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.notify_text("visit_subscript", ctx, text)
    }

    fn visit_superscript(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.notify_text("visit_superscript", ctx, text)
    }

    fn visit_mark(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.notify_text("visit_mark", ctx, text)
    }

    fn visit_line_break(&mut self, ctx: &NodeContext) -> VisitResult {
        self.notify("visit_line_break", ctx)
    }

    fn visit_horizontal_rule(&mut self, ctx: &NodeContext) -> VisitResult {
        self.notify("visit_horizontal_rule", ctx)
    }

    fn visit_custom_element(&mut self, ctx: &NodeContext, tag_name: &str, html: &str) -> VisitResult {
        self.dispatch("visit_custom_element", ctx, |ruby, node| {
            [
                node,
                Self::text_arg(ruby, tag_name),
                Self::text_arg(ruby, html),
            ]
        })
    }

    fn visit_definition_list_start(&mut self, ctx: &NodeContext) -> VisitResult {
        self.notify("visit_definition_list_start", ctx)
    }

    fn visit_definition_term(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.notify_text("visit_definition_term", ctx, text)
    }

    fn visit_definition_description(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.notify_text("visit_definition_description", ctx, text)
    }

    fn visit_definition_list_end(&mut self, ctx: &NodeContext, output: &str) -> VisitResult {
        self.notify_text("visit_definition_list_end", ctx, output)
    }

    fn visit_form(&mut self, ctx: &NodeContext, action: Option<&str>, method: Option<&str>) -> VisitResult {
        self.dispatch("visit_form", ctx, |ruby, node| {
            [
                node,
                Self::optional_text_arg(ruby, action),
                Self::optional_text_arg(ruby, method),
            ]
        })
    }

    fn visit_input(
        &mut self,
        ctx: &NodeContext,
        input_type: &str,
        name: Option<&str>,
        value: Option<&str>,
    ) -> VisitResult {
        self.dispatch("visit_input", ctx, |ruby, node| {
            [
                node,
                Self::text_arg(ruby, input_type),
                Self::optional_text_arg(ruby, name),
                Self::optional_text_arg(ruby, value),
            ]
        })
    }

    fn visit_button(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.notify_text("visit_button", ctx, text)
    }

    fn visit_audio(&mut self, ctx: &NodeContext, src: Option<&str>) -> VisitResult {
        self.notify_optional_text("visit_audio", ctx, src)
    }

    fn visit_video(&mut self, ctx: &NodeContext, src: Option<&str>) -> VisitResult {
        self.notify_optional_text("visit_video", ctx, src)
    }

    fn visit_iframe(&mut self, ctx: &NodeContext, src: Option<&str>) -> VisitResult {
        self.notify_optional_text("visit_iframe", ctx, src)
    }

    fn visit_details(&mut self, ctx: &NodeContext, open: bool) -> VisitResult {
        self.notify_flag("visit_details", ctx, open)
    }

    fn visit_summary(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.notify_text("visit_summary", ctx, text)
    }

    fn visit_figure_start(&mut self, ctx: &NodeContext) -> VisitResult {
        self.notify("visit_figure_start", ctx)
    }

    fn visit_figcaption(&mut self, ctx: &NodeContext, text: &str) -> VisitResult {
        self.notify_text("visit_figcaption", ctx, text)
    }

    fn visit_figure_end(&mut self, ctx: &NodeContext, output: &str) -> VisitResult {
        self.notify_text("visit_figure_end", ctx, output)
    }
}
926
+
27
927
  fn conversion_error(err: ConversionError) -> Error {
28
928
  match err {
29
929
  ConversionError::ConfigError(msg) => arg_error(msg),
@@ -410,6 +1310,7 @@ fn convert_with_options_handle_fn(_ruby: &Ruby, args: &[Value]) -> Result<String
410
1310
  guard_panic(|| profiling::maybe_profile(|| convert_inner(&html, Some(options)))).map_err(conversion_error)
411
1311
  }
412
1312
 
1313
+ #[cfg(feature = "inline-images")]
413
1314
  fn convert_with_inline_images_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, Error> {
414
1315
  let parsed = scan_args::<(String,), (Option<Value>, Option<Value>), (), (), (), ()>(args)?;
415
1316
  let html = parsed.required.0;
@@ -422,6 +1323,7 @@ fn convert_with_inline_images_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, E
422
1323
  extraction_to_value(ruby, extraction)
423
1324
  }
424
1325
 
1326
+ #[cfg(feature = "inline-images")]
425
1327
  fn convert_with_inline_images_handle_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, Error> {
426
1328
  let parsed = scan_args::<(String, &OptionsHandle), (Option<Value>,), (), (), (), ()>(args)?;
427
1329
  let html = parsed.required.0;
@@ -671,6 +1573,48 @@ fn convert_with_metadata_handle_fn(ruby: &Ruby, args: &[Value]) -> Result<Value,
671
1573
  Ok(array.as_value())
672
1574
  }
673
1575
 
1576
/// Ruby entry point: `convert_with_visitor(html, options = nil, visitor = nil)`.
///
/// `options` may be an `OptionsHandle` or any value accepted by
/// `build_conversion_options`; when omitted the default options are used. A `nil` visitor
/// falls back to a plain conversion, while omitting the visitor argument altogether is an
/// argument error.
///
/// # Errors
///
/// Returns an error when argument parsing or option building fails, when the conversion
/// itself fails, or (as a runtime error) when a visitor method raised during an otherwise
/// successful conversion.
#[cfg(feature = "visitor")]
fn convert_with_visitor_fn(ruby: &Ruby, args: &[Value]) -> Result<String, Error> {
    let parsed = scan_args::<(String,), (Option<Value>, Option<Value>), (), (), (), ()>(args)?;
    let html = parsed.required.0;
    let (options_arg, visitor_arg) = parsed.optional;

    // Options are resolved before the visitor is validated, so option errors win.
    let options = if let Some(raw_options) = options_arg {
        if let Ok(handle) = <&OptionsHandle>::try_convert(raw_options) {
            handle.0.clone()
        } else {
            build_conversion_options(ruby, Some(raw_options))?
        }
    } else {
        ConversionOptions::default()
    };

    let visitor_value = visitor_arg.ok_or_else(|| arg_error("visitor argument is required"))?;
    if visitor_value.is_nil() {
        // Explicit `nil` visitor: behave like the ordinary conversion.
        let plain = guard_panic(AssertUnwindSafe(|| {
            profiling::maybe_profile(|| convert_inner(&html, Some(options)))
        }));
        return plain.map_err(conversion_error);
    }

    // The clone handed to the converter shares `last_error` with the wrapper kept here.
    let visitor_wrapper = RubyVisitorWrapper::new(visitor_value);
    let visitor_handle = std::rc::Rc::new(std::cell::RefCell::new(visitor_wrapper.clone()));

    let converted = guard_panic(AssertUnwindSafe(|| {
        profiling::maybe_profile(|| convert_with_visitor_inner(&html, Some(options), Some(visitor_handle)))
    }));
    let result = converted.map_err(conversion_error)?;

    let recorded_error = visitor_wrapper.last_error.borrow().clone();
    match recorded_error {
        Some(error_msg) => Err(runtime_error(error_msg)),
        None => Ok(result),
    }
}
1616
+
1617
+ #[cfg(feature = "profiling")]
674
1618
  fn start_profiling_fn(_ruby: &Ruby, args: &[Value]) -> Result<bool, Error> {
675
1619
  let output = args.first().ok_or_else(|| arg_error("output_path required"))?;
676
1620
  let output: String = String::try_convert(*output)?;
@@ -683,6 +1627,7 @@ fn start_profiling_fn(_ruby: &Ruby, args: &[Value]) -> Result<bool, Error> {
683
1627
  Ok(true)
684
1628
  }
685
1629
 
1630
+ #[cfg(feature = "profiling")]
686
1631
  fn stop_profiling_fn(_ruby: &Ruby, _args: &[Value]) -> Result<bool, Error> {
687
1632
  profiling::stop().map_err(conversion_error)?;
688
1633
  Ok(true)
@@ -710,7 +1655,13 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
710
1655
  "convert_with_metadata_handle",
711
1656
  function!(convert_with_metadata_handle_fn, -1),
712
1657
  )?;
1658
+
1659
+ #[cfg(feature = "visitor")]
1660
+ module.define_singleton_method("convert_with_visitor", function!(convert_with_visitor_fn, -1))?;
1661
+
1662
+ #[cfg(feature = "profiling")]
713
1663
  module.define_singleton_method("start_profiling", function!(start_profiling_fn, -1))?;
1664
+ #[cfg(feature = "profiling")]
714
1665
  module.define_singleton_method("stop_profiling", function!(stop_profiling_fn, -1))?;
715
1666
 
716
1667
  Ok(())