html-to-markdown 3.4.1-aarch64-linux → 3.5.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,59 +1,1227 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:8104ea3e0a2d7ef26ed519cf9d400a92c4ae7564f726fb32ed02076450a2d277
2
+ # alef:hash:5517c8b26656b6a2f5d23d4fcf6d5595fb6c10c6a1693db1f0f9f2052d4e3750
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
  # Issues & docs: https://github.com/kreuzberg-dev/alef
6
6
  # frozen_string_literal: true
7
7
 
8
- require 'json'
9
- require 'html_to_markdown_rb'
8
+ require "json"
9
+ require "sorbet-runtime"
10
+ require "html_to_markdown_rb"
10
11
  module HtmlToMarkdown
11
- # Re-export all public module functions from the native extension
12
- HtmlToMarkdownRs.methods(false).each do |m|
13
- define_singleton_method(m) { |*args, **kwargs, &blk| HtmlToMarkdownRs.public_send(m, *args, **kwargs, &blk) }
12
# Public types re-exported from the native extension.
# The list is curated: Update/Builder types are intentionally left out.
ConversionOptions = HtmlToMarkdownRs.const_get(:ConversionOptions)
ConversionResult = HtmlToMarkdownRs.const_get(:ConversionResult)
DocumentMetadata = HtmlToMarkdownRs.const_get(:DocumentMetadata)
DocumentNode = HtmlToMarkdownRs.const_get(:DocumentNode)
DocumentStructure = HtmlToMarkdownRs.const_get(:DocumentStructure)
GridCell = HtmlToMarkdownRs.const_get(:GridCell)
HeaderMetadata = HtmlToMarkdownRs.const_get(:HeaderMetadata)
HtmlMetadata = HtmlToMarkdownRs.const_get(:HtmlMetadata)
ImageMetadata = HtmlToMarkdownRs.const_get(:ImageMetadata)
LinkMetadata = HtmlToMarkdownRs.const_get(:LinkMetadata)
NodeContext = HtmlToMarkdownRs.const_get(:NodeContext)
PreprocessingOptions = HtmlToMarkdownRs.const_get(:PreprocessingOptions)
ProcessingWarning = HtmlToMarkdownRs.const_get(:ProcessingWarning)
StructuredData = HtmlToMarkdownRs.const_get(:StructuredData)
TableData = HtmlToMarkdownRs.const_get(:TableData)
TableGrid = HtmlToMarkdownRs.const_get(:TableGrid)
TextAnnotation = HtmlToMarkdownRs.const_get(:TextAnnotation)

# Public module functions re-exported from the native extension (curated list).
# Positional arguments, keyword arguments and the block are all forwarded
# unchanged to the native `convert`.
define_singleton_method(:convert) do |*args, **kwargs, &blk|
  HtmlToMarkdownRs.public_send(:convert, *args, **kwargs, &blk)
end
32
+ end
33
+ module HtmlToMarkdown
34
# The semantic content type of a document node.
#
# JSON serialization is internally tagged: the variant is named by a
# `"node_type"` entry (for example `"node_type": "heading"`).
module NodeContent
  extend T::Helpers
  extend T::Sig

  interface!

  # Build the variant instance described by +hash+.
  #
  # The discriminator is read from the `node_type` entry, trying the Symbol key
  # first and the String key second; the matching variant class then builds the
  # instance through its own `from_hash`.
  #
  # @param hash [Hash] with discriminator field and variant-specific fields
  # @return [variant_class] an instance of the appropriate variant
  # @raise [RuntimeError] when the discriminator matches no known variant
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) }
  def self.from_hash(hash)
    discriminator = hash[:node_type] || hash["node_type"]
    variant =
      case discriminator
      when "heading" then NodeContentHeading
      when "paragraph" then NodeContentParagraph
      when "list" then NodeContentList
      when "list_item" then NodeContentListItem
      when "table" then NodeContentTable
      when "image" then NodeContentImage
      when "code" then NodeContentCode
      when "quote" then NodeContentQuote
      when "definition_list" then NodeContentDefinitionList
      when "definition_item" then NodeContentDefinitionItem
      when "raw_block" then NodeContentRawBlock
      when "metadata_block" then NodeContentMetadataBlock
      when "group" then NodeContentGroup
      else raise "Unknown discriminator: #{discriminator}"
      end
    variant.from_hash(hash)
  end
end
67
+
68
# A heading element (h1-h6).
#
# Immutable value object and one of the NodeContent variants.
NodeContentHeading = Data.define(:level, :text) do
  include NodeContent
  extend T::Sig

  # `Data.define` installs the member readers on this class itself, so a reader
  # redefined here has no superclass implementation to reach and `super` would
  # raise NoMethodError. The typed readers read the stored value via `to_h`.

  # Heading level (1-6).
  sig { returns(Integer) }
  def level = to_h.fetch(:level)

  # The heading text content.
  sig { returns(String) }
  def text = to_h.fetch(:text)

  # Variant predicates: only `heading?` is true for this class.
  sig { returns(T::Boolean) }
  def heading? = true

  sig { returns(T::Boolean) }
  def paragraph? = false

  sig { returns(T::Boolean) }
  def list? = false

  sig { returns(T::Boolean) }
  def list_item? = false

  sig { returns(T::Boolean) }
  def table? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def code? = false

  sig { returns(T::Boolean) }
  def quote? = false

  sig { returns(T::Boolean) }
  def definition_list? = false

  sig { returns(T::Boolean) }
  def definition_item? = false

  sig { returns(T::Boolean) }
  def raw_block? = false

  sig { returns(T::Boolean) }
  def metadata_block? = false

  sig { returns(T::Boolean) }
  def group? = false

  # Build an instance from a Hash, accepting Symbol or String keys
  # (the Symbol key is tried first).
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(level: hash[:level] || hash["level"], text: hash[:text] || hash["text"])
  end
end
127
+
128
# A paragraph of text.
#
# Immutable value object and one of the NodeContent variants.
NodeContentParagraph = Data.define(:text) do
  include NodeContent
  extend T::Sig

  # `Data.define` installs the member readers on this class itself, so a reader
  # redefined here has no superclass implementation to reach and `super` would
  # raise NoMethodError. The typed reader reads the stored value via `to_h`.

  # The paragraph text content.
  sig { returns(String) }
  def text = to_h.fetch(:text)

  # Variant predicates: only `paragraph?` is true for this class.
  sig { returns(T::Boolean) }
  def heading? = false

  sig { returns(T::Boolean) }
  def paragraph? = true

  sig { returns(T::Boolean) }
  def list? = false

  sig { returns(T::Boolean) }
  def list_item? = false

  sig { returns(T::Boolean) }
  def table? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def code? = false

  sig { returns(T::Boolean) }
  def quote? = false

  sig { returns(T::Boolean) }
  def definition_list? = false

  sig { returns(T::Boolean) }
  def definition_item? = false

  sig { returns(T::Boolean) }
  def raw_block? = false

  sig { returns(T::Boolean) }
  def metadata_block? = false

  sig { returns(T::Boolean) }
  def group? = false

  # Build an instance from a Hash, accepting a Symbol or String `text` key
  # (the Symbol key is tried first).
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(text: hash[:text] || hash["text"])
  end
end
183
+
184
# A list container (ordered or unordered). Children are `ListItem` nodes.
#
# Immutable value object and one of the NodeContent variants.
NodeContentList = Data.define(:ordered) do
  include NodeContent
  extend T::Sig

  # `Data.define` installs the member readers on this class itself, so a reader
  # redefined here has no superclass implementation to reach and `super` would
  # raise NoMethodError. The typed reader reads the stored value via `to_h`.

  # Whether this is an ordered list.
  sig { returns(T::Boolean) }
  def ordered = to_h.fetch(:ordered)

  # Variant predicates: only `list?` is true for this class.
  sig { returns(T::Boolean) }
  def heading? = false

  sig { returns(T::Boolean) }
  def paragraph? = false

  sig { returns(T::Boolean) }
  def list? = true

  sig { returns(T::Boolean) }
  def list_item? = false

  sig { returns(T::Boolean) }
  def table? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def code? = false

  sig { returns(T::Boolean) }
  def quote? = false

  sig { returns(T::Boolean) }
  def definition_list? = false

  sig { returns(T::Boolean) }
  def definition_item? = false

  sig { returns(T::Boolean) }
  def raw_block? = false

  sig { returns(T::Boolean) }
  def metadata_block? = false

  sig { returns(T::Boolean) }
  def group? = false

  # Build an instance from a Hash, accepting a Symbol or String `ordered` key
  # (the Symbol key is tried first).
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    # `ordered` is a boolean, so `a || b` cannot be used for the key fallback:
    # a `false` stored under the Symbol key would be discarded and replaced by
    # the (missing) String entry. Fall back only when the Symbol lookup is nil.
    ordered = hash[:ordered]
    ordered = hash["ordered"] if ordered.nil?
    new(ordered: ordered)
  end
end
15
239
 
16
- # Re-export all constants (classes, structs, etc.) from the native extension
17
- HtmlToMarkdownRs.constants.each do |c|
18
- const_set(c, HtmlToMarkdownRs.const_get(c)) unless const_defined?(c)
240
# A single list item.
#
# Immutable value object and one of the NodeContent variants.
NodeContentListItem = Data.define(:text) do
  include NodeContent
  extend T::Sig

  # `Data.define` installs the member readers on this class itself, so a reader
  # redefined here has no superclass implementation to reach and `super` would
  # raise NoMethodError. The typed reader reads the stored value via `to_h`.

  # The list item text content.
  sig { returns(String) }
  def text = to_h.fetch(:text)

  # Variant predicates: only `list_item?` is true for this class.
  sig { returns(T::Boolean) }
  def heading? = false

  sig { returns(T::Boolean) }
  def paragraph? = false

  sig { returns(T::Boolean) }
  def list? = false

  sig { returns(T::Boolean) }
  def list_item? = true

  sig { returns(T::Boolean) }
  def table? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def code? = false

  sig { returns(T::Boolean) }
  def quote? = false

  sig { returns(T::Boolean) }
  def definition_list? = false

  sig { returns(T::Boolean) }
  def definition_item? = false

  sig { returns(T::Boolean) }
  def raw_block? = false

  sig { returns(T::Boolean) }
  def metadata_block? = false

  sig { returns(T::Boolean) }
  def group? = false

  # Build an instance from a Hash, accepting a Symbol or String `text` key
  # (the Symbol key is tried first).
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(text: hash[:text] || hash["text"])
  end
end
295
+
296
# A table with structured cell data.
#
# Immutable value object and one of the NodeContent variants.
NodeContentTable = Data.define(:grid) do
  include NodeContent
  extend T::Sig

  # `Data.define` installs the member readers on this class itself, so a reader
  # redefined here has no superclass implementation to reach and `super` would
  # raise NoMethodError. The typed reader reads the stored value via `to_h`.

  # The table grid structure.
  sig { returns(TableGrid) }
  def grid = to_h.fetch(:grid)

  # Variant predicates: only `table?` is true for this class.
  sig { returns(T::Boolean) }
  def heading? = false

  sig { returns(T::Boolean) }
  def paragraph? = false

  sig { returns(T::Boolean) }
  def list? = false

  sig { returns(T::Boolean) }
  def list_item? = false

  sig { returns(T::Boolean) }
  def table? = true

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def code? = false

  sig { returns(T::Boolean) }
  def quote? = false

  sig { returns(T::Boolean) }
  def definition_list? = false

  sig { returns(T::Boolean) }
  def definition_item? = false

  sig { returns(T::Boolean) }
  def raw_block? = false

  sig { returns(T::Boolean) }
  def metadata_block? = false

  sig { returns(T::Boolean) }
  def group? = false

  # Build an instance from a Hash, accepting a Symbol or String `grid` key
  # (the Symbol key is tried first). The grid value is stored as given.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(grid: hash[:grid] || hash["grid"])
  end
end
351
+
352
# An image element.
#
# Immutable value object and one of the NodeContent variants.
NodeContentImage = Data.define(:description, :src, :image_index) do
  include NodeContent
  extend T::Sig

  # `Data.define` installs the member readers on this class itself, so a reader
  # redefined here has no superclass implementation to reach and `super` would
  # raise NoMethodError. The typed readers read the stored value via `to_h`.

  # Alt text or caption.
  sig { returns(T.nilable(String)) }
  def description = to_h.fetch(:description)

  # Image source URL.
  sig { returns(T.nilable(String)) }
  def src = to_h.fetch(:src)

  # Index into `ConversionResult.images` when image extraction is enabled.
  sig { returns(T.nilable(Integer)) }
  def image_index = to_h.fetch(:image_index)

  # Variant predicates: only `image?` is true for this class.
  sig { returns(T::Boolean) }
  def heading? = false

  sig { returns(T::Boolean) }
  def paragraph? = false

  sig { returns(T::Boolean) }
  def list? = false

  sig { returns(T::Boolean) }
  def list_item? = false

  sig { returns(T::Boolean) }
  def table? = false

  sig { returns(T::Boolean) }
  def image? = true

  sig { returns(T::Boolean) }
  def code? = false

  sig { returns(T::Boolean) }
  def quote? = false

  sig { returns(T::Boolean) }
  def definition_list? = false

  sig { returns(T::Boolean) }
  def definition_item? = false

  sig { returns(T::Boolean) }
  def raw_block? = false

  sig { returns(T::Boolean) }
  def metadata_block? = false

  sig { returns(T::Boolean) }
  def group? = false

  # Build an instance from a Hash, accepting Symbol or String keys
  # (the Symbol key is tried first). Missing entries become nil.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(
      description: hash[:description] || hash["description"],
      src: hash[:src] || hash["src"],
      image_index: hash[:image_index] || hash["image_index"]
    )
  end
end
415
+
416
# A code block or inline code.
#
# Immutable value object and one of the NodeContent variants.
NodeContentCode = Data.define(:text, :language) do
  include NodeContent
  extend T::Sig

  # `Data.define` installs the member readers on this class itself, so a reader
  # redefined here has no superclass implementation to reach and `super` would
  # raise NoMethodError. The typed readers read the stored value via `to_h`.

  # The code text content.
  sig { returns(String) }
  def text = to_h.fetch(:text)

  # Programming language (from class="language-*" or similar).
  sig { returns(T.nilable(String)) }
  def language = to_h.fetch(:language)

  # Variant predicates: only `code?` is true for this class.
  sig { returns(T::Boolean) }
  def heading? = false

  sig { returns(T::Boolean) }
  def paragraph? = false

  sig { returns(T::Boolean) }
  def list? = false

  sig { returns(T::Boolean) }
  def list_item? = false

  sig { returns(T::Boolean) }
  def table? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def code? = true

  sig { returns(T::Boolean) }
  def quote? = false

  sig { returns(T::Boolean) }
  def definition_list? = false

  sig { returns(T::Boolean) }
  def definition_item? = false

  sig { returns(T::Boolean) }
  def raw_block? = false

  sig { returns(T::Boolean) }
  def metadata_block? = false

  sig { returns(T::Boolean) }
  def group? = false

  # Build an instance from a Hash, accepting Symbol or String keys
  # (the Symbol key is tried first). A missing `language` becomes nil.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(text: hash[:text] || hash["text"], language: hash[:language] || hash["language"])
  end
end
475
+
476
# Block quote container variant of NodeContent; it carries no fields.
NodeContentQuote = Data.define do
  include NodeContent
  extend T::Sig

  # Variant predicates: only `quote?` answers true for this class.
  sig { returns(T::Boolean) }
  def heading?
    false
  end

  sig { returns(T::Boolean) }
  def paragraph?
    false
  end

  sig { returns(T::Boolean) }
  def list?
    false
  end

  sig { returns(T::Boolean) }
  def list_item?
    false
  end

  sig { returns(T::Boolean) }
  def table?
    false
  end

  sig { returns(T::Boolean) }
  def image?
    false
  end

  sig { returns(T::Boolean) }
  def code?
    false
  end

  sig { returns(T::Boolean) }
  def quote?
    true
  end

  sig { returns(T::Boolean) }
  def definition_list?
    false
  end

  sig { returns(T::Boolean) }
  def definition_item?
    false
  end

  sig { returns(T::Boolean) }
  def raw_block?
    false
  end

  sig { returns(T::Boolean) }
  def metadata_block?
    false
  end

  sig { returns(T::Boolean) }
  def group?
    false
  end

  # Build the field-less variant; no entry of +hash+ is read.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash) = new
end
527
+
528
# Definition list container variant of NodeContent; it carries no fields.
NodeContentDefinitionList = Data.define do
  include NodeContent
  extend T::Sig

  # Variant predicates: only `definition_list?` answers true for this class.
  sig { returns(T::Boolean) }
  def heading?
    false
  end

  sig { returns(T::Boolean) }
  def paragraph?
    false
  end

  sig { returns(T::Boolean) }
  def list?
    false
  end

  sig { returns(T::Boolean) }
  def list_item?
    false
  end

  sig { returns(T::Boolean) }
  def table?
    false
  end

  sig { returns(T::Boolean) }
  def image?
    false
  end

  sig { returns(T::Boolean) }
  def code?
    false
  end

  sig { returns(T::Boolean) }
  def quote?
    false
  end

  sig { returns(T::Boolean) }
  def definition_list?
    true
  end

  sig { returns(T::Boolean) }
  def definition_item?
    false
  end

  sig { returns(T::Boolean) }
  def raw_block?
    false
  end

  sig { returns(T::Boolean) }
  def metadata_block?
    false
  end

  sig { returns(T::Boolean) }
  def group?
    false
  end

  # Build the field-less variant; no entry of +hash+ is read.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash) = new
end
579
+
580
# A definition list entry with term and description.
#
# Immutable value object and one of the NodeContent variants.
NodeContentDefinitionItem = Data.define(:term, :definition) do
  include NodeContent
  extend T::Sig

  # `Data.define` installs the member readers on this class itself, so a reader
  # redefined here has no superclass implementation to reach and `super` would
  # raise NoMethodError. The typed readers read the stored value via `to_h`.

  # The term being defined.
  sig { returns(String) }
  def term = to_h.fetch(:term)

  # The definition text.
  sig { returns(String) }
  def definition = to_h.fetch(:definition)

  # Variant predicates: only `definition_item?` is true for this class.
  sig { returns(T::Boolean) }
  def heading? = false

  sig { returns(T::Boolean) }
  def paragraph? = false

  sig { returns(T::Boolean) }
  def list? = false

  sig { returns(T::Boolean) }
  def list_item? = false

  sig { returns(T::Boolean) }
  def table? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def code? = false

  sig { returns(T::Boolean) }
  def quote? = false

  sig { returns(T::Boolean) }
  def definition_list? = false

  sig { returns(T::Boolean) }
  def definition_item? = true

  sig { returns(T::Boolean) }
  def raw_block? = false

  sig { returns(T::Boolean) }
  def metadata_block? = false

  sig { returns(T::Boolean) }
  def group? = false

  # Build an instance from a Hash, accepting Symbol or String keys
  # (the Symbol key is tried first).
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(term: hash[:term] || hash["term"], definition: hash[:definition] || hash["definition"])
  end
end
639
+
640
# A raw block preserved as-is (e.g. `<script>`, `<style>` content).
#
# Immutable value object and one of the NodeContent variants.
NodeContentRawBlock = Data.define(:format, :content) do
  include NodeContent
  extend T::Sig

  # `Data.define` installs the member readers on this class itself, so a reader
  # redefined here has no superclass implementation to reach and `super` would
  # raise NoMethodError. The typed readers read the stored value via `to_h`.

  # The format of the raw content (e.g. "html", "css", "javascript").
  sig { returns(String) }
  def format = to_h.fetch(:format)

  # The raw content.
  sig { returns(String) }
  def content = to_h.fetch(:content)

  # Variant predicates: only `raw_block?` is true for this class.
  sig { returns(T::Boolean) }
  def heading? = false

  sig { returns(T::Boolean) }
  def paragraph? = false

  sig { returns(T::Boolean) }
  def list? = false

  sig { returns(T::Boolean) }
  def list_item? = false

  sig { returns(T::Boolean) }
  def table? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def code? = false

  sig { returns(T::Boolean) }
  def quote? = false

  sig { returns(T::Boolean) }
  def definition_list? = false

  sig { returns(T::Boolean) }
  def definition_item? = false

  sig { returns(T::Boolean) }
  def raw_block? = true

  sig { returns(T::Boolean) }
  def metadata_block? = false

  sig { returns(T::Boolean) }
  def group? = false

  # Build an instance from a Hash, accepting Symbol or String keys
  # (the Symbol key is tried first).
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(format: hash[:format] || hash["format"], content: hash[:content] || hash["content"])
  end
end
699
+
700
# A block of key-value metadata pairs (from `<head>` meta tags).
#
# Immutable value object and one of the NodeContent variants.
NodeContentMetadataBlock = Data.define(:entries) do
  include NodeContent
  extend T::Sig

  # `Data.define` installs the member readers on this class itself, so a reader
  # redefined here has no superclass implementation to reach and `super` would
  # raise NoMethodError. The typed reader reads the stored value via `to_h`.

  # Key-value metadata pairs.
  sig { returns(T::Array[T::Array[String]]) }
  def entries = to_h.fetch(:entries)

  # Variant predicates: only `metadata_block?` is true for this class.
  sig { returns(T::Boolean) }
  def heading? = false

  sig { returns(T::Boolean) }
  def paragraph? = false

  sig { returns(T::Boolean) }
  def list? = false

  sig { returns(T::Boolean) }
  def list_item? = false

  sig { returns(T::Boolean) }
  def table? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def code? = false

  sig { returns(T::Boolean) }
  def quote? = false

  sig { returns(T::Boolean) }
  def definition_list? = false

  sig { returns(T::Boolean) }
  def definition_item? = false

  sig { returns(T::Boolean) }
  def raw_block? = false

  sig { returns(T::Boolean) }
  def metadata_block? = true

  sig { returns(T::Boolean) }
  def group? = false

  # Build an instance from a Hash, accepting a Symbol or String `entries` key
  # (the Symbol key is tried first).
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(entries: hash[:entries] || hash["entries"])
  end
end
755
+
756
# A section grouping container (auto-generated from heading hierarchy).
#
# Immutable value object and one of the NodeContent variants.
NodeContentGroup = Data.define(:label, :heading_level, :heading_text) do
  include NodeContent
  extend T::Sig

  # `Data.define` installs the member readers on this class itself, so a reader
  # redefined here has no superclass implementation to reach and `super` would
  # raise NoMethodError. The typed readers read the stored value via `to_h`.

  # Optional section label.
  sig { returns(T.nilable(String)) }
  def label = to_h.fetch(:label)

  # The heading level that created this group.
  sig { returns(T.nilable(Integer)) }
  def heading_level = to_h.fetch(:heading_level)

  # The heading text that created this group.
  sig { returns(T.nilable(String)) }
  def heading_text = to_h.fetch(:heading_text)

  # Variant predicates: only `group?` is true for this class.
  sig { returns(T::Boolean) }
  def heading? = false

  sig { returns(T::Boolean) }
  def paragraph? = false

  sig { returns(T::Boolean) }
  def list? = false

  sig { returns(T::Boolean) }
  def list_item? = false

  sig { returns(T::Boolean) }
  def table? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def code? = false

  sig { returns(T::Boolean) }
  def quote? = false

  sig { returns(T::Boolean) }
  def definition_list? = false

  sig { returns(T::Boolean) }
  def definition_item? = false

  sig { returns(T::Boolean) }
  def raw_block? = false

  sig { returns(T::Boolean) }
  def metadata_block? = false

  sig { returns(T::Boolean) }
  def group? = true

  # Build an instance from a Hash, accepting Symbol or String keys
  # (the Symbol key is tried first). Missing entries become nil.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(
      label: hash[:label] || hash["label"],
      heading_level: hash[:heading_level] || hash["heading_level"],
      heading_text: hash[:heading_text] || hash["heading_text"]
    )
  end
end
20
819
  end
21
820
 
22
- # Add accessor methods to Hash-based internally-tagged enum instances
23
- class Hash
24
- # Support internally-tagged enum accessors like format.excel, format.email, etc.
25
- # Also support direct field access like format.sheet_count
26
- # rubocop:disable Metrics/CyclomaticComplexity
27
- def method_missing(method_name, *args, &block)
28
- # Try symbol key first (how Magnus converts JSON keys)
29
- return self[method_name] if key?(method_name)
821
+ module HtmlToMarkdown
822
# The type of an inline text annotation.
#
# JSON serialization is internally tagged: the variant is named by an
# `"annotation_type"` entry (for example `"annotation_type": "bold"`).
module AnnotationKind
  extend T::Helpers
  extend T::Sig

  interface!

  # Build the variant instance described by +hash+.
  #
  # The discriminator is read from the `annotation_type` entry, trying the
  # Symbol key first and the String key second; the matching variant class then
  # builds the instance through its own `from_hash`.
  #
  # @param hash [Hash] with discriminator field and variant-specific fields
  # @return [variant_class] an instance of the appropriate variant
  # @raise [RuntimeError] when the discriminator matches no known variant
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) }
  def self.from_hash(hash)
    discriminator = hash[:annotation_type] || hash["annotation_type"]
    variant =
      case discriminator
      when "bold" then AnnotationKindBold
      when "italic" then AnnotationKindItalic
      when "underline" then AnnotationKindUnderline
      when "strikethrough" then AnnotationKindStrikethrough
      when "code" then AnnotationKindCode
      when "subscript" then AnnotationKindSubscript
      when "superscript" then AnnotationKindSuperscript
      when "highlight" then AnnotationKindHighlight
      when "link" then AnnotationKindLink
      else raise "Unknown discriminator: #{discriminator}"
      end
    variant.from_hash(hash)
  end
end
851
+
852
# Bold / strong emphasis variant of AnnotationKind; it carries no fields.
AnnotationKindBold = Data.define do
  include AnnotationKind
  extend T::Sig

  # Variant predicates: only `bold?` answers true for this class.
  sig { returns(T::Boolean) }
  def bold?
    true
  end

  sig { returns(T::Boolean) }
  def italic?
    false
  end

  sig { returns(T::Boolean) }
  def underline?
    false
  end

  sig { returns(T::Boolean) }
  def strikethrough?
    false
  end

  sig { returns(T::Boolean) }
  def code?
    false
  end

  sig { returns(T::Boolean) }
  def subscript?
    false
  end

  sig { returns(T::Boolean) }
  def superscript?
    false
  end

  sig { returns(T::Boolean) }
  def highlight?
    false
  end

  sig { returns(T::Boolean) }
  def link?
    false
  end

  # Build the field-less variant; no entry of +hash+ is read.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash) = new
end
891
+
892
# Italic / emphasis variant of AnnotationKind; it carries no fields.
AnnotationKindItalic = Data.define do
  include AnnotationKind
  extend T::Sig

  # Variant predicates: only `italic?` answers true for this class.
  sig { returns(T::Boolean) }
  def bold?
    false
  end

  sig { returns(T::Boolean) }
  def italic?
    true
  end

  sig { returns(T::Boolean) }
  def underline?
    false
  end

  sig { returns(T::Boolean) }
  def strikethrough?
    false
  end

  sig { returns(T::Boolean) }
  def code?
    false
  end

  sig { returns(T::Boolean) }
  def subscript?
    false
  end

  sig { returns(T::Boolean) }
  def superscript?
    false
  end

  sig { returns(T::Boolean) }
  def highlight?
    false
  end

  sig { returns(T::Boolean) }
  def link?
    false
  end

  # Build the field-less variant; no entry of +hash+ is read.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash) = new
end
931
+
932
# Underline variant of AnnotationKind; it carries no fields.
AnnotationKindUnderline = Data.define do
  include AnnotationKind
  extend T::Sig

  # Variant predicates: only `underline?` answers true for this class.
  sig { returns(T::Boolean) }
  def bold?
    false
  end

  sig { returns(T::Boolean) }
  def italic?
    false
  end

  sig { returns(T::Boolean) }
  def underline?
    true
  end

  sig { returns(T::Boolean) }
  def strikethrough?
    false
  end

  sig { returns(T::Boolean) }
  def code?
    false
  end

  sig { returns(T::Boolean) }
  def subscript?
    false
  end

  sig { returns(T::Boolean) }
  def superscript?
    false
  end

  sig { returns(T::Boolean) }
  def highlight?
    false
  end

  sig { returns(T::Boolean) }
  def link?
    false
  end

  # Build the field-less variant; no entry of +hash+ is read.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash) = new
end
48
- # rubocop:enable Metrics/CyclomaticComplexity
49
971
 
50
- def respond_to_missing?(method_name, include_private = false)
51
- return true if key?(method_name) || key?(method_name.to_s)
972
# Strikethrough / deleted text variant of AnnotationKind; it carries no fields.
AnnotationKindStrikethrough = Data.define do
  include AnnotationKind
  extend T::Sig

  # Variant predicates: only `strikethrough?` answers true for this class.
  sig { returns(T::Boolean) }
  def bold?
    false
  end

  sig { returns(T::Boolean) }
  def italic?
    false
  end

  sig { returns(T::Boolean) }
  def underline?
    false
  end

  sig { returns(T::Boolean) }
  def strikethrough?
    true
  end

  sig { returns(T::Boolean) }
  def code?
    false
  end

  sig { returns(T::Boolean) }
  def subscript?
    false
  end

  sig { returns(T::Boolean) }
  def superscript?
    false
  end

  sig { returns(T::Boolean) }
  def highlight?
    false
  end

  sig { returns(T::Boolean) }
  def link?
    false
  end

  # Build the field-less variant; no entry of +hash+ is read.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash) = new
end
1011
+
1012
# Inline code variant of AnnotationKind; it carries no fields.
AnnotationKindCode = Data.define do
  include AnnotationKind
  extend T::Sig

  # Variant predicates: only `code?` answers true for this class.
  sig { returns(T::Boolean) }
  def bold?
    false
  end

  sig { returns(T::Boolean) }
  def italic?
    false
  end

  sig { returns(T::Boolean) }
  def underline?
    false
  end

  sig { returns(T::Boolean) }
  def strikethrough?
    false
  end

  sig { returns(T::Boolean) }
  def code?
    true
  end

  sig { returns(T::Boolean) }
  def subscript?
    false
  end

  sig { returns(T::Boolean) }
  def superscript?
    false
  end

  sig { returns(T::Boolean) }
  def highlight?
    false
  end

  sig { returns(T::Boolean) }
  def link?
    false
  end

  # Build the field-less variant; no entry of +hash+ is read.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash) = new
end
1051
+
1052
+ # Subscript text. Field-less value object that includes AnnotationKind; subscript? is its only predicate returning true.
1053
+ AnnotationKindSubscript = Data.define do
1054
+ include AnnotationKind
1055
+ extend T::Sig
1056
+
1057
+ sig { returns(T::Boolean) }
1058
+ def bold? = false
1059
+
1060
+ sig { returns(T::Boolean) }
1061
+ def italic? = false
1062
+
1063
+ sig { returns(T::Boolean) }
1064
+ def underline? = false
1065
+
1066
+ sig { returns(T::Boolean) }
1067
+ def strikethrough? = false
1068
+
1069
+ sig { returns(T::Boolean) }
1070
+ def code? = false
1071
+
1072
+ sig { returns(T::Boolean) }
1073
+ def subscript? = true
52
1074
 
53
- format_type = self[:'format_type'] || self['format_type']
54
- return false unless format_type
1075
+ sig { returns(T::Boolean) }
1076
+ def superscript? = false
55
1077
 
56
- snake_case_method = method_name.to_s.downcase
57
- snake_case_method == format_type.to_s.downcase || super
1078
+ sig { returns(T::Boolean) }
1079
+ def highlight? = false
1080
+
1081
+ sig { returns(T::Boolean) }
1082
+ def link? = false
1083
+
1084
+ # @param hash [Hash] deserialized from the native extension; not read here because this variant defines no fields
1085
+ # @return [AnnotationKindSubscript] a new instance built by calling `new` with no arguments
1086
+ sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
1087
+ def self.from_hash(hash)
1088
+ new
1089
+ end
1090
+ end
1091
+
1092
+ # Superscript text. Field-less value object that includes AnnotationKind; superscript? is its only predicate returning true.
1093
+ AnnotationKindSuperscript = Data.define do
1094
+ include AnnotationKind
1095
+ extend T::Sig
1096
+
1097
+ sig { returns(T::Boolean) }
1098
+ def bold? = false
1099
+
1100
+ sig { returns(T::Boolean) }
1101
+ def italic? = false
1102
+
1103
+ sig { returns(T::Boolean) }
1104
+ def underline? = false
1105
+
1106
+ sig { returns(T::Boolean) }
1107
+ def strikethrough? = false
1108
+
1109
+ sig { returns(T::Boolean) }
1110
+ def code? = false
1111
+
1112
+ sig { returns(T::Boolean) }
1113
+ def subscript? = false
1114
+
1115
+ sig { returns(T::Boolean) }
1116
+ def superscript? = true
1117
+
1118
+ sig { returns(T::Boolean) }
1119
+ def highlight? = false
1120
+
1121
+ sig { returns(T::Boolean) }
1122
+ def link? = false
1123
+
1124
+ # @param hash [Hash] deserialized from the native extension; not read here because this variant defines no fields
1125
+ # @return [AnnotationKindSuperscript] a new instance built by calling `new` with no arguments
1126
+ sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
1127
+ def self.from_hash(hash)
1128
+ new
1129
+ end
1130
+ end
1131
+
1132
+ # Highlighted / marked text. Field-less value object that includes AnnotationKind; highlight? is its only predicate returning true.
1133
+ AnnotationKindHighlight = Data.define do
1134
+ include AnnotationKind
1135
+ extend T::Sig
1136
+
1137
+ sig { returns(T::Boolean) }
1138
+ def bold? = false
1139
+
1140
+ sig { returns(T::Boolean) }
1141
+ def italic? = false
1142
+
1143
+ sig { returns(T::Boolean) }
1144
+ def underline? = false
1145
+
1146
+ sig { returns(T::Boolean) }
1147
+ def strikethrough? = false
1148
+
1149
+ sig { returns(T::Boolean) }
1150
+ def code? = false
1151
+
1152
+ sig { returns(T::Boolean) }
1153
+ def subscript? = false
1154
+
1155
+ sig { returns(T::Boolean) }
1156
+ def superscript? = false
1157
+
1158
+ sig { returns(T::Boolean) }
1159
+ def highlight? = true
1160
+
1161
+ sig { returns(T::Boolean) }
1162
+ def link? = false
1163
+
1164
+ # @param hash [Hash] deserialized from the native extension; not read here because this variant defines no fields
1165
+ # @return [AnnotationKindHighlight] a new instance built by calling `new` with no arguments
1166
+ sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
1167
+ def self.from_hash(hash)
1168
+ new
1169
+ end
1170
+ end
1171
+
1172
+ # A hyperlink sourced from an `<a href="...">` element. Value object with `url` and `title` fields that includes AnnotationKind; link? is its only predicate returning true.
1173
+ AnnotationKindLink = Data.define(:url, :title) do
1174
+ include AnnotationKind
1175
+ extend T::Sig
1176
+
1177
+ # The URL from the `href` attribute, copied verbatim from the source HTML.
1178
+ #
1179
+ # No URL decoding or normalization is performed: percent-encoded sequences, relative
1180
+ # paths, and protocol-relative URLs (`//example.com`) are all preserved exactly as
1181
+ # written in the source. Callers that need an absolute URL must resolve it against the
1182
+ # document base URL themselves.
1183
+ sig { returns(String) }
1184
+ def url = super # rubocop:disable Lint/UselessMethodDefinition
1185
+
1186
+ # The `title` attribute of the `<a>` element, if present.
1187
+ #
1188
+ # `nil` when the `<a>` tag has no `title="..."` attribute. When present, the value
1189
+ # is copied verbatim — HTML entities within the title are not decoded.
1190
+ sig { returns(T.nilable(String)) }
1191
+ def title = super # rubocop:disable Lint/UselessMethodDefinition
1192
+
1193
+ sig { returns(T::Boolean) }
1194
+ def bold? = false
1195
+
1196
+ sig { returns(T::Boolean) }
1197
+ def italic? = false
1198
+
1199
+ sig { returns(T::Boolean) }
1200
+ def underline? = false
1201
+
1202
+ sig { returns(T::Boolean) }
1203
+ def strikethrough? = false
1204
+
1205
+ sig { returns(T::Boolean) }
1206
+ def code? = false
1207
+
1208
+ sig { returns(T::Boolean) }
1209
+ def subscript? = false
1210
+
1211
+ sig { returns(T::Boolean) }
1212
+ def superscript? = false
1213
+
1214
+ sig { returns(T::Boolean) }
1215
+ def highlight? = false
1216
+
1217
+ sig { returns(T::Boolean) }
1218
+ def link? = true
1219
+
1220
+ # @param hash [Hash] deserialized from the native extension; `url` and `title` are each looked up by symbol key first, then by string key
1221
+ # @return [AnnotationKindLink] a new instance; a field whose key is missing in both forms ends up nil (assuming a plain Hash with no default value)
1222
+ sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
1223
+ def self.from_hash(hash)
1224
+ new(url: hash[:url] || hash["url"], title: hash[:title] || hash["title"])
1225
+ end
58
1226
  end
59
1227
  end