coradoc-html 1.1.18 → 1.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. checksums.yaml +4 -4
  2. data/lib/coradoc/html/cleaner.rb +128 -0
  3. data/lib/coradoc/html/converters/a.rb +77 -0
  4. data/lib/coradoc/html/converters/aside.rb +20 -0
  5. data/lib/coradoc/html/converters/audio.rb +19 -0
  6. data/lib/coradoc/html/converters/base.rb +98 -0
  7. data/lib/coradoc/html/converters/blockquote.rb +25 -0
  8. data/lib/coradoc/html/converters/br.rb +17 -0
  9. data/lib/coradoc/html/converters/bypass.rb +82 -0
  10. data/lib/coradoc/html/converters/code.rb +25 -0
  11. data/lib/coradoc/html/converters/div.rb +23 -0
  12. data/lib/coradoc/html/converters/dl.rb +82 -0
  13. data/lib/coradoc/html/converters/drop.rb +26 -0
  14. data/lib/coradoc/html/converters/em.rb +23 -0
  15. data/lib/coradoc/html/converters/figure.rb +33 -0
  16. data/lib/coradoc/html/converters/h.rb +58 -0
  17. data/lib/coradoc/html/converters/head.rb +29 -0
  18. data/lib/coradoc/html/converters/hr.rb +17 -0
  19. data/lib/coradoc/html/converters/img.rb +103 -0
  20. data/lib/coradoc/html/converters/li.rb +35 -0
  21. data/lib/coradoc/html/converters/mark.rb +21 -0
  22. data/lib/coradoc/html/converters/markup.rb +93 -0
  23. data/lib/coradoc/html/converters/math.rb +37 -0
  24. data/lib/coradoc/html/converters/media_base.rb +48 -0
  25. data/lib/coradoc/html/converters/ol.rb +42 -0
  26. data/lib/coradoc/html/converters/p.rb +64 -0
  27. data/lib/coradoc/html/converters/pass_through.rb +15 -0
  28. data/lib/coradoc/html/converters/positional_formatting.rb +35 -0
  29. data/lib/coradoc/html/converters/pre.rb +57 -0
  30. data/lib/coradoc/html/converters/q.rb +25 -0
  31. data/lib/coradoc/html/converters/strong.rb +22 -0
  32. data/lib/coradoc/html/converters/sub.rb +20 -0
  33. data/lib/coradoc/html/converters/sup.rb +20 -0
  34. data/lib/coradoc/html/converters/table.rb +64 -0
  35. data/lib/coradoc/html/converters/td.rb +42 -0
  36. data/lib/coradoc/html/converters/text.rb +66 -0
  37. data/lib/coradoc/html/converters/tr.rb +27 -0
  38. data/lib/coradoc/html/converters/video.rb +27 -0
  39. data/lib/coradoc/html/converters.rb +104 -0
  40. data/lib/coradoc/html/drop/drop_factory.rb +14 -22
  41. data/lib/coradoc/html/drop/inline_element_drop.rb +3 -5
  42. data/lib/coradoc/html/drop/raw_inline_element_drop.rb +30 -0
  43. data/lib/coradoc/html/drop.rb +30 -8
  44. data/lib/coradoc/html/errors.rb +11 -0
  45. data/lib/coradoc/html/html_converter.rb +78 -0
  46. data/lib/coradoc/html/input_config.rb +66 -0
  47. data/lib/coradoc/html/plugin.rb +90 -0
  48. data/lib/coradoc/html/plugins/plateau.rb +212 -0
  49. data/lib/coradoc/html/postprocessor.rb +19 -0
  50. data/lib/coradoc/html/spa.rb +0 -2
  51. data/lib/coradoc/html/static.rb +0 -2
  52. data/lib/coradoc/html/tag_mapping.rb +3 -1
  53. data/lib/coradoc/html/transform/from_core_model.rb +2 -2
  54. data/lib/coradoc/html/transform/to_core_model.rb +3 -3
  55. data/lib/coradoc/html/version.rb +1 -1
  56. data/lib/coradoc/html.rb +30 -5
  57. metadata +46 -47
  58. data/lib/coradoc/html/input/cleaner.rb +0 -134
  59. data/lib/coradoc/html/input/config.rb +0 -80
  60. data/lib/coradoc/html/input/converters/a.rb +0 -79
  61. data/lib/coradoc/html/input/converters/aside.rb +0 -22
  62. data/lib/coradoc/html/input/converters/audio.rb +0 -21
  63. data/lib/coradoc/html/input/converters/base.rb +0 -118
  64. data/lib/coradoc/html/input/converters/blockquote.rb +0 -27
  65. data/lib/coradoc/html/input/converters/br.rb +0 -19
  66. data/lib/coradoc/html/input/converters/bypass.rb +0 -84
  67. data/lib/coradoc/html/input/converters/code.rb +0 -27
  68. data/lib/coradoc/html/input/converters/div.rb +0 -25
  69. data/lib/coradoc/html/input/converters/dl.rb +0 -84
  70. data/lib/coradoc/html/input/converters/drop.rb +0 -28
  71. data/lib/coradoc/html/input/converters/em.rb +0 -25
  72. data/lib/coradoc/html/input/converters/figure.rb +0 -35
  73. data/lib/coradoc/html/input/converters/h.rb +0 -74
  74. data/lib/coradoc/html/input/converters/head.rb +0 -31
  75. data/lib/coradoc/html/input/converters/hr.rb +0 -19
  76. data/lib/coradoc/html/input/converters/img.rb +0 -105
  77. data/lib/coradoc/html/input/converters/li.rb +0 -37
  78. data/lib/coradoc/html/input/converters/mark.rb +0 -23
  79. data/lib/coradoc/html/input/converters/markup.rb +0 -103
  80. data/lib/coradoc/html/input/converters/math.rb +0 -39
  81. data/lib/coradoc/html/input/converters/media_base.rb +0 -50
  82. data/lib/coradoc/html/input/converters/ol.rb +0 -44
  83. data/lib/coradoc/html/input/converters/p.rb +0 -90
  84. data/lib/coradoc/html/input/converters/pass_through.rb +0 -17
  85. data/lib/coradoc/html/input/converters/positional_formatting.rb +0 -37
  86. data/lib/coradoc/html/input/converters/pre.rb +0 -59
  87. data/lib/coradoc/html/input/converters/q.rb +0 -27
  88. data/lib/coradoc/html/input/converters/strong.rb +0 -24
  89. data/lib/coradoc/html/input/converters/sub.rb +0 -22
  90. data/lib/coradoc/html/input/converters/sup.rb +0 -22
  91. data/lib/coradoc/html/input/converters/table.rb +0 -66
  92. data/lib/coradoc/html/input/converters/td.rb +0 -44
  93. data/lib/coradoc/html/input/converters/text.rb +0 -68
  94. data/lib/coradoc/html/input/converters/tr.rb +0 -29
  95. data/lib/coradoc/html/input/converters/video.rb +0 -29
  96. data/lib/coradoc/html/input/converters.rb +0 -107
  97. data/lib/coradoc/html/input/errors.rb +0 -22
  98. data/lib/coradoc/html/input/html_converter.rb +0 -98
  99. data/lib/coradoc/html/input/plugin.rb +0 -120
  100. data/lib/coradoc/html/input/plugins/plateau.rb +0 -214
  101. data/lib/coradoc/html/input/postprocessor.rb +0 -25
  102. data/lib/coradoc/html/input.rb +0 -86
  103. data/lib/coradoc/html/output.rb +0 -89
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converter for the HTML <q> element. Produces a CoreModel inline
      # element whose format type is 'quotation'.
      class Q < Base
        INSTANCE = new

        # Builds the inline quotation element for +node+.
        #
        # @param node the HTML node being converted; its 'cite' attribute
        #   (nil when absent) becomes the element's target
        # @param state [Hash] conversion state forwarded to the child converters
        # @return [Coradoc::CoreModel::InlineElement]
        def to_coradoc(node, state = {})
          children = treat_children_coradoc(node, state)
          cite_value = node['cite']
          plain_text = extract_text_from_content(children)

          Coradoc::CoreModel::InlineElement.new(
            format_type: 'quotation',
            nested_elements: children,
            content: plain_text,
            target: cite_value
          )
        end
      end

      register :q, Q::INSTANCE
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converter shared by the <strong> and <b> tags. All conversion logic
      # lives in Markup; this class only supplies the two values below.
      class Strong < Markup
        INSTANCE = new

        # @return [String] the format type reported for bold markup
        def coradoc_format_type
          'bold'
        end

        # @return [Array<String>] the tag names this markup is known by
        def markup_ancestor_tag_names
          ['strong', 'b']
        end
      end

      # Both tags are served by the same converter instance.
      %i[strong b].each { |tag| register tag, Strong::INSTANCE }
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converter for the HTML <sub> element. Behaviour comes from the
      # PositionalFormatting mixin; this class only names the CoreModel
      # class to use (presumably read by the mixin - confirm there).
      class Sub < Base
        INSTANCE = new
        include PositionalFormatting

        # @return [Class] the CoreModel class representing subscript content
        private def element_class
          Coradoc::CoreModel::SubscriptElement
        end
      end

      register :sub, Sub::INSTANCE
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converter for the HTML <sup> element. Behaviour comes from the
      # PositionalFormatting mixin; this class only names the CoreModel
      # class to use (presumably read by the mixin - confirm there).
      class Sup < Base
        INSTANCE = new
        include PositionalFormatting

        # @return [Class] the CoreModel class representing superscript content
        private def element_class
          Coradoc::CoreModel::SuperscriptElement
        end
      end

      register :sup, Sup::INSTANCE
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converter for the HTML <table> element.
      class Table < Base
        INSTANCE = new

        # Builds a CoreModel table from +node+.
        #
        # @param node the <table> node; its 'id', 'frame' and 'rules'
        #   attributes and its direct <caption> child are read
        # @param state [Hash] conversion state forwarded to the child converters
        # @return [Coradoc::CoreModel::Table]
        def to_coradoc(node, state = {})
          table_id = node['id']
          caption = extract_title(node)
          rows = treat_children_coradoc(node, state)

          Coradoc::CoreModel::Table.new(
            title: caption,
            rows: rows,
            id: table_id,
            frame: frame(node),
            grid: rules(node)
          )
        end

        # @return [String, nil] stripped text of the direct <caption> child,
        #   or nil when the table has none
        def extract_title(node)
          caption_node = node.at('./caption')
          caption_node.text.strip unless caption_node.nil?
        end

        # Translates the HTML 'frame' attribute into the frame value handed
        # to the CoreModel table.
        #
        # @return [String, nil] nil when the attribute is missing or unrecognised
        def frame(node)
          case node['frame']
          when 'void' then 'none'
          when 'hsides' then 'topbot'
          when 'vsides' then 'sides'
          when 'box', 'border' then 'all'
          end
        end

        # Passes the HTML 'rules' attribute through when it is one of the
        # recognised grid values.
        #
        # @return [String, nil] nil when the attribute is missing or unrecognised
        def rules(node)
          value = node['rules']
          value if %w[all rows cols none].include?(value)
        end
      end

      register :table, Table::INSTANCE
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converter for HTML table cells. One instance serves both <td> and
      # <th>; the resulting cell is marked as a header when the tag is <th>.
      class Td < Base
        INSTANCE = new

        # Builds a CoreModel table cell from +node+.
        #
        # While the children of a cell whose only element child is a <p> are
        # converted, state[:tdsinglepara] is set to true. The flag is restored
        # afterwards so it cannot leak into sibling cells or later content
        # that is converted with the same state hash.
        #
        # @param node the <td>/<th> node; 'colspan', 'rowspan' and 'align'
        #   attributes are read
        # @param state [Hash] conversion state forwarded to the child converters
        # @return [Coradoc::CoreModel::TableCell]
        def to_coradoc(node, state = {})
          colspan = node['colspan']&.to_i
          rowspan = node['rowspan']&.to_i
          alignment = extract_alignment(node)

          content = with_single_paragraph_flag(state, single_paragraph_cell?(node)) do
            treat_children_coradoc(node, state)
          end

          Coradoc::CoreModel::TableCell.new(
            content: extract_text_from_content(content),
            alignment: alignment,
            # Spans of 1 (or less) are the default and are not recorded.
            colspan: colspan && colspan > 1 ? colspan : nil,
            rowspan: rowspan && rowspan > 1 ? rowspan : nil,
            header: node.name == 'th'
          )
        end

        # @return [String, nil] 'left', 'center' or 'right' taken from the
        #   'align' attribute; nil for any other or missing value
        def extract_alignment(node)
          case node['align']
          when 'left' then 'left'
          when 'center' then 'center'
          when 'right' then 'right'
          end
        end

        private

        # @return [Boolean] true when the cell's only element child is a <p>
        def single_paragraph_cell?(node)
          node.elements.size == 1 && node.elements.first.name == 'p'
        end

        # Yields with state[:tdsinglepara] set to true when +enabled+, then
        # puts the key back the way it was (removing it if it was absent).
        # The same hash object is kept so anything else child converters
        # store in +state+ stays visible to the caller.
        def with_single_paragraph_flag(state, enabled)
          return yield unless enabled

          had_flag = state.key?(:tdsinglepara)
          previous = state[:tdsinglepara]
          state[:tdsinglepara] = true
          begin
            yield
          ensure
            if had_flag
              state[:tdsinglepara] = previous
            else
              state.delete(:tdsinglepara)
            end
          end
        end
      end

      register :td, Td::INSTANCE
      register :th, Td::INSTANCE
    end
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converter for HTML text nodes.
      class Text < Base
        INSTANCE = new

        # Converts a text node.
        #
        # @param node the text node
        # @param state [Hash] conversion state; :tdsinglepara is consulted
        #   for whitespace-only nodes
        # @return [Coradoc::CoreModel::TextElement, String, nil] a text element
        #   holding the cleaned-up text, or the result of the whitespace-only
        #   handling (' ' or nil)
        def to_coradoc(node, state = {})
          raw = node.text
          return treat_empty(node, state) if raw.strip.empty?

          # HTML cleanup is performed in the converter layer.
          Coradoc::CoreModel::TextElement.new(content: cleanup_html_text(raw))
        end

        private

        # Decides what a whitespace-only text node turns into: a single
        # space is kept only for a node that is exactly ' ', is not a direct
        # child of a list, and is not inside a single-paragraph table cell.
        def treat_empty(node, state)
          parent_tag = node.parent.name.to_sym
          # Whitespace directly inside a list would break the indentation.
          return nil if %i[ol ul].include?(parent_tag)
          return nil if state[:tdsinglepara]

          node.text == ' ' ? ' ' : nil
        end

        # HTML-to-CoreModel text cleanup, applied in a fixed order.
        def cleanup_html_text(text)
          with_entities = preserve_nbsp(text)
          trimmed = remove_border_newlines(with_entities)
          single_line = remove_inner_newlines(trimmed)
          escape_links(single_line)
        end

        # Replaces each non-breaking space character with the '&nbsp;' entity.
        def preserve_nbsp(text)
          text.gsub("\u00A0", '&nbsp;')
        end

        # Prefixes '<<...>>' sequences with a backslash.
        def escape_links(text)
          text.gsub(/<<([^ ][^>]*)>>/, '\\<<\\1>>')
        end

        # Drops the newlines at the very start and very end of the text.
        # Each pattern is anchored, so it can match at most once.
        def remove_border_newlines(text)
          without_leading = text.sub(/\A\n+/, '')
          without_leading.sub(/\n+\z/, '')
        end

        # Turns newlines and tabs into spaces and collapses runs of spaces.
        # A single leading or trailing space survives for inline contexts.
        def remove_inner_newlines(text)
          spaced = text.tr("\n\t", ' ')
          spaced.squeeze(' ')
        end
      end

      register :text, Text::INSTANCE
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converter for the HTML <tr> element.
      class Tr < Base
        INSTANCE = new

        # Builds a CoreModel table row; the converted children become its
        # cells (TableRow takes cells, not columns).
        #
        # @param node the <tr> node
        # @param state [Hash] conversion state forwarded to the child converters
        # @return [Coradoc::CoreModel::TableRow]
        def to_coradoc(node, state = {})
          cells = treat_children_coradoc(node, state)
          is_header = table_header_row?(node)

          Coradoc::CoreModel::TableRow.new(cells: cells, header: is_header)
        end

        # @return [Boolean] true when the row has no preceding sibling
        #   element, i.e. it is the first element inside its parent
        def table_header_row?(node)
          preceding = node.previous_element
          preceding.nil?
        end
      end

      register :tr, Tr::INSTANCE
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converter for the HTML <video> element. Conversion itself is
      # inherited from MediaBase; this class supplies the two hooks below.
      class Video < MediaBase
        INSTANCE = new

        private

        # @return [Symbol] the media kind handled by this converter
        def semantic_type
          :video
        end

        # Adds the video-only attributes to the shared media attributes and
        # removes every entry whose value is nil.
        #
        # @param node the <video> node
        # @return [Hash]
        def build_attributes(node)
          shared = base_attributes(node)
          video_only = {
            poster: node['poster'],
            width: node['width'],
            height: node['height']
          }
          shared.merge(video_only).compact
        end
      end

      register :video, Video::INSTANCE
    end
  end
end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    # Registry mapping HTML tag names to converter objects, plus the entry
    # point (process_coradoc) that dispatches a node to its converter.
    module Converters
      # Converter constant name => file that defines it. Used to declare the
      # autoloads below and to force-load every converter on first lookup.
      CONVERTERS = {
        Base: 'coradoc/html/converters/base',
        Markup: 'coradoc/html/converters/markup',
        A: 'coradoc/html/converters/a',
        Aside: 'coradoc/html/converters/aside',
        Audio: 'coradoc/html/converters/audio',
        Blockquote: 'coradoc/html/converters/blockquote',
        Br: 'coradoc/html/converters/br',
        Bypass: 'coradoc/html/converters/bypass',
        Code: 'coradoc/html/converters/code',
        Div: 'coradoc/html/converters/div',
        Dl: 'coradoc/html/converters/dl',
        Skip: 'coradoc/html/converters/drop',
        Em: 'coradoc/html/converters/em',
        Figure: 'coradoc/html/converters/figure',
        H: 'coradoc/html/converters/h',
        Head: 'coradoc/html/converters/head',
        Hr: 'coradoc/html/converters/hr',
        Img: 'coradoc/html/converters/img',
        Li: 'coradoc/html/converters/li',
        Mark: 'coradoc/html/converters/mark',
        Math: 'coradoc/html/converters/math',
        MediaBase: 'coradoc/html/converters/media_base',
        Ol: 'coradoc/html/converters/ol',
        P: 'coradoc/html/converters/p',
        PassThrough: 'coradoc/html/converters/pass_through',
        PositionalFormatting: 'coradoc/html/converters/positional_formatting',
        Pre: 'coradoc/html/converters/pre',
        Q: 'coradoc/html/converters/q',
        Strong: 'coradoc/html/converters/strong',
        Sup: 'coradoc/html/converters/sup',
        Sub: 'coradoc/html/converters/sub',
        Table: 'coradoc/html/converters/table',
        Td: 'coradoc/html/converters/td',
        Text: 'coradoc/html/converters/text',
        Tr: 'coradoc/html/converters/tr',
        Video: 'coradoc/html/converters/video'
      }.freeze
      private_constant :CONVERTERS

      CONVERTERS.each do |name, path|
        autoload name, path
      end

      # tag name (Symbol) => converter object
      @converters = {}
      @converters_loaded = false

      # Registers +converter+ for +tag_name+, replacing any previous entry.
      #
      # @param tag_name [#to_sym]
      # @param converter [#to_coradoc]
      # @return the registered converter
      def self.register(tag_name, converter)
        @converters[tag_name.to_sym] = converter
      end

      # Removes the converter registered for +tag_name+.
      #
      # @param tag_name [#to_sym]
      # @return the removed converter, or nil when none was registered
      def self.unregister(tag_name)
        @converters.delete(tag_name.to_sym)
      end

      # Resolves every constant listed in CONVERTERS once, which triggers the
      # autoloads declared above. The flag is set before loading starts, so a
      # nested call made while the files load returns immediately.
      def self.ensure_converters_loaded
        return if @converters_loaded

        @converters_loaded = true
        CONVERTERS.each_key { |name| const_get(name) }
      end

      # Finds the converter for +tag_name+, falling back to the converter
      # chosen by the unknown_tags configuration.
      #
      # @param tag_name [#to_sym]
      # @return [#to_coradoc]
      # @raise [Errors::UnknownTagError, Errors::InvalidConfigurationError]
      #   see default_converter
      def self.lookup(tag_name)
        ensure_converters_loaded
        @converters[tag_name.to_sym] || default_converter(tag_name)
      end

      # Converts a node, or each node of an Array / Nokogiri NodeSet.
      #
      # For a single node the converter call is wrapped once per plugin in
      # state[:plugin_instances]: each plugin's html_tree_run_hooks receives
      # the node, the state and a block that runs the previously built step.
      # Plugins later in the list therefore wrap the earlier ones.
      #
      # @param node a node responding to #name, an Array of nodes, or a
      #   Nokogiri::XML::NodeSet
      # @param state [Hash] conversion state; :plugin_instances is optional
      # @return the converter result, or an Array of results for a collection
      def self.process_coradoc(node, state)
        node = node.to_a if node.is_a? Nokogiri::XML::NodeSet
        return node.map { |child| process_coradoc(child, state) } if node.is_a? Array

        plugins = state[:plugin_instances] || []
        process = proc { lookup(node.name).to_coradoc(node, state) }
        plugins.each do |plugin|
          inner = process
          process = proc { plugin.html_tree_run_hooks(node, state, &inner) }
        end
        process.call(node, state)
      end

      # Picks the converter for a tag without a registered converter,
      # according to Html.input_config.unknown_tags.
      #
      # @param tag_name the unknown tag, used in the error message
      # @return [#to_coradoc] the pass-through, drop or bypass converter
      # @raise [Errors::UnknownTagError] when the setting is :raise
      # @raise [Errors::InvalidConfigurationError] when the setting is any
      #   other value, including nil and values that cannot be symbolized
      def self.default_converter(tag_name)
        setting = Html.input_config.unknown_tags
        # Only symbolize what can be symbolized, so that an invalid setting
        # reaches the configuration error below instead of a NoMethodError.
        policy = setting.respond_to?(:to_sym) ? setting.to_sym : setting

        case policy
        when :pass_through
          PassThrough::INSTANCE
        when :drop
          Skip::INSTANCE
        when :bypass
          Bypass::INSTANCE
        when :raise
          raise Errors::UnknownTagError, "unknown tag: #{tag_name}"
        else
          raise Errors::InvalidConfigurationError,
                "unknown value #{setting.inspect} " \
                'for Coradoc::Html.input_config.unknown_tags'
        end
      end
    end
  end
end
@@ -33,6 +33,20 @@ module Coradoc
33
33
  drop&.new(model)&.template_type
34
34
  end
35
35
 
36
+ # Walk the Drop namespace and trigger each declared autoload so the
37
+ # drop class body evaluates and self-registers. Called eagerly from
38
+ # drop.rb after autoloads are declared.
39
+ EAGER_LOAD_ORDER = %i[Base DropFactory AnnotationDrop BlockDrop ListBlockDrop ListItemDrop
40
+ TableDrop TableRowDrop TableCellDrop ImageDrop InlineElementDrop RawInlineElementDrop
41
+ BibliographyEntryDrop BibliographyDrop TocEntryDrop TocDrop DefinitionItemDrop
42
+ DefinitionListDrop TermDrop FootnoteDrop TextContentDrop DocumentDrop].freeze
43
+ private_constant :EAGER_LOAD_ORDER
44
+
45
# Resolves every constant named in EAGER_LOAD_ORDER on Drop, in list order,
# which triggers the corresponding autoloads.
#
# @return [true]
def self.eager_load!
  EAGER_LOAD_ORDER.each do |drop_name|
    Drop.const_get(drop_name)
  end
  true
end
49
+
36
50
  class << self
37
51
  private
38
52
 
@@ -48,25 +62,3 @@ module Coradoc
48
62
  end
49
63
  end
50
64
  end
51
-
52
- # Load all drops — each self-registers with DropFactory.
53
- # Registration order doesn't matter (sorted by ancestor depth).
54
- require_relative 'annotation_drop'
55
- require_relative 'block_drop'
56
- require_relative 'list_block_drop'
57
- require_relative 'list_item_drop'
58
- require_relative 'table_drop'
59
- require_relative 'table_row_drop'
60
- require_relative 'table_cell_drop'
61
- require_relative 'image_drop'
62
- require_relative 'inline_element_drop'
63
- require_relative 'bibliography_entry_drop'
64
- require_relative 'bibliography_drop'
65
- require_relative 'toc_entry_drop'
66
- require_relative 'toc_drop'
67
- require_relative 'definition_item_drop'
68
- require_relative 'definition_list_drop'
69
- require_relative 'term_drop'
70
- require_relative 'footnote_drop'
71
- require_relative 'text_content_drop'
72
- require_relative 'document_drop'
@@ -27,11 +27,9 @@ module Coradoc
27
27
  end
28
28
 
29
29
  def css_class
30
- case format_type
31
- when 'stem' then 'stem'
32
- when 'term' then 'term'
33
- when 'span' then @model.metadata('class')
34
- end
30
+ return @model.metadata('class') if format_type == 'span'
31
+
32
+ TagMapping.css_class_for(format_type)
35
33
  end
36
34
 
37
35
  def term_ref
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Drop
      # Drop for CoreModel::RawInlineElement.
      #
      # Passthrough content is raw output-format markup (typically HTML)
      # that the source author explicitly marked to be emitted verbatim.
      # The generic InlineElementDrop escapes content; this subclass skips
      # the escaping so the rendered output mirrors the author's intent.
      #
      # The Liquid template is shared with InlineElementDrop; only the data
      # preparation differs. InlineElementDrop is autoloaded by the Drop
      # namespace shell (drop.rb) and is loaded before this class through
      # the ordering used by DropFactory.eager_load!.
      class RawInlineElementDrop < InlineElementDrop
        # @return [String] the text extracted from the model's content,
        #   returned without escaping
        def text
          extracted = extract_text(@model.content)
          extracted.to_s
        end

        # @return [String] name of the template shared with InlineElementDrop
        def template_type
          'inline_element'
        end
      end

      DropFactory.register(CoreModel::RawInlineElement, RawInlineElementDrop)
    end
  end
end
@@ -1,18 +1,40 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Drop namespace — manages Liquid drop layer for template rendering.
3
+ # Drop namespace — Liquid drop layer for template rendering.
4
4
  #
5
- # Loading order matters: Base must load before DropFactory, and all
6
- # concrete drops must load after DropFactory (they self-register).
7
- # Each drop calls DropFactory.register at load time.
5
+ # Each drop class is autoloaded from its own file (one class per file,
6
+ # mirroring the mirror/ReverseBuilder pattern). Eager loading is delegated
7
+ # to DropFactory.eager_load!, which triggers each autoload in dependency
8
+ # order so drops self-register with DropFactory at load time.
8
9
  module Coradoc
9
10
  module Html
10
11
  module Drop
12
+ autoload :Base, "#{__dir__}/drop/base"
13
+ autoload :DropFactory, "#{__dir__}/drop/drop_factory"
14
+ autoload :AnnotationDrop, "#{__dir__}/drop/annotation_drop"
15
+ autoload :BlockDrop, "#{__dir__}/drop/block_drop"
16
+ autoload :ListBlockDrop, "#{__dir__}/drop/list_block_drop"
17
+ autoload :ListItemDrop, "#{__dir__}/drop/list_item_drop"
18
+ autoload :TableDrop, "#{__dir__}/drop/table_drop"
19
+ autoload :TableRowDrop, "#{__dir__}/drop/table_row_drop"
20
+ autoload :TableCellDrop, "#{__dir__}/drop/table_cell_drop"
21
+ autoload :ImageDrop, "#{__dir__}/drop/image_drop"
22
+ # InlineElementDrop must load before RawInlineElementDrop (subclass).
23
+ autoload :InlineElementDrop, "#{__dir__}/drop/inline_element_drop"
24
+ autoload :RawInlineElementDrop, "#{__dir__}/drop/raw_inline_element_drop"
25
+ autoload :BibliographyEntryDrop, "#{__dir__}/drop/bibliography_entry_drop"
26
+ autoload :BibliographyDrop, "#{__dir__}/drop/bibliography_drop"
27
+ autoload :TocEntryDrop, "#{__dir__}/drop/toc_entry_drop"
28
+ autoload :TocDrop, "#{__dir__}/drop/toc_drop"
29
+ autoload :DefinitionItemDrop, "#{__dir__}/drop/definition_item_drop"
30
+ autoload :DefinitionListDrop, "#{__dir__}/drop/definition_list_drop"
31
+ autoload :TermDrop, "#{__dir__}/drop/term_drop"
32
+ autoload :FootnoteDrop, "#{__dir__}/drop/footnote_drop"
33
+ autoload :TextContentDrop, "#{__dir__}/drop/text_content_drop"
34
+ autoload :DocumentDrop, "#{__dir__}/drop/document_drop"
11
35
  end
12
36
  end
13
37
  end
14
38
 
15
- # Base must load first (DropFactory depends on it)
16
- require_relative 'drop/base'
17
- # DropFactory loads next
18
- require_relative 'drop/drop_factory'
39
+ # Trigger eager load so every drop class body evaluates and self-registers.
40
+ Coradoc::Html::Drop::DropFactory.eager_load!
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    # Exception classes raised by the HTML conversion code.
    module Errors
      # Common ancestor of the errors below; derives from Coradoc::Error.
      class Error < Coradoc::Error
      end

      # Raised for an HTML tag that has no registered converter when the
      # unknown_tags setting is :raise.
      class UnknownTagError < Error
      end

      # Raised when a configuration setting holds an unsupported value.
      class InvalidConfigurationError < Error
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    # Entry point that turns HTML input into a CoreModel tree.
    class HtmlConverter
      # Converts +input+ to a CoreModel tree.
      #
      # Steps, all executed inside Html.input_config.with(options):
      # 1. obtain the root node from +input+,
      # 2. let every plugin preprocess the HTML tree (a plugin may replace it),
      # 3. convert the tree through Converters.process_coradoc,
      # 4. run Postprocessor.process on the result,
      # 5. let every plugin postprocess the CoreModel tree (may replace it).
      #
      # @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node]
      #   an HTML string (parsed with Nokogiri::HTML), a document (its root
      #   is used) or a node (used as is)
      # @param options [Hash] passed to Html.input_config.with; may carry
      #   ready-made :plugin_instances. Unless frozen, the hash receives the
      #   plugin instances used under :plugin_instances before returning.
      # @return the CoreModel tree, or nil when no root node could be
      #   obtained (including when +input+ is of an unsupported type)
      def self.to_core_model(input, options = {})
        Html.input_config.with(options) do
          plugin_instances = prepare_plugin_instances(options)

          root = track_time 'Loading input HTML document' do
            case input
            when String
              Nokogiri::HTML(input).root
            when Nokogiri::XML::Document
              input.root
            when Nokogiri::XML::Node
              input
            end
          end

          return nil unless root

          plugin_instances.each do |plugin|
            plugin.html_tree = root
            track_time "Preprocessing document with #{plugin.name} plugin" do
              plugin.preprocess_html_tree
            end
            # The plugin may have swapped the tree for a new one.
            root = plugin.html_tree
          end

          coremodel = track_time 'Converting input document tree to CoreModel' do
            Converters.process_coradoc(
              root,
              plugin_instances: plugin_instances
            )
          end

          coremodel = track_time 'Post-process CoreModel tree' do
            Postprocessor.process(coremodel)
          end

          plugin_instances.each do |plugin|
            plugin.coremodel_tree = coremodel
            track_time "Postprocessing CoreModel tree with #{plugin.name} plugin" do
              plugin.postprocess_coremodel_tree
            end
            coremodel = plugin.coremodel_tree
          end

          # Hand the plugin instances back to the caller through +options+.
          options[:plugin_instances] = plugin_instances unless options.frozen?

          coremodel
        end
      end

      # @param options [Hash]
      # @return [Array] options[:plugin_instances] when given, otherwise one
      #   new instance of every class in Html.input_config.plugins
      def self.prepare_plugin_instances(options)
        options[:plugin_instances] || Html.input_config.plugins.map(&:new)
      end

      # Current nesting depth of track_time calls; one space of log
      # indentation per level.
      @track_time_indentation = 0

      # Runs the block and, when Html.input_config.track_time is enabled,
      # reports start and duration through Kernel#warn, indented by the
      # nesting depth of track_time calls.
      #
      # The depth counter is restored even when the block raises (in that
      # case no duration line is printed and the exception propagates).
      # Durations are measured with the monotonic clock so system clock
      # adjustments cannot distort them.
      #
      # @param task [String] description used in the log lines
      # @return the block's return value
      def self.track_time(task)
        return yield unless Html.input_config.track_time

        warn "#{' ' * @track_time_indentation}* #{task} is starting..."
        @track_time_indentation += 1
        started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        begin
          result = yield
        ensure
          @track_time_indentation -= 1
        end
        time_elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
        warn "#{' ' * @track_time_indentation}* #{task} took #{time_elapsed.round(3)} seconds"
        result
      end
    end
  end
end