coradoc-html 1.1.7 → 1.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. checksums.yaml +4 -4
  2. data/lib/coradoc/html/config.rb +36 -12
  3. data/lib/coradoc/html/converter_base.rb +26 -68
  4. data/lib/coradoc/html/drop/annotation_drop.rb +31 -0
  5. data/lib/coradoc/html/drop/base.rb +72 -0
  6. data/lib/coradoc/html/drop/bibliography_drop.rb +15 -0
  7. data/lib/coradoc/html/drop/bibliography_entry_drop.rb +24 -0
  8. data/lib/coradoc/html/drop/block_drop.rb +69 -0
  9. data/lib/coradoc/html/drop/definition_item_drop.rb +36 -0
  10. data/lib/coradoc/html/drop/definition_list_drop.rb +15 -0
  11. data/lib/coradoc/html/drop/document_drop.rb +52 -0
  12. data/lib/coradoc/html/drop/drop_factory.rb +72 -0
  13. data/lib/coradoc/html/drop/footnote_drop.rb +24 -0
  14. data/lib/coradoc/html/drop/image_drop.rb +35 -0
  15. data/lib/coradoc/html/drop/inline_element_drop.rb +64 -0
  16. data/lib/coradoc/html/drop/list_block_drop.rb +23 -0
  17. data/lib/coradoc/html/drop/list_item_drop.rb +20 -0
  18. data/lib/coradoc/html/drop/table_cell_drop.rb +35 -0
  19. data/lib/coradoc/html/drop/table_drop.rb +15 -0
  20. data/lib/coradoc/html/drop/table_row_drop.rb +23 -0
  21. data/lib/coradoc/html/drop/term_drop.rb +24 -0
  22. data/lib/coradoc/html/drop/text_content_drop.rb +15 -0
  23. data/lib/coradoc/html/drop/toc_drop.rb +15 -0
  24. data/lib/coradoc/html/drop/toc_entry_drop.rb +32 -0
  25. data/lib/coradoc/html/drop.rb +18 -0
  26. data/lib/coradoc/html/escape.rb +29 -0
  27. data/lib/coradoc/html/input/cleaner.rb +4 -33
  28. data/lib/coradoc/html/input/config.rb +4 -3
  29. data/lib/coradoc/html/input/converters/a.rb +8 -19
  30. data/lib/coradoc/html/input/converters/aside.rb +4 -5
  31. data/lib/coradoc/html/input/converters/audio.rb +6 -35
  32. data/lib/coradoc/html/input/converters/base.rb +29 -27
  33. data/lib/coradoc/html/input/converters/blockquote.rb +4 -2
  34. data/lib/coradoc/html/input/converters/br.rb +4 -4
  35. data/lib/coradoc/html/input/converters/bypass.rb +68 -67
  36. data/lib/coradoc/html/input/converters/code.rb +7 -5
  37. data/lib/coradoc/html/input/converters/div.rb +4 -4
  38. data/lib/coradoc/html/input/converters/dl.rb +3 -25
  39. data/lib/coradoc/html/input/converters/drop.rb +13 -13
  40. data/lib/coradoc/html/input/converters/em.rb +5 -3
  41. data/lib/coradoc/html/input/converters/figure.rb +3 -26
  42. data/lib/coradoc/html/input/converters/h.rb +9 -11
  43. data/lib/coradoc/html/input/converters/head.rb +5 -4
  44. data/lib/coradoc/html/input/converters/hr.rb +4 -5
  45. data/lib/coradoc/html/input/converters/img.rb +4 -9
  46. data/lib/coradoc/html/input/converters/li.rb +3 -1
  47. data/lib/coradoc/html/input/converters/mark.rb +3 -1
  48. data/lib/coradoc/html/input/converters/markup.rb +4 -8
  49. data/lib/coradoc/html/input/converters/math.rb +7 -14
  50. data/lib/coradoc/html/input/converters/media_base.rb +50 -0
  51. data/lib/coradoc/html/input/converters/ol.rb +6 -8
  52. data/lib/coradoc/html/input/converters/p.rb +43 -34
  53. data/lib/coradoc/html/input/converters/pass_through.rb +2 -4
  54. data/lib/coradoc/html/input/converters/positional_formatting.rb +37 -0
  55. data/lib/coradoc/html/input/converters/pre.rb +3 -3
  56. data/lib/coradoc/html/input/converters/q.rb +6 -3
  57. data/lib/coradoc/html/input/converters/strong.rb +4 -2
  58. data/lib/coradoc/html/input/converters/sub.rb +5 -23
  59. data/lib/coradoc/html/input/converters/sup.rb +5 -23
  60. data/lib/coradoc/html/input/converters/table.rb +3 -1
  61. data/lib/coradoc/html/input/converters/td.rb +4 -30
  62. data/lib/coradoc/html/input/converters/text.rb +4 -3
  63. data/lib/coradoc/html/input/converters/tr.rb +3 -2
  64. data/lib/coradoc/html/input/converters/video.rb +12 -36
  65. data/lib/coradoc/html/input/converters.rb +55 -70
  66. data/lib/coradoc/html/input/html_converter.rb +2 -74
  67. data/lib/coradoc/html/input/plugin.rb +8 -57
  68. data/lib/coradoc/html/input/plugins/plateau.rb +4 -19
  69. data/lib/coradoc/html/input/postprocessor.rb +3 -9
  70. data/lib/coradoc/html/input.rb +26 -8
  71. data/lib/coradoc/html/layout_renderer.rb +163 -0
  72. data/lib/coradoc/html/output.rb +6 -12
  73. data/lib/coradoc/html/renderer.rb +86 -357
  74. data/lib/coradoc/html/section_numberable.rb +9 -0
  75. data/lib/coradoc/html/spa.rb +29 -270
  76. data/lib/coradoc/html/static.rb +29 -238
  77. data/lib/coradoc/html/template_caching.rb +31 -0
  78. data/lib/coradoc/html/template_config.rb +11 -70
  79. data/lib/coradoc/html/template_helpers.rb +39 -31
  80. data/lib/coradoc/html/template_locator.rb +17 -11
  81. data/lib/coradoc/html/theme.rb +1 -7
  82. data/lib/coradoc/html/title_text.rb +57 -0
  83. data/lib/coradoc/html/toc_builder.rb +112 -0
  84. data/lib/coradoc/html/toc_serializer.rb +31 -0
  85. data/lib/coradoc/html/transform/from_core_model.rb +13 -12
  86. data/lib/coradoc/html/transform/to_core_model.rb +10 -12
  87. data/lib/coradoc/html/version.rb +1 -1
  88. data/lib/coradoc/html.rb +41 -88
  89. metadata +38 -70
  90. data/lib/coradoc/html/base.rb +0 -157
  91. data/lib/coradoc/html/converters/admonition.rb +0 -180
  92. data/lib/coradoc/html/converters/attribute.rb +0 -68
  93. data/lib/coradoc/html/converters/attribute_reference.rb +0 -60
  94. data/lib/coradoc/html/converters/audio.rb +0 -165
  95. data/lib/coradoc/html/converters/base.rb +0 -615
  96. data/lib/coradoc/html/converters/bibliography.rb +0 -82
  97. data/lib/coradoc/html/converters/bibliography_entry.rb +0 -108
  98. data/lib/coradoc/html/converters/block_image.rb +0 -72
  99. data/lib/coradoc/html/converters/bold.rb +0 -34
  100. data/lib/coradoc/html/converters/break.rb +0 -32
  101. data/lib/coradoc/html/converters/comment_block.rb +0 -42
  102. data/lib/coradoc/html/converters/comment_line.rb +0 -54
  103. data/lib/coradoc/html/converters/cross_reference.rb +0 -59
  104. data/lib/coradoc/html/converters/document.rb +0 -108
  105. data/lib/coradoc/html/converters/example.rb +0 -114
  106. data/lib/coradoc/html/converters/highlight.rb +0 -34
  107. data/lib/coradoc/html/converters/include.rb +0 -68
  108. data/lib/coradoc/html/converters/inline_image.rb +0 -41
  109. data/lib/coradoc/html/converters/italic.rb +0 -34
  110. data/lib/coradoc/html/converters/line_break.rb +0 -31
  111. data/lib/coradoc/html/converters/link.rb +0 -46
  112. data/lib/coradoc/html/converters/list_item.rb +0 -75
  113. data/lib/coradoc/html/converters/listing.rb +0 -99
  114. data/lib/coradoc/html/converters/literal.rb +0 -102
  115. data/lib/coradoc/html/converters/monospace.rb +0 -34
  116. data/lib/coradoc/html/converters/open.rb +0 -78
  117. data/lib/coradoc/html/converters/ordered.rb +0 -53
  118. data/lib/coradoc/html/converters/paragraph.rb +0 -46
  119. data/lib/coradoc/html/converters/quote.rb +0 -113
  120. data/lib/coradoc/html/converters/reviewer_comment.rb +0 -74
  121. data/lib/coradoc/html/converters/reviewer_note.rb +0 -134
  122. data/lib/coradoc/html/converters/section.rb +0 -90
  123. data/lib/coradoc/html/converters/sidebar.rb +0 -113
  124. data/lib/coradoc/html/converters/source.rb +0 -137
  125. data/lib/coradoc/html/converters/source_code.rb +0 -16
  126. data/lib/coradoc/html/converters/span.rb +0 -61
  127. data/lib/coradoc/html/converters/strikethrough.rb +0 -34
  128. data/lib/coradoc/html/converters/subscript.rb +0 -34
  129. data/lib/coradoc/html/converters/superscript.rb +0 -34
  130. data/lib/coradoc/html/converters/table.rb +0 -85
  131. data/lib/coradoc/html/converters/table_cell.rb +0 -203
  132. data/lib/coradoc/html/converters/table_row.rb +0 -45
  133. data/lib/coradoc/html/converters/template_html_converter.rb +0 -105
  134. data/lib/coradoc/html/converters/term.rb +0 -58
  135. data/lib/coradoc/html/converters/text_element.rb +0 -44
  136. data/lib/coradoc/html/converters/underline.rb +0 -34
  137. data/lib/coradoc/html/converters/unordered.rb +0 -47
  138. data/lib/coradoc/html/converters/verse.rb +0 -105
  139. data/lib/coradoc/html/converters/video.rb +0 -179
  140. data/lib/coradoc/html/element_mapping.rb +0 -210
  141. data/lib/coradoc/html/entity.rb +0 -137
  142. data/lib/coradoc/html/input/converters/ignore.rb +0 -22
  143. data/lib/coradoc/html/input/converters/th.rb +0 -20
  144. data/lib/coradoc/html/theme/base.rb +0 -231
  145. data/lib/coradoc/html/theme/classic_renderer.rb +0 -390
  146. data/lib/coradoc/html/theme/modern/components/ui_components.rb +0 -344
  147. data/lib/coradoc/html/theme/modern/css_generator.rb +0 -311
  148. data/lib/coradoc/html/theme/modern/javascript_generator.rb +0 -314
  149. data/lib/coradoc/html/theme/modern/serializers/document_serializer.rb +0 -382
  150. data/lib/coradoc/html/theme/modern/tailwind_config_builder.rb +0 -164
  151. data/lib/coradoc/html/theme/modern/vue_template_generator.rb +0 -374
  152. data/lib/coradoc/html/theme/modern_renderer.rb +0 -250
  153. data/lib/coradoc/html/theme/registry.rb +0 -153
@@ -4,88 +4,73 @@ module Coradoc
4
4
  module Input
5
5
  module Html
6
6
  module Converters
7
- # Autoload converter classes - they will register themselves when first accessed
8
- autoload :Base, 'coradoc/html/input/converters/base'
9
- autoload :Markup, 'coradoc/html/input/converters/markup'
10
- autoload :A, 'coradoc/html/input/converters/a'
11
- autoload :Aside, 'coradoc/html/input/converters/aside'
12
- autoload :Audio, 'coradoc/html/input/converters/audio'
13
- autoload :Blockquote, 'coradoc/html/input/converters/blockquote'
14
- autoload :Br, 'coradoc/html/input/converters/br'
15
- autoload :Bypass, 'coradoc/html/input/converters/bypass'
16
- autoload :Code, 'coradoc/html/input/converters/code'
17
- autoload :Div, 'coradoc/html/input/converters/div'
18
- autoload :Dl, 'coradoc/html/input/converters/dl'
19
- autoload :Drop, 'coradoc/html/input/converters/drop'
20
- autoload :Em, 'coradoc/html/input/converters/em'
21
- autoload :Figure, 'coradoc/html/input/converters/figure'
22
- autoload :H, 'coradoc/html/input/converters/h'
23
- autoload :Head, 'coradoc/html/input/converters/head'
24
- autoload :Hr, 'coradoc/html/input/converters/hr'
25
- autoload :Ignore, 'coradoc/html/input/converters/ignore'
26
- autoload :Img, 'coradoc/html/input/converters/img'
27
- autoload :Li, 'coradoc/html/input/converters/li'
28
- autoload :Mark, 'coradoc/html/input/converters/mark'
29
- autoload :Ol, 'coradoc/html/input/converters/ol'
30
- autoload :P, 'coradoc/html/input/converters/p'
31
- autoload :PassThrough, 'coradoc/html/input/converters/pass_through'
32
- autoload :Pre, 'coradoc/html/input/converters/pre'
33
- autoload :Q, 'coradoc/html/input/converters/q'
34
- autoload :Strong, 'coradoc/html/input/converters/strong'
35
- autoload :Sup, 'coradoc/html/input/converters/sup'
36
- autoload :Sub, 'coradoc/html/input/converters/sub'
37
- autoload :Table, 'coradoc/html/input/converters/table'
38
- autoload :Td, 'coradoc/html/input/converters/td'
39
- autoload :Text, 'coradoc/html/input/converters/text'
40
- autoload :Th, 'coradoc/html/input/converters/th'
41
- autoload :Tr, 'coradoc/html/input/converters/tr'
42
- autoload :Video, 'coradoc/html/input/converters/video'
43
- autoload :Math, 'coradoc/html/input/converters/math'
7
+ # Autoload converter classes — they self-register when loaded.
8
+ # Adding a new converter requires only adding one entry here.
9
+ CONVERTERS = {
10
+ Base: 'coradoc/html/input/converters/base',
11
+ Markup: 'coradoc/html/input/converters/markup',
12
+ A: 'coradoc/html/input/converters/a',
13
+ Aside: 'coradoc/html/input/converters/aside',
14
+ Audio: 'coradoc/html/input/converters/audio',
15
+ Blockquote: 'coradoc/html/input/converters/blockquote',
16
+ Br: 'coradoc/html/input/converters/br',
17
+ Bypass: 'coradoc/html/input/converters/bypass',
18
+ Code: 'coradoc/html/input/converters/code',
19
+ Div: 'coradoc/html/input/converters/div',
20
+ Dl: 'coradoc/html/input/converters/dl',
21
+ Skip: 'coradoc/html/input/converters/drop',
22
+ Em: 'coradoc/html/input/converters/em',
23
+ Figure: 'coradoc/html/input/converters/figure',
24
+ H: 'coradoc/html/input/converters/h',
25
+ Head: 'coradoc/html/input/converters/head',
26
+ Hr: 'coradoc/html/input/converters/hr',
27
+ Img: 'coradoc/html/input/converters/img',
28
+ Li: 'coradoc/html/input/converters/li',
29
+ Mark: 'coradoc/html/input/converters/mark',
30
+ Math: 'coradoc/html/input/converters/math',
31
+ MediaBase: 'coradoc/html/input/converters/media_base',
32
+ Ol: 'coradoc/html/input/converters/ol',
33
+ P: 'coradoc/html/input/converters/p',
34
+ PassThrough: 'coradoc/html/input/converters/pass_through',
35
+ PositionalFormatting: 'coradoc/html/input/converters/positional_formatting',
36
+ Pre: 'coradoc/html/input/converters/pre',
37
+ Q: 'coradoc/html/input/converters/q',
38
+ Strong: 'coradoc/html/input/converters/strong',
39
+ Sup: 'coradoc/html/input/converters/sup',
40
+ Sub: 'coradoc/html/input/converters/sub',
41
+ Table: 'coradoc/html/input/converters/table',
42
+ Td: 'coradoc/html/input/converters/td',
43
+ Text: 'coradoc/html/input/converters/text',
44
+ Tr: 'coradoc/html/input/converters/tr',
45
+ Video: 'coradoc/html/input/converters/video'
46
+ }.freeze
47
+ private_constant :CONVERTERS
48
+
49
+ CONVERTERS.each do |name, path|
50
+ autoload name, path
51
+ end
52
+
53
+ @converters = {}
54
+ @converters_loaded = false
44
55
 
45
- # Define class methods
46
56
  def self.register(tag_name, converter)
47
- @@converters ||= {}
48
- @@converters[tag_name.to_sym] = converter
57
+ @converters[tag_name.to_sym] = converter
49
58
  end
50
59
 
51
60
  def self.unregister(tag_name)
52
- @@converters.delete(tag_name.to_sym)
61
+ @converters.delete(tag_name.to_sym)
53
62
  end
54
63
 
55
- # Ensure all converters are loaded and registered before first use
56
64
  def self.ensure_converters_loaded
57
65
  return if @converters_loaded
58
66
 
59
67
  @converters_loaded = true
60
-
61
- # Access each autoloaded constant to trigger file load + registration
62
- # Only load converters that register HTML tag handlers
63
- # Note: Some converters may have gem dependencies (e.g., Img requires marcel)
64
- # so we only load the essential ones here
65
- [
66
- Base, Markup, A, Aside, Blockquote, Br, Bypass, Code, Div, Dl,
67
- Drop, Em, Figure, H, Head, Hr, Ignore, Li, Mark, Ol, P,
68
- PassThrough, Pre, Q, Strong, Sup, Sub, Table, Td, Text, Th, Tr
69
- ].each do |converter|
70
- # Just accessing the constant triggers autoload
71
- end
68
+ CONVERTERS.each_key { |name| const_get(name) }
72
69
  end
73
70
 
74
71
  def self.lookup(tag_name)
75
72
  ensure_converters_loaded
76
- converter = @@converters[tag_name.to_sym] || default_converter(tag_name)
77
- converter.is_a?(Class) ? converter.new : converter
78
- end
79
-
80
- # NOTE: process won't run plugin hooks
81
- def self.process(node, state)
82
- node = node.to_a if node.is_a? Nokogiri::XML::NodeSet
83
- if node.is_a? Array
84
- return node.map { |i| process(i, state) }
85
- .join
86
- end
87
-
88
- lookup(node.name).convert(node, state)
73
+ @converters[tag_name.to_sym] || default_converter(tag_name)
89
74
  end
90
75
 
91
76
  def self.process_coradoc(node, state)
@@ -104,11 +89,11 @@ module Coradoc
104
89
  def self.default_converter(tag_name)
105
90
  case Html.config.unknown_tags.to_sym
106
91
  when :pass_through
107
- PassThrough.new
92
+ PassThrough::INSTANCE
108
93
  when :drop
109
- Drop.new
94
+ Skip::INSTANCE
110
95
  when :bypass
111
- Bypass.new
96
+ Bypass::INSTANCE
112
97
  when :raise
113
98
  raise Errors::UnknownTagError, "unknown tag: #{tag_name}"
114
99
  else
@@ -41,10 +41,8 @@ module Coradoc
41
41
 
42
42
  plugin_instances.each do |plugin|
43
43
  plugin.html_tree = root
44
- if plugin.public_methods.include?(:preprocess_html_tree)
45
- track_time "Preprocessing document with #{plugin.name} plugin" do
46
- plugin.preprocess_html_tree
47
- end
44
+ track_time "Preprocessing document with #{plugin.name} plugin" do
45
+ plugin.preprocess_html_tree
48
46
  end
49
47
  root = plugin.html_tree
50
48
  end
@@ -61,8 +59,6 @@ module Coradoc
61
59
  end
62
60
 
63
61
  plugin_instances.each do |plugin|
64
- next unless plugin.public_methods.include?(:postprocess_coremodel_tree)
65
-
66
62
  plugin.coremodel_tree = coremodel
67
63
  track_time "Postprocessing CoreModel tree with #{plugin.name} plugin" do
68
64
  plugin.postprocess_coremodel_tree
@@ -76,74 +72,6 @@ module Coradoc
76
72
  end
77
73
  end
78
74
 
79
- # Legacy method - returns CoreModel
80
- # @deprecated Use {#to_core_model} instead
81
- def self.to_coradoc(input, options = {})
82
- to_core_model(input, options)
83
- end
84
-
85
- # Legacy method for backward compatibility
86
- # Converts HTML to CoreModel, then serializes to target format
87
- #
88
- # @deprecated Use {#to_core_model} + Coradoc.serialize instead
89
- # @param input [String] HTML input
90
- # @param options [Hash] Conversion options
91
- # @param options [Symbol] :output_format Target format (default: :asciidoc)
92
- # @return [String] Serialized document in target format
93
- def self.convert(input, options = {})
94
- output_format = options.delete(:output_format) || :asciidoc
95
-
96
- coremodel = to_core_model(input, options)
97
-
98
- if coremodel.is_a?(Hash)
99
- coremodel.to_h do |file, tree|
100
- track_time "Serializing file #{file || 'main'}" do
101
- [file, serialize_core_model(tree, output_format, options)]
102
- end
103
- end
104
- else
105
- serialize_core_model(coremodel, output_format, options)
106
- end
107
- end
108
-
109
- # Serialize CoreModel to target format using the appropriate gem
110
- #
111
- # @param coremodel [Coradoc::CoreModel::Base] CoreModel document
112
- # @param format [Symbol] Target format
113
- # @param options [Hash] Serialization options
114
- # @return [String] Serialized document
115
- def self.serialize_core_model(coremodel, format, options = {})
116
- result = Coradoc.serialize(coremodel, to: format)
117
- cleanup_result(result, options)
118
- end
119
-
120
- # Clean up the serialized result
121
- #
122
- # @param result [String] Serialized result
123
- # @param options [Hash] Cleanup options
124
- # @return [String] Cleaned result
125
- def self.cleanup_result(result, options = {})
126
- Input::Html.config.with(options) do
127
- plugin_instances = prepare_plugin_instances(options)
128
-
129
- result = track_time 'Cleaning up the result' do
130
- Input::Html.cleaner.tidy(result)
131
- end
132
-
133
- plugin_instances.each do |plugin|
134
- next unless plugin.public_methods.include?(:postprocess_output_string)
135
-
136
- plugin.output_string = result
137
- track_time "Postprocessing output string with #{plugin.name} plugin" do
138
- plugin.postprocess_output_string
139
- end
140
- result = plugin.output_string
141
- end
142
-
143
- result
144
- end
145
- end
146
-
147
75
  def self.prepare_plugin_instances(options)
148
76
  options[:plugin_instances] || Html.config.plugins.map(&:new)
149
77
  end
@@ -11,9 +11,9 @@ module Coradoc
11
11
  # def name = "Test"
12
12
  # end
13
13
 
14
- def self.new(&block)
14
+ def self.new(&)
15
15
  if self == Plugin
16
- Class.new(Plugin, &block)
16
+ Class.new(Plugin, &)
17
17
  else
18
18
  super
19
19
  end
@@ -24,38 +24,19 @@ module Coradoc
24
24
  @html_tree_hooks_post = {}
25
25
  end
26
26
 
27
- # define name to name a Plugin
28
27
  def name
29
28
  self.class.name
30
29
  end
31
30
 
31
+ # Default no-op hooks. Plugins override these as needed.
32
+ def preprocess_html_tree; end
33
+ def postprocess_coremodel_tree; end
34
+ def postprocess_output_string; end
35
+
32
36
  #### HTML Tree functionalities
33
37
 
34
38
  attr_accessor :html_tree, :coremodel_tree, :output_string
35
39
 
36
- # Legacy accessors for backward compatibility
37
- # @deprecated Use coremodel_tree instead. Will be removed in v2.0.
38
- def coradoc_tree
39
- warn '[DEPRECATION] `coradoc_tree` is deprecated. Use `coremodel_tree` instead.'
40
- coremodel_tree
41
- end
42
-
43
- def coradoc_tree=(value)
44
- warn '[DEPRECATION] `coradoc_tree=` is deprecated. Use `coremodel_tree=` instead.'
45
- self.coremodel_tree = value
46
- end
47
-
48
- # @deprecated Use output_string instead. Will be removed in v2.0.
49
- def asciidoc_string
50
- warn '[DEPRECATION] `asciidoc_string` is deprecated. Use `output_string` instead.'
51
- output_string
52
- end
53
-
54
- def asciidoc_string=(value)
55
- warn '[DEPRECATION] `asciidoc_string=` is deprecated. Use `output_string=` instead.'
56
- self.output_string = value
57
- end
58
-
59
40
  def html_tree_change_tag_name_by_css(css, new_name)
60
41
  html_tree.css(css).each do |e|
61
42
  e.name = new_name
@@ -84,19 +65,6 @@ module Coradoc
84
65
  Coradoc::Html::Input::Converters.process_coradoc(tree, state)
85
66
  end
86
67
 
87
- # @deprecated Use html_tree_process_to_coremodel instead. Will be removed in v2.0.
88
- def html_tree_process_to_coradoc(tree, state = {})
89
- warn '[DEPRECATION] `html_tree_process_to_coradoc` is deprecated. Use `html_tree_process_to_coremodel` instead.'
90
- html_tree_process_to_coremodel(tree, state)
91
- end
92
-
93
- def html_tree_preview
94
- Tempfile.open(%w[coradoc .html]) do |i|
95
- i << html_tree.to_html
96
- system 'chromium-browser', '--no-sandbox', i.path
97
- end
98
- end
99
-
100
68
  # define preprocess_html_tree to process HTML trees
101
69
 
102
70
  # Creates a hook to be called instead of converting an element
@@ -131,7 +99,7 @@ module Coradoc
131
99
  end
132
100
  end
133
101
 
134
- def html_tree_run_hooks(node, state, &_block)
102
+ def html_tree_run_hooks(node, state, &)
135
103
  hook_pre = @html_tree_hooks_pre[node]
136
104
  hook_post = @html_tree_hooks_post[node]
137
105
 
@@ -145,24 +113,7 @@ module Coradoc
145
113
 
146
114
  #### CoreModel tree functionalities
147
115
 
148
- # define postprocess_coremodel_tree to change CoreModel tree
149
-
150
- # @deprecated Use postprocess_coremodel_tree instead. Will be removed in v2.0.
151
- def postprocess_coradoc_tree
152
- warn '[DEPRECATION] `postprocess_coradoc_tree` is deprecated. Use `postprocess_coremodel_tree` instead.'
153
- postprocess_coremodel_tree if public_methods.include?(:postprocess_coremodel_tree)
154
- end
155
-
156
116
  #### Output string functionalities
157
-
158
- # define postprocess_output_string to change the output string
159
- # (regardless of target format)
160
-
161
- # @deprecated Use postprocess_output_string instead. Will be removed in v2.0.
162
- def postprocess_asciidoc_string
163
- warn '[DEPRECATION] `postprocess_asciidoc_string` is deprecated. Use `postprocess_output_string` instead.'
164
- postprocess_output_string if public_methods.include?(:postprocess_output_string)
165
- end
166
117
  end
167
118
  end
168
119
  end
@@ -16,15 +16,6 @@ module Coradoc
16
16
  end
17
17
 
18
18
  def preprocess_html_tree
19
- # Let's simplify the tree by removing what's extraneous
20
- # html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
21
- # html_tree_replace_with_children_by_css("div.container_box")
22
- # html_tree_replace_with_children_by_css("div.col.col-12")
23
- # html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
24
- # html_tree_replace_with_children_by_css("div.row")
25
- #
26
- # We can remove that, but it messes up the images and paragraphs.
27
-
28
19
  # Remove side menu, so we can generate TOC ourselves
29
20
  html_tree_remove_by_css('.sideMenu')
30
21
 
@@ -87,7 +78,7 @@ module Coradoc
87
78
 
88
79
  # Handle non-semantic lists and indentation
89
80
  html_tree_add_hook_pre_by_css '.text2data' do |node,|
90
- text = html_tree_process_to_adoc(node).strip
81
+ text = html_tree_process_to_coremodel(node).strip
91
82
  next '' if text.empty? || text == "\u3000"
92
83
 
93
84
  if text.start_with?(/\d+\./)
@@ -101,7 +92,7 @@ module Coradoc
101
92
 
102
93
  (3..4).each do |i|
103
94
  html_tree_add_hook_pre_by_css ".text#{i}data" do |node,|
104
- text = html_tree_process_to_adoc(node).strip
95
+ text = html_tree_process_to_coremodel(node).strip
105
96
  next '' if text.empty? || text == "\u3000"
106
97
 
107
98
  text = text.strip.gsub(/^/, "#{'*' * i} ")
@@ -111,7 +102,7 @@ module Coradoc
111
102
 
112
103
  (2..3).each do |i|
113
104
  html_tree_add_hook_pre_by_css ".text#{i}data_point ul" do |node,|
114
- text = html_tree_process_to_adoc(node.children.first.children).strip
105
+ text = html_tree_process_to_coremodel(node.children.first.children).strip
115
106
 
116
107
  "#{'*' * i} #{text}\n"
117
108
  end
@@ -119,13 +110,11 @@ module Coradoc
119
110
 
120
111
  (1..20).each do |i|
121
112
  html_tree_add_hook_pre_by_css ".numtextdata_num .list_num#{i}" do |node,|
122
- text = html_tree_process_to_adoc(node).strip
113
+ text = html_tree_process_to_coremodel(node).strip
123
114
 
124
115
  "[start=#{i}]\n. #{text}\n"
125
116
  end
126
117
  end
127
-
128
- # html_tree_preview
129
118
  end
130
119
 
131
120
  IM = /[A-Z0-9]{1,3}/
@@ -216,10 +205,6 @@ module Coradoc
216
205
 
217
206
  self.output_string = str
218
207
  end
219
-
220
- # Legacy alias for backward compatibility
221
- # @deprecated Use postprocess_output_string instead. Will be removed in v2.0.
222
- alias postprocess_asciidoc_string postprocess_output_string
223
208
  end
224
209
  end
225
210
  end
@@ -3,12 +3,10 @@
3
3
  module Coradoc
4
4
  module Input
5
5
  module Html
6
- # Postprocessor's aim is to convert a Coradoc tree from
7
- # a mess that has been created from HTML into a tree that
8
- # is compatible with what we would get out of Coradoc, if
9
- # it parsed it directly.
6
+ # Postprocessor hook for CoreModel tree transformations after HTML parsing.
10
7
  #
11
- # Now operates on CoreModel types exclusively.
8
+ # Override or extend to apply post-parse cleanup. The default
9
+ # implementation returns the tree unchanged.
12
10
  class Postprocessor
13
11
  def self.process(coradoc)
14
12
  new(coradoc).process
@@ -18,11 +16,7 @@ module Coradoc
18
16
  @tree = coradoc
19
17
  end
20
18
 
21
- # Main processing entry point
22
19
  def process
23
- # For now, just return the tree as-is since CoreModel
24
- # structure is already clean and well-formed.
25
- # Future: implement CoreModel-based postprocessing
26
20
  @tree
27
21
  end
28
22
  end
@@ -17,11 +17,11 @@ module Coradoc
17
17
  autoload :HtmlConverter, 'coradoc/html/input/html_converter'
18
18
 
19
19
  def self.convert(input, options = {})
20
- HtmlConverter.convert(input, options)
20
+ HtmlConverter.to_core_model(input, options)
21
21
  end
22
22
 
23
23
  def self.to_coradoc(input, options = {})
24
- HtmlConverter.to_coradoc(input, options)
24
+ HtmlConverter.to_core_model(input, options)
25
25
  end
26
26
 
27
27
  def self.config
@@ -38,8 +38,10 @@ module Coradoc
38
38
  :html
39
39
  end
40
40
 
41
+ extend Coradoc::Html::FormatDetection
42
+
41
43
  def self.processor_match?(filename)
42
- %w[.html .htm].any? { |i| filename.downcase.end_with?(i) }
44
+ html_extension?(filename)
43
45
  end
44
46
 
45
47
  def self.processor_execute(input, options = {})
@@ -48,20 +50,36 @@ module Coradoc
48
50
 
49
51
  def self.processor_postprocess(data, options)
50
52
  if options[:output_processor] == :adoc
51
- data.transform_values do |v|
52
- Input::Html::HtmlConverter.cleanup_result(v, options)
53
- end
53
+ data.transform_values { |v| clean_output(v, options) }
54
54
  else
55
55
  data
56
56
  end
57
57
  end
58
58
 
59
+ def self.clean_output(result, options = {})
60
+ config.with(options) do
61
+ plugin_instances = HtmlConverter.prepare_plugin_instances(options)
62
+
63
+ result = HtmlConverter.track_time('Cleaning up the result') do
64
+ cleaner.tidy(result)
65
+ end
66
+
67
+ plugin_instances.each do |plugin|
68
+ plugin.output_string = result
69
+ HtmlConverter.track_time("Postprocessing output string with #{plugin.name} plugin") do
70
+ plugin.postprocess_output_string
71
+ end
72
+ result = plugin.output_string
73
+ end
74
+
75
+ result
76
+ end
77
+ end
78
+
59
79
  Coradoc::Input.define(self)
60
80
  end
61
81
  end
62
82
 
63
- # Backward compatibility alias
64
- # Some legacy code references Coradoc::Html::Input instead of Coradoc::Input::Html
65
83
  module Html
66
84
  Input = Coradoc::Input::Html
67
85
  end