coradoc-html 1.1.18 → 1.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. checksums.yaml +4 -4
  2. data/lib/coradoc/html/cleaner.rb +128 -0
  3. data/lib/coradoc/html/converters/a.rb +77 -0
  4. data/lib/coradoc/html/converters/aside.rb +20 -0
  5. data/lib/coradoc/html/converters/audio.rb +19 -0
  6. data/lib/coradoc/html/converters/base.rb +98 -0
  7. data/lib/coradoc/html/converters/blockquote.rb +25 -0
  8. data/lib/coradoc/html/converters/br.rb +17 -0
  9. data/lib/coradoc/html/converters/bypass.rb +82 -0
  10. data/lib/coradoc/html/converters/code.rb +25 -0
  11. data/lib/coradoc/html/converters/div.rb +23 -0
  12. data/lib/coradoc/html/converters/dl.rb +82 -0
  13. data/lib/coradoc/html/converters/drop.rb +26 -0
  14. data/lib/coradoc/html/converters/em.rb +23 -0
  15. data/lib/coradoc/html/converters/figure.rb +33 -0
  16. data/lib/coradoc/html/converters/h.rb +58 -0
  17. data/lib/coradoc/html/converters/head.rb +29 -0
  18. data/lib/coradoc/html/converters/hr.rb +17 -0
  19. data/lib/coradoc/html/converters/img.rb +103 -0
  20. data/lib/coradoc/html/converters/li.rb +35 -0
  21. data/lib/coradoc/html/converters/mark.rb +21 -0
  22. data/lib/coradoc/html/converters/markup.rb +93 -0
  23. data/lib/coradoc/html/converters/math.rb +37 -0
  24. data/lib/coradoc/html/converters/media_base.rb +48 -0
  25. data/lib/coradoc/html/converters/ol.rb +42 -0
  26. data/lib/coradoc/html/converters/p.rb +64 -0
  27. data/lib/coradoc/html/converters/pass_through.rb +15 -0
  28. data/lib/coradoc/html/converters/positional_formatting.rb +35 -0
  29. data/lib/coradoc/html/converters/pre.rb +57 -0
  30. data/lib/coradoc/html/converters/q.rb +25 -0
  31. data/lib/coradoc/html/converters/strong.rb +22 -0
  32. data/lib/coradoc/html/converters/sub.rb +20 -0
  33. data/lib/coradoc/html/converters/sup.rb +20 -0
  34. data/lib/coradoc/html/converters/table.rb +64 -0
  35. data/lib/coradoc/html/converters/td.rb +42 -0
  36. data/lib/coradoc/html/converters/text.rb +66 -0
  37. data/lib/coradoc/html/converters/tr.rb +27 -0
  38. data/lib/coradoc/html/converters/video.rb +27 -0
  39. data/lib/coradoc/html/converters.rb +104 -0
  40. data/lib/coradoc/html/drop/drop_factory.rb +14 -22
  41. data/lib/coradoc/html/drop/inline_element_drop.rb +3 -5
  42. data/lib/coradoc/html/drop/raw_inline_element_drop.rb +30 -0
  43. data/lib/coradoc/html/drop.rb +30 -8
  44. data/lib/coradoc/html/errors.rb +11 -0
  45. data/lib/coradoc/html/html_converter.rb +78 -0
  46. data/lib/coradoc/html/input_config.rb +66 -0
  47. data/lib/coradoc/html/plugin.rb +90 -0
  48. data/lib/coradoc/html/plugins/plateau.rb +212 -0
  49. data/lib/coradoc/html/postprocessor.rb +19 -0
  50. data/lib/coradoc/html/spa.rb +0 -2
  51. data/lib/coradoc/html/static.rb +0 -2
  52. data/lib/coradoc/html/tag_mapping.rb +3 -1
  53. data/lib/coradoc/html/transform/from_core_model.rb +2 -2
  54. data/lib/coradoc/html/transform/to_core_model.rb +3 -3
  55. data/lib/coradoc/html/version.rb +1 -1
  56. data/lib/coradoc/html.rb +30 -5
  57. metadata +46 -47
  58. data/lib/coradoc/html/input/cleaner.rb +0 -134
  59. data/lib/coradoc/html/input/config.rb +0 -80
  60. data/lib/coradoc/html/input/converters/a.rb +0 -79
  61. data/lib/coradoc/html/input/converters/aside.rb +0 -22
  62. data/lib/coradoc/html/input/converters/audio.rb +0 -21
  63. data/lib/coradoc/html/input/converters/base.rb +0 -118
  64. data/lib/coradoc/html/input/converters/blockquote.rb +0 -27
  65. data/lib/coradoc/html/input/converters/br.rb +0 -19
  66. data/lib/coradoc/html/input/converters/bypass.rb +0 -84
  67. data/lib/coradoc/html/input/converters/code.rb +0 -27
  68. data/lib/coradoc/html/input/converters/div.rb +0 -25
  69. data/lib/coradoc/html/input/converters/dl.rb +0 -84
  70. data/lib/coradoc/html/input/converters/drop.rb +0 -28
  71. data/lib/coradoc/html/input/converters/em.rb +0 -25
  72. data/lib/coradoc/html/input/converters/figure.rb +0 -35
  73. data/lib/coradoc/html/input/converters/h.rb +0 -74
  74. data/lib/coradoc/html/input/converters/head.rb +0 -31
  75. data/lib/coradoc/html/input/converters/hr.rb +0 -19
  76. data/lib/coradoc/html/input/converters/img.rb +0 -105
  77. data/lib/coradoc/html/input/converters/li.rb +0 -37
  78. data/lib/coradoc/html/input/converters/mark.rb +0 -23
  79. data/lib/coradoc/html/input/converters/markup.rb +0 -103
  80. data/lib/coradoc/html/input/converters/math.rb +0 -39
  81. data/lib/coradoc/html/input/converters/media_base.rb +0 -50
  82. data/lib/coradoc/html/input/converters/ol.rb +0 -44
  83. data/lib/coradoc/html/input/converters/p.rb +0 -90
  84. data/lib/coradoc/html/input/converters/pass_through.rb +0 -17
  85. data/lib/coradoc/html/input/converters/positional_formatting.rb +0 -37
  86. data/lib/coradoc/html/input/converters/pre.rb +0 -59
  87. data/lib/coradoc/html/input/converters/q.rb +0 -27
  88. data/lib/coradoc/html/input/converters/strong.rb +0 -24
  89. data/lib/coradoc/html/input/converters/sub.rb +0 -22
  90. data/lib/coradoc/html/input/converters/sup.rb +0 -22
  91. data/lib/coradoc/html/input/converters/table.rb +0 -66
  92. data/lib/coradoc/html/input/converters/td.rb +0 -44
  93. data/lib/coradoc/html/input/converters/text.rb +0 -68
  94. data/lib/coradoc/html/input/converters/tr.rb +0 -29
  95. data/lib/coradoc/html/input/converters/video.rb +0 -29
  96. data/lib/coradoc/html/input/converters.rb +0 -107
  97. data/lib/coradoc/html/input/errors.rb +0 -22
  98. data/lib/coradoc/html/input/html_converter.rb +0 -98
  99. data/lib/coradoc/html/input/plugin.rb +0 -120
  100. data/lib/coradoc/html/input/plugins/plateau.rb +0 -214
  101. data/lib/coradoc/html/input/postprocessor.rb +0 -25
  102. data/lib/coradoc/html/input.rb +0 -86
  103. data/lib/coradoc/html/output.rb +0 -89
@@ -1,22 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Coradoc
4
- module Input
5
- module Html
6
- module Errors
7
- # Base error class for HTML input errors
8
- # Inherits from Coradoc::Error for unified error handling
9
- class Error < Coradoc::Error
10
- end
11
-
12
- # Raised when an unknown HTML tag is encountered
13
- class UnknownTagError < Error
14
- end
15
-
16
- # Raised when HTML input configuration is invalid
17
- class InvalidConfigurationError < Error
18
- end
19
- end
20
- end
21
- end
22
- end
@@ -1,98 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Coradoc
4
- module Input
5
- module Html
6
- # HTML to CoreModel converter
7
- #
8
- # This class handles the conversion of HTML documents to CoreModel.
9
- # It does NOT handle serialization to any specific output format.
10
- # For serialization, use Coradoc.serialize(coremodel, to: :format)
11
- #
12
- # @example Basic usage - get CoreModel
13
- # coremodel = HtmlConverter.to_core_model(html_string)
14
- #
15
- # @example Serialize to AsciiDoc
16
- # coremodel = HtmlConverter.to_core_model(html_string)
17
- # adoc_text = Coradoc.serialize(coremodel, to: :asciidoc)
18
- #
19
- class HtmlConverter
20
- # Convert HTML to CoreModel
21
- #
22
- # @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node] HTML input
23
- # @param options [Hash] Conversion options
24
- # @return [Coradoc::CoreModel::Base] CoreModel document
25
- def self.to_core_model(input, options = {})
26
- Input::Html.config.with(options) do
27
- plugin_instances = prepare_plugin_instances(options)
28
-
29
- root = track_time 'Loading input HTML document' do
30
- case input
31
- when String
32
- Nokogiri::HTML(input).root
33
- when Nokogiri::XML::Document
34
- input.root
35
- when Nokogiri::XML::Node
36
- input
37
- end
38
- end
39
-
40
- return nil unless root
41
-
42
- plugin_instances.each do |plugin|
43
- plugin.html_tree = root
44
- track_time "Preprocessing document with #{plugin.name} plugin" do
45
- plugin.preprocess_html_tree
46
- end
47
- root = plugin.html_tree
48
- end
49
-
50
- coremodel = track_time 'Converting input document tree to CoreModel' do
51
- Converters.process_coradoc(
52
- root,
53
- plugin_instances: plugin_instances
54
- )
55
- end
56
-
57
- coremodel = track_time 'Post-process CoreModel tree' do
58
- Postprocessor.process(coremodel)
59
- end
60
-
61
- plugin_instances.each do |plugin|
62
- plugin.coremodel_tree = coremodel
63
- track_time "Postprocessing CoreModel tree with #{plugin.name} plugin" do
64
- plugin.postprocess_coremodel_tree
65
- end
66
- coremodel = plugin.coremodel_tree
67
- end
68
-
69
- options[:plugin_instances] = plugin_instances unless options.frozen?
70
-
71
- coremodel
72
- end
73
- end
74
-
75
- def self.prepare_plugin_instances(options)
76
- options[:plugin_instances] || Html.config.plugins.map(&:new)
77
- end
78
-
79
- @track_time_indentation = 0
80
- def self.track_time(task)
81
- if Input::Html.config.track_time
82
- warn (' ' * @track_time_indentation) + "* #{task} is starting..."
83
- @track_time_indentation += 1
84
- t0 = Time.now
85
- ret = yield
86
- time_elapsed = Time.now - t0
87
- @track_time_indentation -= 1
88
- warn (' ' * @track_time_indentation) +
89
- "* #{task} took #{time_elapsed.round(3)} seconds"
90
- ret
91
- else
92
- yield
93
- end
94
- end
95
- end
96
- end
97
- end
98
- end
@@ -1,120 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Coradoc
4
- module Input
5
- module Html
6
- class Plugin
7
- #### Plugin system general
8
-
9
- # Allow building plugins with a shorthand syntax:
10
- # plugin = Coradoc::Html::Input::Plugin.new do
11
- # def name = "Test"
12
- # end
13
-
14
- def self.new(&)
15
- if self == Plugin
16
- Class.new(Plugin, &)
17
- else
18
- super
19
- end
20
- end
21
-
22
- def initialize
23
- @html_tree_hooks_pre = {}
24
- @html_tree_hooks_post = {}
25
- end
26
-
27
- def name
28
- self.class.name
29
- end
30
-
31
- # Default no-op hooks. Plugins override these as needed.
32
- def preprocess_html_tree; end
33
- def postprocess_coremodel_tree; end
34
- def postprocess_output_string; end
35
-
36
- #### HTML Tree functionalities
37
-
38
- attr_accessor :html_tree, :coremodel_tree, :output_string
39
-
40
- def html_tree_change_tag_name_by_css(css, new_name)
41
- html_tree.css(css).each do |e|
42
- e.name = new_name
43
- end
44
- end
45
-
46
- def html_tree_change_properties_by_css(css, properties)
47
- html_tree.css(css).each do |e|
48
- properties.each do |k, v|
49
- e[k.to_s] = v
50
- end
51
- end
52
- end
53
-
54
- def html_tree_remove_by_css(css)
55
- html_tree.css(css).each(&:remove)
56
- end
57
-
58
- def html_tree_replace_with_children_by_css(css)
59
- html_tree.css(css).each do |e|
60
- e.replace(e.children)
61
- end
62
- end
63
-
64
- def html_tree_process_to_coremodel(tree, state = {})
65
- Coradoc::Html::Input::Converters.process_coradoc(tree, state)
66
- end
67
-
68
- # define preprocess_html_tree to process HTML trees
69
-
70
- # Creates a hook to be called instead of converting an element
71
- # to a CoreModel node.
72
- #
73
- # proc |html_node, state|
74
- # coremodel_node
75
- # end
76
- def html_tree_add_hook_pre(element, &block)
77
- @html_tree_hooks_pre[element] = block
78
- end
79
-
80
- def html_tree_add_hook_pre_by_css(css, &block)
81
- html_tree.css(css).each do |e|
82
- html_tree_add_hook_pre(e, &block)
83
- end
84
- end
85
-
86
- # Creates a hook to be called after converting an element
87
- # to a CoreModel node.
88
- #
89
- # proc |html_node, coremodel_node, state|
90
- # coremodel_node
91
- # end
92
- def html_tree_add_hook_post(element, &block)
93
- @html_tree_hooks_post[element] = block
94
- end
95
-
96
- def html_tree_add_hook_post_by_css(css, &block)
97
- html_tree.css(css).each do |e|
98
- html_tree_add_hook_post(e, &block)
99
- end
100
- end
101
-
102
- def html_tree_run_hooks(node, state, &)
103
- hook_pre = @html_tree_hooks_pre[node]
104
- hook_post = @html_tree_hooks_post[node]
105
-
106
- coremodel = hook_pre.call(node, state) if hook_pre
107
- coremodel ||= yield node, state
108
-
109
- coremodel = hook_post.call(node, coremodel, state) if hook_post
110
-
111
- coremodel
112
- end
113
-
114
- #### CoreModel tree functionalities
115
-
116
- #### Output string functionalities
117
- end
118
- end
119
- end
120
- end
@@ -1,214 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Coradoc
4
- module Input
5
- module Html
6
- class Plugin
7
- # This plugin enhances documents from the PLATEAU project
8
- # to extract more data.
9
- #
10
- # Usage:
11
- # coradoc convert -rcoradoc/input/html/plugins/plateau
12
- # --external-images -u raise --output _out/index.adoc index.html
13
- class Plateau < Plugin
14
- def name
15
- 'PLATEAU'
16
- end
17
-
18
- def preprocess_html_tree
19
- # Remove side menu, so we can generate TOC ourselves
20
- html_tree_remove_by_css('.sideMenu')
21
-
22
- # Correct non-semantic classes into semantic HTML tags
23
- html_tree_change_tag_name_by_css('.titledata', 'h1')
24
- html_tree_change_tag_name_by_css('.subtitledata', 'h2')
25
- html_tree_change_tag_name_by_css('.pitemdata', 'h3')
26
- html_tree_change_tag_name_by_css('.sitemdata', 'h4')
27
- html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', 'th')
28
- html_tree_change_tag_name_by_css('td[bgcolor="#d0cece"]', 'th')
29
- html_tree_change_tag_name_by_css(
30
- '.framedata, .frame_container_box',
31
- 'aside'
32
- )
33
- html_tree_change_tag_name_by_css('.frame2data', 'pre')
34
- # Assumption that all code snippets in those documents are XML...
35
- html_tree_change_properties_by_css(
36
- '.frame2data',
37
- class: 'brush:xml;'
38
- )
39
-
40
- # Remove some CSS ids that are not important to us
41
- html_tree_change_properties_by_css('#__nuxt', id: nil)
42
- html_tree_change_properties_by_css('#__layout', id: nil)
43
- html_tree_change_properties_by_css('#app', id: nil)
44
-
45
- # Handle lists of document 02
46
- html_tree_replace_with_children_by_css('.list_num-wrap')
47
-
48
- # Convert table/img caption to become a caption
49
- html_tree.css('.imagedata').each do |e|
50
- table = e.parent.next&.children&.first
51
- if table&.name == 'table'
52
- e.name = 'caption'
53
- table.prepend_child(e)
54
- next
55
- end
56
-
57
- img = e.parent.previous&.children&.first
58
- next unless img&.name == 'img' && img['src']
59
-
60
- title = e.text.strip
61
- img['title'] = title
62
- e.remove
63
- next
64
- end
65
-
66
- # Add hooks for H1, H2, H3, H4
67
- html_tree_add_hook_post_by_css(
68
- 'h1, h2, h3',
69
- &method(:handle_headers)
70
- )
71
- html_tree_add_hook_post_by_css('h4', &method(:handle_headers_h4))
72
-
73
- # Table cells aligned to center
74
- html_tree_change_properties_by_css(
75
- '.tableTopCenter',
76
- align: 'center'
77
- )
78
-
79
- # Handle non-semantic lists and indentation
80
- html_tree_add_hook_pre_by_css '.text2data' do |node,|
81
- text = html_tree_process_to_coremodel(node).strip
82
- next '' if text.empty? || text == "\u3000"
83
-
84
- if text.start_with?(/\d+\./)
85
- text = text.sub(/\A\d+.\s*/, '')
86
- ".. #{text}\n"
87
- else
88
- text = text.gsub(/^/, '** ')
89
- "\n\n//-PT2D\n#{text}\n//-ENDPT2D\n\n"
90
- end
91
- end
92
-
93
- (3..4).each do |i|
94
- html_tree_add_hook_pre_by_css ".text#{i}data" do |node,|
95
- text = html_tree_process_to_coremodel(node).strip
96
- next '' if text.empty? || text == "\u3000"
97
-
98
- text = text.strip.gsub(/^/, "#{'*' * i} ")
99
- "\n\n//-PT#{i}D\n#{text}\n//-ENDPT#{i}D\n\n"
100
- end
101
- end
102
-
103
- (2..3).each do |i|
104
- html_tree_add_hook_pre_by_css ".text#{i}data_point ul" do |node,|
105
- text = html_tree_process_to_coremodel(node.children.first.children).strip
106
-
107
- "#{'*' * i} #{text}\n"
108
- end
109
- end
110
-
111
- (1..20).each do |i|
112
- html_tree_add_hook_pre_by_css ".numtextdata_num .list_num#{i}" do |node,|
113
- text = html_tree_process_to_coremodel(node).strip
114
-
115
- "[start=#{i}]\n. #{text}\n"
116
- end
117
- end
118
- end
119
-
120
- IM = /[A-Z0-9]{1,3}/
121
-
122
- def handle_headers(node, coradoc, _state)
123
- content = coradoc.content.map(&:content).join
124
-
125
- if %w[toc0 toc_0].any? { |i| coradoc.id&.start_with?(i) }
126
- # Special content
127
- case content.strip
128
- when 'はじめに' # Introduction
129
- coradoc.style = 'abstract' # The older version document has ".preface"
130
- coradoc.level_int = 1
131
- when '改定の概要' # Revision overview
132
- coradoc.style = 'abstract' # The older version document has ".preface"
133
- coradoc.level_int = 1
134
- when '参考文献' # Bibliography
135
- coradoc.style = 'bibliography'
136
- coradoc.level_int = 1
137
- when '改訂履歴' # Document history
138
- coradoc.style = 'appendix'
139
- coradoc.level_int = 1
140
- when '0 概要' # Overview
141
- coradoc.style = 'abstract' # I'm not sure this is correct
142
- coradoc.level_int = 1
143
- when '索引' # Index
144
- coradoc.style = 'index' # I'm not sure this is correct
145
- coradoc.level_int = 1
146
- else
147
- warn "Unknown section #{content.inspect}"
148
- end
149
- end
150
-
151
- if node.name == 'h1' && content.start_with?('Annex')
152
- coradoc.style = 'appendix'
153
- coradoc.content.first.content.sub!(/\AAnnex [A-Z]/, '')
154
- end
155
-
156
- # Remove numbers
157
- coradoc.content.first.content.sub!(
158
- /\A(#{IM}\.)*#{IM}[[:space:]]/o,
159
- ''
160
- )
161
-
162
- coradoc
163
- end
164
-
165
- def handle_headers_h4(_node, coradoc, _state)
166
- title = Coradoc.strip_unicode(coradoc.content.first.content)
167
- case title
168
- when /\A\(\d+\)(.*)/
169
- coradoc.level_int = 4
170
- coradoc.content.first.content = ::Regexp.last_match(1).strip
171
- coradoc
172
- when /\A\d+\)(.*)/
173
- coradoc.level_int = 5
174
- coradoc.content.first.content = ::Regexp.last_match(1).strip
175
- coradoc
176
- when /\A#{IM}\.#{IM}\.#{IM}\.#{IM}(.*)/o
177
- coradoc.level_int = 4
178
- coradoc.content.first.content = ::Regexp.last_match(1).strip
179
- else
180
- if title.empty?
181
- # Strip instances of faulty empty paragraphs
182
- nil
183
- else
184
- ["// Unhandled h4 content\n", coradoc]
185
- end
186
- end
187
- end
188
-
189
- def postprocess_output_string
190
- str = output_string
191
-
192
- ### Custom indentation handling
193
- # If there's a step up, add [none]
194
- str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n")
195
- str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n")
196
- str = str.gsub(%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n")
197
- # Collapse blocks of text[2,3]data
198
- str = str.gsub(%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n")
199
- # In the beginning, add [none]
200
- str = str.gsub(%r{\s+//-PT[234]D\s+}, "\n\n[none]\n")
201
- # If following with another list, ensure we readd styling
202
- str = str.gsub(%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*")
203
- # Otherwise, clean up
204
- str = str.gsub(%r{\s+//-ENDPT[234]D\s+}, "\n\n")
205
-
206
- self.output_string = str
207
- end
208
- end
209
- end
210
- end
211
- end
212
- end
213
-
214
- Coradoc::Input::Html.config.plugins << Coradoc::Input::Html::Plugin::Plateau
@@ -1,25 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Coradoc
4
- module Input
5
- module Html
6
- # Postprocessor hook for CoreModel tree transformations after HTML parsing.
7
- #
8
- # Override or extend to apply post-parse cleanup. The default
9
- # implementation returns the tree unchanged.
10
- class Postprocessor
11
- def self.process(coradoc)
12
- new(coradoc).process
13
- end
14
-
15
- def initialize(coradoc)
16
- @tree = coradoc
17
- end
18
-
19
- def process
20
- @tree
21
- end
22
- end
23
- end
24
- end
25
- end
@@ -1,86 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'digest'
4
- require 'nokogiri'
5
- require 'coradoc'
6
-
7
- module Coradoc
8
- module Input
9
- module Html
10
- # Autoload all components
11
- autoload :Errors, 'coradoc/html/input/errors'
12
- autoload :Cleaner, 'coradoc/html/input/cleaner'
13
- autoload :Config, 'coradoc/html/input/config'
14
- autoload :Plugin, 'coradoc/html/input/plugin'
15
- autoload :Postprocessor, 'coradoc/html/input/postprocessor'
16
- autoload :Converters, 'coradoc/html/input/converters'
17
- autoload :HtmlConverter, 'coradoc/html/input/html_converter'
18
-
19
- def self.convert(input, options = {})
20
- HtmlConverter.to_core_model(input, options)
21
- end
22
-
23
- def self.to_coradoc(input, options = {})
24
- HtmlConverter.to_core_model(input, options)
25
- end
26
-
27
- def self.config
28
- @config ||= Config.new
29
- yield @config if block_given?
30
- @config
31
- end
32
-
33
- def self.cleaner
34
- @cleaner ||= Cleaner.new
35
- end
36
-
37
- def self.processor_id
38
- :html
39
- end
40
-
41
- extend Coradoc::Html::FormatDetection
42
-
43
- def self.processor_match?(filename)
44
- html_extension?(filename)
45
- end
46
-
47
- def self.processor_execute(input, options = {})
48
- to_coradoc(input, options)
49
- end
50
-
51
- def self.processor_postprocess(data, options)
52
- if options[:output_processor] == :adoc
53
- data.transform_values { |v| clean_output(v, options) }
54
- else
55
- data
56
- end
57
- end
58
-
59
- def self.clean_output(result, options = {})
60
- config.with(options) do
61
- plugin_instances = HtmlConverter.prepare_plugin_instances(options)
62
-
63
- result = HtmlConverter.track_time('Cleaning up the result') do
64
- cleaner.tidy(result)
65
- end
66
-
67
- plugin_instances.each do |plugin|
68
- plugin.output_string = result
69
- HtmlConverter.track_time("Postprocessing output string with #{plugin.name} plugin") do
70
- plugin.postprocess_output_string
71
- end
72
- result = plugin.output_string
73
- end
74
-
75
- result
76
- end
77
- end
78
-
79
- Coradoc::Input.define(self)
80
- end
81
- end
82
-
83
- module Html
84
- Input = Coradoc::Input::Html
85
- end
86
- end
@@ -1,89 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'coradoc'
4
-
5
- module Coradoc
6
- module Output
7
- # Static HTML output processor
8
- #
9
- # Generates static HTML documents from CoreModel using the classic
10
- # rendering approach without JavaScript frameworks.
11
- #
12
- # @example Using the processor directly
13
- # html = Coradoc::Output::HtmlStatic.processor_execute({ "doc.html" => document }, {})
14
- #
15
- # @example Using through Output module
16
- # result = Coradoc::Output.process(document, format: :html_static)
17
- #
18
- class HtmlStatic
19
- extend Coradoc::Html::FormatDetection
20
-
21
- class << self
22
- def processor_id
23
- :html_static
24
- end
25
-
26
- def processor_match?(filename)
27
- html_extension?(filename)
28
- end
29
-
30
- # Process documents to static HTML
31
- # @param input [Hash<String, Object>] mapping of filenames to documents
32
- # @param options [Hash] processing options
33
- # @return [Hash<String, String>] mapping of filenames to HTML output
34
- def processor_execute(input, options = {})
35
- result = {}
36
- input.each do |filename, document|
37
- html = Coradoc::Html::Static.convert(document, options)
38
- result[filename] = html
39
- end
40
- result
41
- end
42
- end
43
- end
44
-
45
- # SPA (Single Page Application) HTML output processor
46
- #
47
- # Generates modern Vue.js + Tailwind CSS HTML documents from CoreModel.
48
- #
49
- # @example Using the processor directly
50
- # html = Coradoc::Output::HtmlSpa.processor_execute({ "doc.html" => document }, {})
51
- #
52
- # @example Using through Output module
53
- # result = Coradoc::Output.process(document, format: :html_spa)
54
- #
55
- class HtmlSpa
56
- extend Coradoc::Html::FormatDetection
57
-
58
- class << self
59
- def processor_id
60
- :html_spa
61
- end
62
-
63
- def processor_match?(filename)
64
- html_extension?(filename)
65
- end
66
-
67
- # Process documents to SPA HTML
68
- # @param input [Hash<String, Object>] mapping of filenames to documents
69
- # @param options [Hash] processing options
70
- # @return [Hash<String, String>] mapping of filenames to SPA HTML output
71
- def processor_execute(input, options = {})
72
- result = {}
73
- input.each do |filename, document|
74
- html = Coradoc::Html::Spa.convert(document, options)
75
- result[filename] = html
76
- end
77
- result
78
- end
79
- end
80
- end
81
-
82
- # Alias for HtmlSpa
83
- Spa = HtmlSpa
84
- end
85
- end
86
-
87
- # Register processors with the Output module
88
- Coradoc::Output.define(Coradoc::Output::HtmlStatic)
89
- Coradoc::Output.define(Coradoc::Output::HtmlSpa)