coradoc 0.3.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/exe/reverse_adoc +24 -3
  4. data/lib/coradoc/document.rb +1 -0
  5. data/lib/coradoc/element/admonition.rb +2 -2
  6. data/lib/coradoc/element/attribute.rb +2 -2
  7. data/lib/coradoc/element/attribute_list.rb +94 -15
  8. data/lib/coradoc/element/audio.rb +13 -2
  9. data/lib/coradoc/element/author.rb +4 -2
  10. data/lib/coradoc/element/base.rb +70 -7
  11. data/lib/coradoc/element/block/core.rb +8 -4
  12. data/lib/coradoc/element/block/quote.rb +1 -1
  13. data/lib/coradoc/element/break.rb +1 -1
  14. data/lib/coradoc/element/document_attributes.rb +6 -6
  15. data/lib/coradoc/element/header.rb +4 -2
  16. data/lib/coradoc/element/image/block_image.rb +13 -2
  17. data/lib/coradoc/element/image/core.rb +34 -5
  18. data/lib/coradoc/element/image/inline_image.rb +2 -2
  19. data/lib/coradoc/element/inline/anchor.rb +4 -2
  20. data/lib/coradoc/element/inline/bold.rb +9 -4
  21. data/lib/coradoc/element/inline/cross_reference.rb +4 -2
  22. data/lib/coradoc/element/inline/hard_line_break.rb +1 -1
  23. data/lib/coradoc/element/inline/highlight.rb +11 -6
  24. data/lib/coradoc/element/inline/italic.rb +9 -4
  25. data/lib/coradoc/element/inline/link.rb +22 -6
  26. data/lib/coradoc/element/inline/monospace.rb +9 -4
  27. data/lib/coradoc/element/inline/quotation.rb +3 -1
  28. data/lib/coradoc/element/inline/subscript.rb +4 -2
  29. data/lib/coradoc/element/inline/superscript.rb +4 -2
  30. data/lib/coradoc/element/list/core.rb +9 -6
  31. data/lib/coradoc/element/list/definition.rb +19 -0
  32. data/lib/coradoc/element/list/ordered.rb +1 -1
  33. data/lib/coradoc/element/list/unordered.rb +1 -1
  34. data/lib/coradoc/element/list.rb +1 -0
  35. data/lib/coradoc/element/list_item.rb +8 -3
  36. data/lib/coradoc/element/list_item_definition.rb +32 -0
  37. data/lib/coradoc/element/paragraph.rb +4 -2
  38. data/lib/coradoc/element/revision.rb +4 -2
  39. data/lib/coradoc/element/section.rb +21 -4
  40. data/lib/coradoc/element/table.rb +27 -10
  41. data/lib/coradoc/element/text_element.rb +48 -8
  42. data/lib/coradoc/element/title.rb +26 -6
  43. data/lib/coradoc/element/video.rb +32 -5
  44. data/lib/coradoc/reverse_adoc/README.adoc +14 -8
  45. data/lib/coradoc/reverse_adoc/cleaner.rb +20 -8
  46. data/lib/coradoc/reverse_adoc/config.rb +35 -16
  47. data/lib/coradoc/reverse_adoc/converters/a.rb +17 -12
  48. data/lib/coradoc/reverse_adoc/converters/aside.rb +0 -4
  49. data/lib/coradoc/reverse_adoc/converters/audio.rb +0 -4
  50. data/lib/coradoc/reverse_adoc/converters/base.rb +48 -44
  51. data/lib/coradoc/reverse_adoc/converters/blockquote.rb +2 -11
  52. data/lib/coradoc/reverse_adoc/converters/br.rb +0 -4
  53. data/lib/coradoc/reverse_adoc/converters/bypass.rb +0 -4
  54. data/lib/coradoc/reverse_adoc/converters/code.rb +5 -42
  55. data/lib/coradoc/reverse_adoc/converters/div.rb +0 -4
  56. data/lib/coradoc/reverse_adoc/converters/dl.rb +55 -0
  57. data/lib/coradoc/reverse_adoc/converters/em.rb +5 -43
  58. data/lib/coradoc/reverse_adoc/converters/figure.rb +0 -4
  59. data/lib/coradoc/reverse_adoc/converters/h.rb +0 -4
  60. data/lib/coradoc/reverse_adoc/converters/head.rb +0 -4
  61. data/lib/coradoc/reverse_adoc/converters/hr.rb +0 -4
  62. data/lib/coradoc/reverse_adoc/converters/img.rb +21 -16
  63. data/lib/coradoc/reverse_adoc/converters/li.rb +0 -4
  64. data/lib/coradoc/reverse_adoc/converters/mark.rb +5 -11
  65. data/lib/coradoc/reverse_adoc/converters/markup.rb +27 -0
  66. data/lib/coradoc/reverse_adoc/converters/ol.rb +0 -4
  67. data/lib/coradoc/reverse_adoc/converters/p.rb +0 -4
  68. data/lib/coradoc/reverse_adoc/converters/pre.rb +0 -4
  69. data/lib/coradoc/reverse_adoc/converters/q.rb +0 -4
  70. data/lib/coradoc/reverse_adoc/converters/strong.rb +5 -41
  71. data/lib/coradoc/reverse_adoc/converters/sub.rb +6 -4
  72. data/lib/coradoc/reverse_adoc/converters/sup.rb +7 -5
  73. data/lib/coradoc/reverse_adoc/converters/table.rb +215 -4
  74. data/lib/coradoc/reverse_adoc/converters/td.rb +1 -7
  75. data/lib/coradoc/reverse_adoc/converters/text.rb +1 -38
  76. data/lib/coradoc/reverse_adoc/converters/tr.rb +0 -4
  77. data/lib/coradoc/reverse_adoc/converters/video.rb +0 -4
  78. data/lib/coradoc/reverse_adoc/converters.rb +21 -0
  79. data/lib/coradoc/reverse_adoc/html_converter.rb +109 -20
  80. data/lib/coradoc/reverse_adoc/plugin.rb +131 -0
  81. data/lib/coradoc/reverse_adoc/plugins/plateau.rb +174 -0
  82. data/lib/coradoc/reverse_adoc/postprocessor.rb +148 -0
  83. data/lib/coradoc/reverse_adoc.rb +3 -0
  84. data/lib/coradoc/version.rb +1 -1
  85. data/lib/reverse_adoc.rb +1 -1
  86. metadata +8 -3
  87. data/lib/coradoc/element/inline/image.rb +0 -26
data/lib/coradoc/reverse_adoc/html_converter.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "converters/markup"
3
4
  require_relative "converters/a"
4
5
  require_relative "converters/aside"
5
6
  require_relative "converters/audio"
@@ -8,6 +9,7 @@ require_relative "converters/br"
8
9
  require_relative "converters/bypass"
9
10
  require_relative "converters/code"
10
11
  require_relative "converters/div"
12
+ require_relative "converters/dl"
11
13
  require_relative "converters/drop"
12
14
  require_relative "converters/em"
13
15
  require_relative "converters/figure"
@@ -34,28 +36,115 @@ require_relative "converters/tr"
34
36
  require_relative "converters/video"
35
37
  require_relative "converters/math"
36
38
 
37
- module Coradoc::ReverseAdoc
38
- class HtmlConverter
39
- def self.to_coradoc(input, options = {})
40
- root = case input
41
- when String
42
- Nokogiri::HTML(input).root
43
- when Nokogiri::XML::Document
44
- input.root
45
- when Nokogiri::XML::Node
46
- input
47
- end
48
-
49
- return "" unless root
50
-
51
- Coradoc::ReverseAdoc.config.with(options) do
52
- Coradoc::ReverseAdoc::Converters.lookup(root.name).to_coradoc(root)
39
+ module Coradoc
40
+ module ReverseAdoc
41
+ class HtmlConverter
42
+ def self.to_coradoc(input, options = {})
43
+ plugin_instances = options.delete(:plugin_instances)
44
+ ReverseAdoc.config.with(options) do
45
+ plugin_instances ||= Coradoc::ReverseAdoc.config.plugins.map(&:new)
46
+
47
+ root = track_time "Loading input HTML document" do
48
+ case input
49
+ when String
50
+ Nokogiri::HTML(input).root
51
+ when Nokogiri::XML::Document
52
+ input.root
53
+ when Nokogiri::XML::Node
54
+ input
55
+ end
56
+ end
57
+
58
+ return "" unless root
59
+
60
+ plugin_instances.each do |plugin|
61
+ plugin.html_tree = root
62
+ if plugin.respond_to?(:preprocess_html_tree)
63
+ track_time "Preprocessing document with #{plugin.name} plugin" do
64
+ plugin.preprocess_html_tree
65
+ end
66
+ end
67
+ root = plugin.html_tree
68
+ end
69
+
70
+ coradoc = track_time "Converting input document tree to Coradoc tree" do
71
+ Converters.process_coradoc(root, plugin_instances: plugin_instances)
72
+ end
73
+
74
+ coradoc = track_time "Post-process Coradoc tree" do
75
+ Postprocessor.process(coradoc)
76
+ end
77
+
78
+ plugin_instances.each do |plugin|
79
+ if plugin.respond_to?(:postprocess_coradoc_tree)
80
+ plugin.coradoc_tree = coradoc
81
+ track_time "Postprocessing Coradoc tree with #{plugin.name} plugin" do
82
+ plugin.postprocess_coradoc_tree
83
+ end
84
+ coradoc = plugin.coradoc_tree
85
+ end
86
+ end
87
+
88
+ coradoc
89
+ end
53
90
  end
54
- end
55
91
 
56
- def self.convert(input, options = {})
57
- result = Coradoc::Generator.gen_adoc(to_coradoc(input, options))
58
- Coradoc::ReverseAdoc.cleaner.tidy(result)
92
+ def self.convert(input, options = {})
93
+ ReverseAdoc.config.with(options) do
94
+ plugin_instances = Coradoc::ReverseAdoc.config.plugins.map(&:new)
95
+
96
+ options = options.merge(plugin_instances: plugin_instances)
97
+
98
+ coradoc = to_coradoc(input, options)
99
+
100
+ if coradoc.is_a?(Hash)
101
+ coradoc.to_h do |file, tree|
102
+ track_time "Converting file #{file || 'main'}" do
103
+ [file, convert_single_coradoc_to_adoc(file, tree, plugin_instances)]
104
+ end
105
+ end
106
+ else
107
+ convert_single_coradoc_to_adoc(nil, coradoc, plugin_instances)
108
+ end
109
+ end
110
+ end
111
+
112
+ def self.convert_single_coradoc_to_adoc(_file, coradoc, plugin_instances)
113
+ result = track_time "Converting Coradoc tree into Asciidoc" do
114
+ Coradoc::Generator.gen_adoc(coradoc)
115
+ end
116
+ result = track_time "Cleaning up the result" do
117
+ ReverseAdoc.cleaner.tidy(result)
118
+ end
119
+ plugin_instances.each do |plugin|
120
+ if plugin.respond_to?(:postprocess_asciidoc_string)
121
+ plugin.asciidoc_string = result
122
+ track_time "Postprocessing AsciiDoc string with #{plugin.name} plugin" do
123
+ plugin.postprocess_asciidoc_string
124
+ end
125
+ result = plugin.asciidoc_string
126
+ end
127
+ end
128
+ result
129
+ end
130
+
131
+ @track_time_indentation = 0
132
+ def self.track_time(task)
133
+ if ReverseAdoc.config.track_time
134
+ warn " " * @track_time_indentation +
135
+ "* #{task} is starting..."
136
+ @track_time_indentation += 1
137
+ t0 = Time.now
138
+ ret = yield
139
+ time_elapsed = Time.now - t0
140
+ @track_time_indentation -= 1
141
+ warn " " * @track_time_indentation +
142
+ "* #{task} took #{time_elapsed.round(3)} seconds"
143
+ ret
144
+ else
145
+ yield
146
+ end
147
+ end
59
148
  end
60
149
  end
61
150
  end
data/lib/coradoc/reverse_adoc/plugin.rb ADDED
@@ -0,0 +1,131 @@
1
+ module Coradoc::ReverseAdoc
2
+ class Plugin
3
+ #### Plugin system general
4
+
5
+ # Allow building plugins with a shorthand syntax:
6
+ # plugin = Coradoc::ReverseAdoc::Plugin.new do
7
+ # def name = "Test"
8
+ # end
9
+
10
+ def self.new(&block)
11
+ if self == Plugin
12
+ Class.new(Plugin, &block)
13
+ else
14
+ super
15
+ end
16
+ end
17
+
18
+ def initialize
19
+ @html_tree_hooks_pre = {}
20
+ @html_tree_hooks_post = {}
21
+ end
22
+
23
+ # define name to name a Plugin
24
+ def name
25
+ self.class.name
26
+ end
27
+
28
+ #### HTML Tree functionalities
29
+
30
+ attr_accessor :html_tree
31
+
32
+ def html_tree_change_tag_name_by_css(css, new_name)
33
+ html_tree.css(css).each do |e|
34
+ e.name = new_name
35
+ end
36
+ end
37
+
38
+ def html_tree_change_properties_by_css(css, properties)
39
+ html_tree.css(css).each do |e|
40
+ properties.each do |k,v|
41
+ e[k.to_s] = v
42
+ end
43
+ end
44
+ end
45
+
46
+ def html_tree_remove_by_css(css)
47
+ html_tree.css(css).each(&:remove)
48
+ end
49
+
50
+ def html_tree_replace_with_children_by_css(css)
51
+ html_tree.css(css).each do |e|
52
+ e.replace(e.children)
53
+ end
54
+ end
55
+
56
+ def html_tree_process_to_coradoc(tree, state={})
57
+ Coradoc::ReverseAdoc::Converters.process_coradoc(tree, state)
58
+ end
59
+
60
+ def html_tree_process_to_adoc(tree, state={})
61
+ Coradoc::ReverseAdoc::Converters.process(tree, state)
62
+ end
63
+
64
+ def html_tree_preview
65
+ Tempfile.open(%w"coradoc .html") do |i|
66
+ i << html_tree.to_html
67
+ system "chromium-browser", "--no-sandbox", i.path
68
+ end
69
+ end
70
+
71
+ # define preprocess_html_tree to process HTML trees
72
+
73
+ # Creates a hook to be called instead of converting an element
74
+ # to a Coradoc node.
75
+ #
76
+ # proc |html_node, state|
77
+ # coradoc_node
78
+ # end
79
+ def html_tree_add_hook_pre(element, &block)
80
+ @html_tree_hooks_pre[element] = block
81
+ end
82
+
83
+ def html_tree_add_hook_pre_by_css(css, &block)
84
+ html_tree.css(css).each do |e|
85
+ html_tree_add_hook_pre(e, &block)
86
+ end
87
+ end
88
+
89
+ # Creates a hook to be called after converting an element
90
+ # to a Coradoc node.
91
+ #
92
+ # proc |html_node, coradoc_node, state|
93
+ # coradoc_node
94
+ # end
95
+ def html_tree_add_hook_post(element, &block)
96
+ @html_tree_hooks_post[element] = block
97
+ end
98
+
99
+ def html_tree_add_hook_post_by_css(css, &block)
100
+ html_tree.css(css).each do |e|
101
+ html_tree_add_hook_post(e, &block)
102
+ end
103
+ end
104
+
105
+ def html_tree_run_hooks(node, state, &_block)
106
+ hook_pre = @html_tree_hooks_pre[node]
107
+ hook_post = @html_tree_hooks_post[node]
108
+
109
+ coradoc = hook_pre.(node, state) if hook_pre
110
+ coradoc ||= yield node, state
111
+
112
+ if hook_post
113
+ coradoc = hook_post.(node, coradoc, state)
114
+ end
115
+
116
+ coradoc
117
+ end
118
+
119
+ #### Coradoc tree functionalities
120
+
121
+ attr_accessor :coradoc_tree
122
+
123
+ # define postprocess_coradoc_tree to change coradoc tree
124
+
125
+ #### AsciiDoc string functionalities
126
+
127
+ attr_accessor :asciidoc_string
128
+
129
+ # define postprocess_asciidoc_string to change the coradoc string
130
+ end
131
+ end
data/lib/coradoc/reverse_adoc/plugins/plateau.rb ADDED
@@ -0,0 +1,174 @@
1
+ module Coradoc::ReverseAdoc
2
+ class Plugin
3
+ # This plugin enhances documents from the PLATEAU project
4
+ # to extract more data.
5
+ #
6
+ # Usage:
7
+ # reverse_adoc -rcoradoc/reverse_adoc/plugins/plateau
8
+ # --external-images -u raise --output _out/index.adoc index.html
9
+ class Plateau < Plugin
10
+ def name
11
+ "PLATEAU"
12
+ end
13
+
14
+ def preprocess_html_tree
15
+ # Let's simplify the tree by removing what's extraneous
16
+ # html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
17
+ # html_tree_replace_with_children_by_css("div.container_box")
18
+ # html_tree_replace_with_children_by_css("div.col.col-12")
19
+ # html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
20
+ # html_tree_replace_with_children_by_css("div.row")
21
+ #
22
+ # We can remove that, but it messes up the images and paragraphs.
23
+
24
+ # Remove side menu, so we can generate TOC ourselves
25
+ html_tree_remove_by_css(".sideMenu")
26
+
27
+ # Correct non-semantic classes into semantic HTML tags
28
+ html_tree_change_tag_name_by_css(".titledata", "h1")
29
+ html_tree_change_tag_name_by_css(".subtitledata", "h2")
30
+ html_tree_change_tag_name_by_css(".pitemdata", "h3")
31
+ html_tree_change_tag_name_by_css(".sitemdata", "h4")
32
+ html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', "th")
33
+
34
+ # Remove some CSS ids that are not important to us
35
+ html_tree_change_properties_by_css("#__nuxt", id: nil)
36
+ html_tree_change_properties_by_css("#__layout", id: nil)
37
+ html_tree_change_properties_by_css("#app", id: nil)
38
+
39
+ # Convert table/img caption to become a caption
40
+ html_tree.css(".imagedata").each do |e|
41
+ table = e.parent.next&.children&.first
42
+ if table&.name == "table"
43
+ e.name = "caption"
44
+ table.prepend_child(e)
45
+ next
46
+ end
47
+
48
+ img = e.parent.previous&.children&.first
49
+ if img&.name == "img" && img["src"]
50
+ title = e.text.strip
51
+ img["title"] = title
52
+ e.remove
53
+ next
54
+ end
55
+ end
56
+
57
+ # Add hooks for H1, H2, H3, H4
58
+ html_tree_add_hook_post_by_css("h1, h2, h3", &method(:handle_headers))
59
+ html_tree_add_hook_post_by_css("h4", &method(:handle_headers_h4))
60
+
61
+ # Table cells aligned to center
62
+ html_tree_change_properties_by_css(".tableTopCenter", align: "center")
63
+
64
+ # Handle non-semantic lists and indentation
65
+ html_tree_add_hook_pre_by_css ".text2data" do |node,|
66
+ text = html_tree_process_to_adoc(node).strip
67
+ next "" if text.empty? || text == "\u3000"
68
+
69
+ if text.start_with?(/\d+\./)
70
+ text = text.sub(/\A\d+.\s*/, "")
71
+ ".. #{text}\n"
72
+ else
73
+ text = text.gsub(/^/, "** ")
74
+ "\n\n//-PT2D\n#{text}\n//-ENDPT2D\n\n"
75
+ end
76
+ end
77
+
78
+ html_tree_add_hook_pre_by_css ".text3data" do |node,|
79
+ text = html_tree_process_to_adoc(node).strip
80
+ next "" if text.empty? || text == "\u3000"
81
+
82
+ text = text.strip.gsub(/^/, "*** ")
83
+ "\n\n//-PT3D\n#{text}\n//-ENDPT3D\n\n"
84
+ end
85
+
86
+ html_tree_add_hook_pre_by_css ".text4data" do |node,|
87
+ text = html_tree_process_to_adoc(node).strip
88
+ next "" if text.empty? || text == "\u3000"
89
+
90
+ text = text.strip.gsub(/^/, "**** ")
91
+ "\n\n//-PT4D\n#{text}\n//-ENDPT4D\n\n"
92
+ end
93
+
94
+ html_tree_add_hook_pre_by_css ".text2data_point ul" do |node,|
95
+ text = html_tree_process_to_adoc(node.children.first.children).strip
96
+
97
+ "** #{text}\n"
98
+ end
99
+
100
+ html_tree_add_hook_pre_by_css ".text3data_point ul" do |node,|
101
+ text = html_tree_process_to_adoc(node.children.first.children).strip
102
+
103
+ "*** #{text}\n"
104
+ end
105
+
106
+ # html_tree_preview
107
+ end
108
+
109
+ def handle_headers(node, coradoc, state)
110
+ if coradoc.id.start_with?("toc0_")
111
+ content = coradoc.content.map(&:content).join
112
+ # Special content
113
+ case content.strip
114
+ when "はじめに" # Introduction
115
+ coradoc.style = "abstract" # The older version document has ".preface"
116
+ when "改定の概要" # Revision overview
117
+ coradoc.style = "abstract" # The older version document has ".preface"
118
+ when "参考文献" # Bibliography
119
+ coradoc.style = "bibliography"
120
+ when "改訂履歴" # Document history
121
+ coradoc.style = "appendix"
122
+ else
123
+ warn "Unknown section #{coradoc.content.map(&:content).join.inspect}"
124
+ end
125
+
126
+ # Ensure they are generated as level 1
127
+ coradoc.level_int = 1
128
+ end
129
+
130
+ # Remove numbers
131
+ coradoc.content.first.content.sub!(/\A[\d\s.]+/, "")
132
+
133
+ coradoc
134
+ end
135
+
136
+ def handle_headers_h4(node, coradoc, state)
137
+ case coradoc.content.first.content
138
+ when /\A\(\d+\)(.*)/
139
+ coradoc.level_int = 4
140
+ coradoc.content.first.content = $1.strip
141
+ coradoc
142
+ when /\A\d+\)(.*)/
143
+ coradoc.level_int = 5
144
+ coradoc.content.first.content = $1.strip
145
+ coradoc
146
+ else
147
+ ["// FIXME\n", coradoc]
148
+ end
149
+ end
150
+
151
+ def postprocess_asciidoc_string
152
+ str = self.asciidoc_string
153
+
154
+ ### Custom indentation handling
155
+ # If there's a step up, add [none]
156
+ str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n")
157
+ str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n")
158
+ str = str.gsub(%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n")
159
+ # Collapse blocks of text[2,3]data
160
+ str = str.gsub(%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n")
161
+ # In the beginning, add [none]
162
+ str = str.gsub(%r{\s+//-PT[234]D\s+}, "\n\n[none]\n")
163
+ # If following with another list, ensure we readd styling
164
+ str = str.gsub(%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*")
165
+ # Otherwise, clean up
166
+ str = str.gsub(%r{\s+//-ENDPT[234]D\s+}, "\n\n")
167
+
168
+ self.asciidoc_string = str
169
+ end
170
+ end
171
+ end
172
+ end
173
+
174
+ Coradoc::ReverseAdoc.config.plugins << Coradoc::ReverseAdoc::Plugin::Plateau
data/lib/coradoc/reverse_adoc/postprocessor.rb ADDED
@@ -0,0 +1,148 @@
1
+ module Coradoc::ReverseAdoc
2
+ # Postprocessor's aim is to convert a Coradoc tree from
3
+ # a mess that has been created from HTML into a tree that
4
+ # is compatible with what we would get out of Coradoc, if
5
+ # it parsed it directly.
6
+ class Postprocessor
7
+ def self.process(coradoc)
8
+ new(coradoc).process
9
+ end
10
+
11
+ def initialize(coradoc)
12
+ @tree = coradoc
13
+ end
14
+
15
+ # Collapse DIVs that only have a title, or nest another DIV.
16
+ def collapse_meaningless_sections
17
+ @tree = Coradoc::Element::Base.visit(@tree) do |elem, _dir|
18
+ if elem.is_a?(Coradoc::Element::Section) && elem.safe_to_collapse?
19
+ children_classes = Array(elem.contents).map(&:class)
20
+ count = children_classes.length
21
+ safe_classes = [Coradoc::Element::Section, Coradoc::Element::Title]
22
+
23
+ # Count > 0 because some documents use <div> as a <br>.
24
+ if count > 0 && children_classes.all? { |i| safe_classes.include?(i) }
25
+ next elem.contents
26
+ end
27
+ end
28
+ elem
29
+ end
30
+ end
31
+
32
+ # tree should now be more cleaned up, so we can progress with
33
+ # creating meaningful sections
34
+ def generate_meaningful_sections
35
+ @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
36
+ # We are searching for an array, that has a title. This
37
+ # will be a candidate for our section array.
38
+ if dir == :post &&
39
+ elem.is_a?(Array) &&
40
+ !elem.grep(Coradoc::Element::Title).empty?
41
+
42
+ new_array = []
43
+ content_array = new_array
44
+ section_arrays_by_level = [new_array] * 8
45
+
46
+ # For each title element, we create a new section. Then we push
47
+ # all descendant sections into those sections. Otherwise, we push
48
+ # an element as content of current section.
49
+ elem.each do |e|
50
+ if e.is_a? Coradoc::Element::Title
51
+ title = e
52
+ content_array = []
53
+ section_array = []
54
+ level = title.level_int
55
+ section = Coradoc::Element::Section.new(
56
+ title, contents: content_array, sections: section_array
57
+ )
58
+ # Some documents may not be consistent and eg. follow H4 after
59
+ # H2. Let's ensure that proceeding sections will land in a
60
+ # correct place.
61
+ (8 - level).times do |j|
62
+ section_arrays_by_level[level + j] = section_array
63
+ end
64
+ section_arrays_by_level[level - 1] << section
65
+ else
66
+ content_array << e
67
+ end
68
+ end
69
+ next new_array
70
+ end
71
+ elem
72
+ end
73
+ end
74
+
75
+ def split_sections
76
+ max_level = Coradoc::ReverseAdoc.config.split_sections
77
+
78
+ return unless max_level
79
+
80
+ sections = {}
81
+ parent_sections = []
82
+ previous_sections = {}
83
+
84
+ determine_section_id = ->(elem) do
85
+ level = 0
86
+ section = elem
87
+ while section
88
+ level += 1 if elem.title.style == section.title.style
89
+ section = previous_sections[section]
90
+ end
91
+ level
92
+ end
93
+
94
+ determine_style = ->(elem) do
95
+ style = elem.title.style || "section"
96
+ style += "-"
97
+ style
98
+ end
99
+
100
+ @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
101
+ title = elem.title if elem.is_a?(Coradoc::Element::Section)
102
+
103
+ if title && title.level_int <= max_level
104
+ if dir == :pre
105
+ # In the PRE pass, we build a tree of sections, so that
106
+ # we can compute numbers
107
+ previous_sections[elem] = parent_sections[title.level_int]
108
+ parent_sections[title.level_int] = elem
109
+ parent_sections[(title.level_int+1)..nil] = nil
110
+
111
+ elem
112
+ else
113
+ # In the POST pass, we replace the sections with their
114
+ # include tag.
115
+ section_file = "sections/"
116
+ section_file += parent_sections[1..title.level_int].map do |parent|
117
+ style = determine_style.(parent)
118
+ "%s%02d" % [style, determine_section_id.(parent)]
119
+ end.join("/")
120
+ section_file += ".adoc"
121
+
122
+ sections[section_file] = elem
123
+ up = "../" * (title.level_int - 1)
124
+ "\ninclude::#{up}#{section_file}[]\n"
125
+ end
126
+ else
127
+ elem
128
+ end
129
+ end
130
+
131
+ sections[nil] = @tree
132
+ @tree = sections
133
+ end
134
+
135
+ def process
136
+ collapse_meaningless_sections
137
+ generate_meaningful_sections
138
+ # Do it again to simplify the document further.
139
+ # Since the structure is changed, we may have new meaningful
140
+ # sections as only children of some meaningless sections.
141
+ collapse_meaningless_sections
142
+
143
+ split_sections
144
+
145
+ @tree
146
+ end
147
+ end
148
+ end
data/lib/coradoc/reverse_adoc.rb CHANGED
@@ -9,6 +9,9 @@ require_relative "reverse_adoc/config"
9
9
  require_relative "reverse_adoc/converters"
10
10
  require_relative "reverse_adoc/converters/base"
11
11
  require_relative "reverse_adoc/html_converter"
12
+ require_relative "reverse_adoc/plugin"
13
+ require_relative "reverse_adoc/postprocessor"
14
+
12
15
 
13
16
  module Coradoc::ReverseAdoc
14
17
  def self.convert(input, options = {})
data/lib/coradoc/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coradoc
4
- VERSION = "0.3.0"
4
+ VERSION = "1.0.0"
5
5
  end
data/lib/reverse_adoc.rb CHANGED
@@ -8,7 +8,7 @@ warn <<~END
8
8
  | You are referencing an old require here:
9
9
  | #{caller.join("\n| ")}
10
10
  |
11
- | You should also replace 'coradoc' with 'reverse_adoc' in your gem dependencies.
11
+ | You should also replace 'reverse_adoc' with 'coradoc' in your gem dependencies.
12
12
  | reverse_adoc 2.0.0 will be kept with 'coradoc' as the only dependency.
13
13
  |
14
14
  | Please also ensure that you replace all references to ReverseAdoc in your code
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coradoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2024-05-21 00:00:00.000000000 Z
12
+ date: 2024-06-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: marcel
@@ -278,7 +278,6 @@ files:
278
278
  - lib/coradoc/element/inline/cross_reference.rb
279
279
  - lib/coradoc/element/inline/hard_line_break.rb
280
280
  - lib/coradoc/element/inline/highlight.rb
281
- - lib/coradoc/element/inline/image.rb
282
281
  - lib/coradoc/element/inline/italic.rb
283
282
  - lib/coradoc/element/inline/link.rb
284
283
  - lib/coradoc/element/inline/monospace.rb
@@ -291,6 +290,7 @@ files:
291
290
  - lib/coradoc/element/list/ordered.rb
292
291
  - lib/coradoc/element/list/unordered.rb
293
292
  - lib/coradoc/element/list_item.rb
293
+ - lib/coradoc/element/list_item_definition.rb
294
294
  - lib/coradoc/element/paragraph.rb
295
295
  - lib/coradoc/element/revision.rb
296
296
  - lib/coradoc/element/section.rb
@@ -323,6 +323,7 @@ files:
323
323
  - lib/coradoc/reverse_adoc/converters/bypass.rb
324
324
  - lib/coradoc/reverse_adoc/converters/code.rb
325
325
  - lib/coradoc/reverse_adoc/converters/div.rb
326
+ - lib/coradoc/reverse_adoc/converters/dl.rb
326
327
  - lib/coradoc/reverse_adoc/converters/drop.rb
327
328
  - lib/coradoc/reverse_adoc/converters/em.rb
328
329
  - lib/coradoc/reverse_adoc/converters/figure.rb
@@ -333,6 +334,7 @@ files:
333
334
  - lib/coradoc/reverse_adoc/converters/img.rb
334
335
  - lib/coradoc/reverse_adoc/converters/li.rb
335
336
  - lib/coradoc/reverse_adoc/converters/mark.rb
337
+ - lib/coradoc/reverse_adoc/converters/markup.rb
336
338
  - lib/coradoc/reverse_adoc/converters/math.rb
337
339
  - lib/coradoc/reverse_adoc/converters/ol.rb
338
340
  - lib/coradoc/reverse_adoc/converters/p.rb
@@ -350,6 +352,9 @@ files:
350
352
  - lib/coradoc/reverse_adoc/converters/video.rb
351
353
  - lib/coradoc/reverse_adoc/errors.rb
352
354
  - lib/coradoc/reverse_adoc/html_converter.rb
355
+ - lib/coradoc/reverse_adoc/plugin.rb
356
+ - lib/coradoc/reverse_adoc/plugins/plateau.rb
357
+ - lib/coradoc/reverse_adoc/postprocessor.rb
353
358
  - lib/coradoc/transformer.rb
354
359
  - lib/coradoc/version.rb
355
360
  - lib/reverse_adoc.rb