coradoc 0.3.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/exe/reverse_adoc +24 -3
  4. data/lib/coradoc/document.rb +1 -0
  5. data/lib/coradoc/element/admonition.rb +2 -2
  6. data/lib/coradoc/element/attribute.rb +2 -2
  7. data/lib/coradoc/element/attribute_list.rb +94 -15
  8. data/lib/coradoc/element/audio.rb +13 -2
  9. data/lib/coradoc/element/author.rb +4 -2
  10. data/lib/coradoc/element/base.rb +70 -7
  11. data/lib/coradoc/element/block/core.rb +8 -4
  12. data/lib/coradoc/element/block/quote.rb +1 -1
  13. data/lib/coradoc/element/break.rb +1 -1
  14. data/lib/coradoc/element/document_attributes.rb +6 -6
  15. data/lib/coradoc/element/header.rb +4 -2
  16. data/lib/coradoc/element/image/block_image.rb +13 -2
  17. data/lib/coradoc/element/image/core.rb +34 -5
  18. data/lib/coradoc/element/image/inline_image.rb +2 -2
  19. data/lib/coradoc/element/inline/anchor.rb +4 -2
  20. data/lib/coradoc/element/inline/bold.rb +9 -4
  21. data/lib/coradoc/element/inline/cross_reference.rb +4 -2
  22. data/lib/coradoc/element/inline/hard_line_break.rb +1 -1
  23. data/lib/coradoc/element/inline/highlight.rb +11 -6
  24. data/lib/coradoc/element/inline/italic.rb +9 -4
  25. data/lib/coradoc/element/inline/link.rb +22 -6
  26. data/lib/coradoc/element/inline/monospace.rb +9 -4
  27. data/lib/coradoc/element/inline/quotation.rb +3 -1
  28. data/lib/coradoc/element/inline/subscript.rb +4 -2
  29. data/lib/coradoc/element/inline/superscript.rb +4 -2
  30. data/lib/coradoc/element/list/core.rb +9 -6
  31. data/lib/coradoc/element/list/definition.rb +19 -0
  32. data/lib/coradoc/element/list/ordered.rb +1 -1
  33. data/lib/coradoc/element/list/unordered.rb +1 -1
  34. data/lib/coradoc/element/list.rb +1 -0
  35. data/lib/coradoc/element/list_item.rb +8 -3
  36. data/lib/coradoc/element/list_item_definition.rb +32 -0
  37. data/lib/coradoc/element/paragraph.rb +4 -2
  38. data/lib/coradoc/element/revision.rb +4 -2
  39. data/lib/coradoc/element/section.rb +21 -4
  40. data/lib/coradoc/element/table.rb +27 -10
  41. data/lib/coradoc/element/text_element.rb +48 -8
  42. data/lib/coradoc/element/title.rb +26 -6
  43. data/lib/coradoc/element/video.rb +32 -5
  44. data/lib/coradoc/reverse_adoc/README.adoc +14 -8
  45. data/lib/coradoc/reverse_adoc/cleaner.rb +20 -8
  46. data/lib/coradoc/reverse_adoc/config.rb +35 -16
  47. data/lib/coradoc/reverse_adoc/converters/a.rb +17 -12
  48. data/lib/coradoc/reverse_adoc/converters/aside.rb +0 -4
  49. data/lib/coradoc/reverse_adoc/converters/audio.rb +0 -4
  50. data/lib/coradoc/reverse_adoc/converters/base.rb +48 -44
  51. data/lib/coradoc/reverse_adoc/converters/blockquote.rb +2 -11
  52. data/lib/coradoc/reverse_adoc/converters/br.rb +0 -4
  53. data/lib/coradoc/reverse_adoc/converters/bypass.rb +0 -4
  54. data/lib/coradoc/reverse_adoc/converters/code.rb +5 -42
  55. data/lib/coradoc/reverse_adoc/converters/div.rb +0 -4
  56. data/lib/coradoc/reverse_adoc/converters/dl.rb +55 -0
  57. data/lib/coradoc/reverse_adoc/converters/em.rb +5 -43
  58. data/lib/coradoc/reverse_adoc/converters/figure.rb +0 -4
  59. data/lib/coradoc/reverse_adoc/converters/h.rb +0 -4
  60. data/lib/coradoc/reverse_adoc/converters/head.rb +0 -4
  61. data/lib/coradoc/reverse_adoc/converters/hr.rb +0 -4
  62. data/lib/coradoc/reverse_adoc/converters/img.rb +21 -16
  63. data/lib/coradoc/reverse_adoc/converters/li.rb +0 -4
  64. data/lib/coradoc/reverse_adoc/converters/mark.rb +5 -11
  65. data/lib/coradoc/reverse_adoc/converters/markup.rb +27 -0
  66. data/lib/coradoc/reverse_adoc/converters/ol.rb +0 -4
  67. data/lib/coradoc/reverse_adoc/converters/p.rb +0 -4
  68. data/lib/coradoc/reverse_adoc/converters/pre.rb +0 -4
  69. data/lib/coradoc/reverse_adoc/converters/q.rb +0 -4
  70. data/lib/coradoc/reverse_adoc/converters/strong.rb +5 -41
  71. data/lib/coradoc/reverse_adoc/converters/sub.rb +6 -4
  72. data/lib/coradoc/reverse_adoc/converters/sup.rb +7 -5
  73. data/lib/coradoc/reverse_adoc/converters/table.rb +215 -4
  74. data/lib/coradoc/reverse_adoc/converters/td.rb +1 -7
  75. data/lib/coradoc/reverse_adoc/converters/text.rb +1 -38
  76. data/lib/coradoc/reverse_adoc/converters/tr.rb +0 -4
  77. data/lib/coradoc/reverse_adoc/converters/video.rb +0 -4
  78. data/lib/coradoc/reverse_adoc/converters.rb +21 -0
  79. data/lib/coradoc/reverse_adoc/html_converter.rb +109 -20
  80. data/lib/coradoc/reverse_adoc/plugin.rb +131 -0
  81. data/lib/coradoc/reverse_adoc/plugins/plateau.rb +174 -0
  82. data/lib/coradoc/reverse_adoc/postprocessor.rb +148 -0
  83. data/lib/coradoc/reverse_adoc.rb +3 -0
  84. data/lib/coradoc/version.rb +1 -1
  85. data/lib/reverse_adoc.rb +1 -1
  86. metadata +8 -3
  87. data/lib/coradoc/element/inline/image.rb +0 -26
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "converters/markup"
3
4
  require_relative "converters/a"
4
5
  require_relative "converters/aside"
5
6
  require_relative "converters/audio"
@@ -8,6 +9,7 @@ require_relative "converters/br"
8
9
  require_relative "converters/bypass"
9
10
  require_relative "converters/code"
10
11
  require_relative "converters/div"
12
+ require_relative "converters/dl"
11
13
  require_relative "converters/drop"
12
14
  require_relative "converters/em"
13
15
  require_relative "converters/figure"
@@ -34,28 +36,115 @@ require_relative "converters/tr"
34
36
  require_relative "converters/video"
35
37
  require_relative "converters/math"
36
38
 
37
module Coradoc
  module ReverseAdoc
    # Drives the HTML -> Coradoc tree -> AsciiDoc pipeline and gives every
    # configured plugin a chance to hook into each stage.
    class HtmlConverter
      # Converts +input+ into a Coradoc tree.
      #
      # @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node]
      #   the HTML to convert; any other type yields +""+
      # @param options [Hash] configuration overrides applied for the
      #   duration of the call; an optional +:plugin_instances+ entry supplies
      #   already-instantiated plugins (used by +convert+ so that the same
      #   instances see every stage)
      # @return [Object] whatever the postprocessor and plugins produce;
      #   +convert+ handles both a single tree and a Hash of file => tree
      def self.to_coradoc(input, options = {})
        # Work on a copy: removing :plugin_instances must not mutate (or fail
        # on a frozen) hash owned by the caller.
        options = options.dup
        plugin_instances = options.delete(:plugin_instances)

        ReverseAdoc.config.with(options) do
          plugin_instances ||= Coradoc::ReverseAdoc.config.plugins.map(&:new)

          root = track_time "Loading input HTML document" do
            case input
            when String
              Nokogiri::HTML(input).root
            when Nokogiri::XML::Document
              input.root
            when Nokogiri::XML::Node
              input
            end
          end

          return "" unless root

          # Each plugin may replace the tree, so the (possibly new) root is
          # read back and handed to the next plugin.
          plugin_instances.each do |plugin|
            plugin.html_tree = root
            if plugin.respond_to?(:preprocess_html_tree)
              track_time "Preprocessing document with #{plugin.name} plugin" do
                plugin.preprocess_html_tree
              end
            end
            root = plugin.html_tree
          end

          coradoc = track_time "Converting input document tree to Coradoc tree" do
            Converters.process_coradoc(root, plugin_instances: plugin_instances)
          end

          coradoc = track_time "Post-process Coradoc tree" do
            Postprocessor.process(coradoc)
          end

          plugin_instances.each do |plugin|
            next unless plugin.respond_to?(:postprocess_coradoc_tree)

            plugin.coradoc_tree = coradoc
            track_time "Postprocessing Coradoc tree with #{plugin.name} plugin" do
              plugin.postprocess_coradoc_tree
            end
            coradoc = plugin.coradoc_tree
          end

          coradoc
        end
      end

      # Converts +input+ into AsciiDoc text.
      #
      # @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node]
      # @param options [Hash] configuration overrides for this call
      # @return [String, Hash] a single AsciiDoc string, or, when
      #   +to_coradoc+ returned a Hash, a Hash mapping each file key to its
      #   AsciiDoc string
      def self.convert(input, options = {})
        ReverseAdoc.config.with(options) do
          plugin_instances = Coradoc::ReverseAdoc.config.plugins.map(&:new)

          options = options.merge(plugin_instances: plugin_instances)

          coradoc = to_coradoc(input, options)

          if coradoc.is_a?(Hash)
            coradoc.to_h do |file, tree|
              track_time "Converting file #{file || 'main'}" do
                [file, convert_single_coradoc_to_adoc(file, tree, plugin_instances)]
              end
            end
          else
            convert_single_coradoc_to_adoc(nil, coradoc, plugin_instances)
          end
        end
      end

      # Generates, tidies and plugin-postprocesses the AsciiDoc for one tree.
      #
      # @param _file [String, nil] file key of the tree (currently unused)
      # @param coradoc [Object] the Coradoc tree to serialize
      # @param plugin_instances [Array] plugins to run on the resulting string
      # @return [String] the final AsciiDoc string
      def self.convert_single_coradoc_to_adoc(_file, coradoc, plugin_instances)
        result = track_time "Converting Coradoc tree into Asciidoc" do
          Coradoc::Generator.gen_adoc(coradoc)
        end
        result = track_time "Cleaning up the result" do
          ReverseAdoc.cleaner.tidy(result)
        end
        plugin_instances.each do |plugin|
          next unless plugin.respond_to?(:postprocess_asciidoc_string)

          plugin.asciidoc_string = result
          track_time "Postprocessing AsciiDoc string with #{plugin.name} plugin" do
            plugin.postprocess_asciidoc_string
          end
          result = plugin.asciidoc_string
        end
        result
      end

      # Current nesting depth of track_time calls; used to indent the log.
      @track_time_indentation = 0

      # Runs the given block and, when the +track_time+ configuration option
      # is set, reports its start and duration on stderr.
      #
      # @param task [String] human-readable description of the work
      # @return [Object] the value of the block
      def self.track_time(task)
        return yield unless ReverseAdoc.config.track_time

        warn "#{' ' * @track_time_indentation}* #{task} is starting..."
        @track_time_indentation += 1
        # A monotonic clock is not affected by wall-clock adjustments.
        started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        begin
          ret = yield
        ensure
          # Restore the indentation even if the block raises or returns
          # early, so later log lines are not shifted.
          @track_time_indentation -= 1
        end
        time_elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
        warn "#{' ' * @track_time_indentation}* #{task} took #{time_elapsed.round(3)} seconds"
        ret
      end
    end
  end
end
@@ -0,0 +1,131 @@
1
module Coradoc
  module ReverseAdoc
    # Base class for ReverseAdoc plugins. A plugin can define any of
    # +preprocess_html_tree+, +postprocess_coradoc_tree+ and
    # +postprocess_asciidoc_string+; the converter calls whichever of them
    # the plugin responds to.
    class Plugin
      #### Plugin system general

      # Allow building plugins with a shorthand syntax:
      # plugin = Coradoc::ReverseAdoc::Plugin.new do
      #   def name = "Test"
      # end
      #
      # Called on Plugin itself this returns a new subclass of Plugin whose
      # body is the given block; called on a subclass it instantiates it.
      def self.new(&block)
        if self == Plugin
          Class.new(Plugin, &block)
        else
          super
        end
      end

      def initialize
        # Hooks are keyed by the HTML node they apply to.
        @html_tree_hooks_pre = {}
        @html_tree_hooks_post = {}
      end

      # define name to name a Plugin
      def name
        self.class.name
      end

      #### HTML Tree functionalities

      # The HTML tree being processed; assigned by the converter before
      # preprocess_html_tree is called and read back afterwards.
      attr_accessor :html_tree

      # Renames every element matching +css+ to +new_name+.
      def html_tree_change_tag_name_by_css(css, new_name)
        html_tree.css(css).each do |e|
          e.name = new_name
        end
      end

      # Sets the given attributes on every element matching +css+.
      # A +nil+ value removes the attribute: assigning nil through
      # Nokogiri's Node#[]= would store an empty string instead.
      #
      # @param css [String] CSS selector
      # @param properties [Hash] attribute name => value (nil to remove)
      def html_tree_change_properties_by_css(css, properties)
        html_tree.css(css).each do |e|
          properties.each do |k, v|
            if v.nil?
              e.remove_attribute(k.to_s)
            else
              e[k.to_s] = v
            end
          end
        end
      end

      # Removes every element matching +css+ from the tree.
      def html_tree_remove_by_css(css)
        html_tree.css(css).each(&:remove)
      end

      # Replaces every element matching +css+ with its own children.
      def html_tree_replace_with_children_by_css(css)
        html_tree.css(css).each do |e|
          e.replace(e.children)
        end
      end

      # Converts +tree+ to Coradoc nodes using the regular converters.
      def html_tree_process_to_coradoc(tree, state = {})
        Coradoc::ReverseAdoc::Converters.process_coradoc(tree, state)
      end

      # Converts +tree+ using Converters.process
      # (presumably yielding AsciiDoc text -- Plateau calls #strip on it).
      def html_tree_process_to_adoc(tree, state = {})
        Coradoc::ReverseAdoc::Converters.process(tree, state)
      end

      # Debugging aid: dumps the current HTML tree to a temporary file and
      # opens it in chromium-browser.
      def html_tree_preview
        # Lazy require: only this debugging helper needs Tempfile.
        require "tempfile"

        Tempfile.open(%w[coradoc .html]) do |i|
          i << html_tree.to_html
          # Make sure the content is on disk before the browser reads it.
          i.flush
          system "chromium-browser", "--no-sandbox", i.path
        end
      end

      # define preprocess_html_tree to process HTML trees

      # Creates a hook to be called instead of converting an element
      # to a Coradoc node.
      #
      # proc |html_node, state|
      #   coradoc_node
      # end
      def html_tree_add_hook_pre(element, &block)
        @html_tree_hooks_pre[element] = block
      end

      # Registers the same pre hook for every element matching +css+.
      def html_tree_add_hook_pre_by_css(css, &block)
        html_tree.css(css).each do |e|
          html_tree_add_hook_pre(e, &block)
        end
      end

      # Creates a hook to be called after converting an element
      # to a Coradoc node.
      #
      # proc |html_node, coradoc_node, state|
      #   coradoc_node
      # end
      def html_tree_add_hook_post(element, &block)
        @html_tree_hooks_post[element] = block
      end

      # Registers the same post hook for every element matching +css+.
      def html_tree_add_hook_post_by_css(css, &block)
        html_tree.css(css).each do |e|
          html_tree_add_hook_post(e, &block)
        end
      end

      # Converts +node+, honouring the hooks registered for it.
      #
      # The pre hook, if any, runs first; when it is absent or returns a
      # falsy value the given block performs the regular conversion. The
      # post hook, if any, then receives the result and may replace it.
      #
      # @return [Object] the resulting Coradoc node
      def html_tree_run_hooks(node, state, &_block)
        hook_pre = @html_tree_hooks_pre[node]
        hook_post = @html_tree_hooks_post[node]

        coradoc = hook_pre.(node, state) if hook_pre
        coradoc ||= yield node, state

        coradoc = hook_post.(node, coradoc, state) if hook_post

        coradoc
      end

      #### Coradoc tree functionalities

      # The Coradoc tree; assigned by the converter before
      # postprocess_coradoc_tree is called and read back afterwards.
      attr_accessor :coradoc_tree

      # define postprocess_coradoc_tree to change coradoc tree

      #### AsciiDoc string functionalities

      # The generated AsciiDoc; assigned by the converter before
      # postprocess_asciidoc_string is called and read back afterwards.
      attr_accessor :asciidoc_string

      # define postprocess_asciidoc_string to change the AsciiDoc string
    end
  end
end
@@ -0,0 +1,174 @@
1
module Coradoc
  module ReverseAdoc
    class Plugin
      # This plugin enhances documents from the PLATEAU project
      # to extract more data.
      #
      # Usage:
      # reverse_adoc -rcoradoc/reverse_adoc/plugins/plateau
      #   --external-images -u raise --output _out/index.adoc index.html
      class Plateau < Plugin
        def name
          "PLATEAU"
        end

        # Rewrites the PLATEAU HTML into more semantic HTML and registers
        # the conversion hooks used for headings and pseudo-lists.
        def preprocess_html_tree
          # Let's simplify the tree by removing what's extraneous
          # html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
          # html_tree_replace_with_children_by_css("div.container_box")
          # html_tree_replace_with_children_by_css("div.col.col-12")
          # html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
          # html_tree_replace_with_children_by_css("div.row")
          #
          # We can remove that, but it messes up the images and paragraphs.

          # Remove side menu, so we can generate TOC ourselves
          html_tree_remove_by_css(".sideMenu")

          # Correct non-semantic classes into semantic HTML tags
          html_tree_change_tag_name_by_css(".titledata", "h1")
          html_tree_change_tag_name_by_css(".subtitledata", "h2")
          html_tree_change_tag_name_by_css(".pitemdata", "h3")
          html_tree_change_tag_name_by_css(".sitemdata", "h4")
          html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', "th")

          # Remove some CSS ids that are not important to us
          html_tree_change_properties_by_css("#__nuxt", id: nil)
          html_tree_change_properties_by_css("#__layout", id: nil)
          html_tree_change_properties_by_css("#app", id: nil)

          # Convert table/img caption to become a caption
          html_tree.css(".imagedata").each do |e|
            # Caption placed before a table: move it into the table.
            table = e.parent.next&.children&.first
            if table&.name == "table"
              e.name = "caption"
              table.prepend_child(e)
              next
            end

            # Caption placed after an image: store it as the image title.
            img = e.parent.previous&.children&.first
            if img&.name == "img" && img["src"]
              img["title"] = e.text.strip
              e.remove
            end
          end

          # Add hooks for H1, H2, H3, H4
          html_tree_add_hook_post_by_css("h1, h2, h3", &method(:handle_headers))
          html_tree_add_hook_post_by_css("h4", &method(:handle_headers_h4))

          # Table cells aligned to center
          html_tree_change_properties_by_css(".tableTopCenter", align: "center")

          # Handle non-semantic lists and indentation
          html_tree_add_hook_pre_by_css ".text2data" do |node, _state|
            text = html_tree_process_to_adoc(node).strip
            # String#strip keeps the ideographic space, so test it explicitly.
            next "" if text.empty? || text == "\u3000"

            if text.start_with?(/\d+\./)
              # "1. foo" style entries become ordered list items.
              ".. #{text.sub(/\A\d+\.\s*/, '')}\n"
            else
              marked_list_block(text, "** ", "PT2D")
            end
          end

          add_indented_text_hook(".text3data", "*** ", "PT3D")
          add_indented_text_hook(".text4data", "**** ", "PT4D")

          add_point_list_hook(".text2data_point ul", "** ")
          add_point_list_hook(".text3data_point ul", "*** ")

          # html_tree_preview
        end

        # Post hook for h1-h3: assigns styles to the special top-level
        # sections and strips the leading section number from the title.
        #
        # @param node the HTML heading node (unused)
        # @param coradoc the converted title; must respond to id, content,
        #   style= and level_int=
        # @param state converter state (unused)
        # @return the same +coradoc+ object
        def handle_headers(node, coradoc, state)
          # Headings without an id attribute have a nil id; they are simply
          # not special sections.
          if coradoc.id&.start_with?("toc0_")
            content = coradoc.content.map(&:content).join
            # Special content
            case content.strip
            when "はじめに" # Introduction
              coradoc.style = "abstract" # The older version document has ".preface"
            when "改定の概要" # Revision overview
              coradoc.style = "abstract" # The older version document has ".preface"
            when "参考文献" # Bibliography
              coradoc.style = "bibliography"
            when "改訂履歴" # Document history
              coradoc.style = "appendix"
            else
              warn "Unknown section #{content.inspect}"
            end

            # Ensure they are generated as level 1
            coradoc.level_int = 1
          end

          # Remove numbers. Reassign instead of mutating in place so that a
          # frozen string does not raise.
          first = coradoc.content.first
          first.content = first.content.sub(/\A[\d\s.]+/, "") if first

          coradoc
        end

        # Post hook for h4: derives the real heading level from the
        # numbering style of the title text.
        #
        # "(1) Foo" becomes a level 4 title, "1) Foo" a level 5 title; in
        # both cases the numbering is removed. Anything else is returned
        # prefixed with a FIXME comment line.
        def handle_headers_h4(node, coradoc, state)
          case coradoc.content.first.content
          when /\A\(\d+\)(.*)/
            coradoc.level_int = 4
            coradoc.content.first.content = Regexp.last_match(1).strip
            coradoc
          when /\A\d+\)(.*)/
            coradoc.level_int = 5
            coradoc.content.first.content = Regexp.last_match(1).strip
            coradoc
          else
            ["// FIXME\n", coradoc]
          end
        end

        # Resolves the //-PTnD ... //-ENDPTnD marker comments emitted by the
        # text*data hooks into list styling attributes.
        def postprocess_asciidoc_string
          str = asciidoc_string

          ### Custom indentation handling
          # If there's a step up, add [none]
          str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n")
          str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n")
          str = str.gsub(%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n")
          # Collapse blocks of text[2,3,4]data
          str = str.gsub(%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n")
          # In the beginning, add [none]
          str = str.gsub(%r{\s+//-PT[234]D\s+}, "\n\n[none]\n")
          # If following with another list, ensure we readd styling
          str = str.gsub(%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*")
          # Otherwise, clean up
          str = str.gsub(%r{\s+//-ENDPT[234]D\s+}, "\n\n")

          self.asciidoc_string = str
        end

        private

        # Prefixes every line of +text+ with +bullet+ and wraps the result
        # in the marker comments consumed by postprocess_asciidoc_string.
        def marked_list_block(text, bullet, marker)
          "\n\n//-#{marker}\n#{text.gsub(/^/, bullet)}\n//-END#{marker}\n\n"
        end

        # Registers a pre hook turning elements matching +css+ into a
        # marked pseudo-list block; blank elements produce nothing.
        def add_indented_text_hook(css, bullet, marker)
          html_tree_add_hook_pre_by_css css do |node, _state|
            text = html_tree_process_to_adoc(node).strip
            next "" if text.empty? || text == "\u3000"

            marked_list_block(text, bullet, marker)
          end
        end

        # Registers a pre hook turning the first child's content of
        # elements matching +css+ into a single list item line.
        def add_point_list_hook(css, bullet)
          html_tree_add_hook_pre_by_css css do |node, _state|
            text = html_tree_process_to_adoc(node.children.first.children).strip

            "#{bullet}#{text}\n"
          end
        end
      end
    end
  end
end
173
+
174
+ Coradoc::ReverseAdoc.config.plugins << Coradoc::ReverseAdoc::Plugin::Plateau
@@ -0,0 +1,148 @@
1
module Coradoc
  module ReverseAdoc
    # Postprocessor's aim is to convert a Coradoc tree from
    # a mess that has been created from HTML into a tree that
    # is compatible with what we would get out of Coradoc, if
    # it parsed it directly.
    class Postprocessor
      # Path segment used by split_sections for a heading level that was
      # skipped in the source document (e.g. an H3 directly below an H1).
      MISSING_LEVEL_SEGMENT = "section-00"

      # Convenience wrapper: builds a Postprocessor for +coradoc+ and runs it.
      def self.process(coradoc)
        new(coradoc).process
      end

      # @param coradoc the Coradoc tree to post-process
      def initialize(coradoc)
        @tree = coradoc
      end

      # Collapse DIVs that only have a title, or nest another DIV.
      def collapse_meaningless_sections
        @tree = Coradoc::Element::Base.visit(@tree) do |elem, _dir|
          if elem.is_a?(Coradoc::Element::Section) && elem.safe_to_collapse?
            children_classes = Array(elem.contents).map(&:class)
            safe_classes = [Coradoc::Element::Section, Coradoc::Element::Title]

            # Non-empty because some documents use <div> as a <br>.
            if !children_classes.empty? &&
                children_classes.all? { |klass| safe_classes.include?(klass) }
              # Replace the section with its own children.
              next elem.contents
            end
          end
          elem
        end
      end

      # tree should now be more cleaned up, so we can progress with
      # creating meaningful sections
      def generate_meaningful_sections
        @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
          # We are searching for an array, that has a title. This
          # will be a candidate for our section array.
          if dir == :post &&
              elem.is_a?(Array) &&
              !elem.grep(Coradoc::Element::Title).empty?

            new_array = []
            content_array = new_array
            # Index i holds the array that receives sections of level i + 1.
            # Initially every level lands directly in the resulting array.
            section_arrays_by_level = [new_array] * 8

            # For each title element, we create a new section. Then we push
            # all descendant sections into those sections. Otherwise, we push
            # an element as content of current section.
            elem.each do |e|
              if e.is_a? Coradoc::Element::Title
                title = e
                content_array = []
                section_array = []
                level = title.level_int
                section = Coradoc::Element::Section.new(
                  title, contents: content_array, sections: section_array
                )
                # Some documents may not be consistent and eg. follow H4 after
                # H2. Let's ensure that subsequent sections will land in a
                # correct place.
                (8 - level).times do |j|
                  section_arrays_by_level[level + j] = section_array
                end
                section_arrays_by_level[level - 1] << section
              else
                content_array << e
              end
            end
            next new_array
          end
          elem
        end
      end

      # Splits titled sections up to the configured +split_sections+ level
      # into separate files.
      #
      # Does nothing when the option is not set. Otherwise every such
      # section is replaced in the tree by an include directive, and the
      # tree becomes a Hash mapping each section file name to its section,
      # with the remaining main tree stored under the +nil+ key.
      def split_sections
        max_level = Coradoc::ReverseAdoc.config.split_sections

        return unless max_level

        sections = {}
        # parent_sections[level] is the section currently open at +level+.
        parent_sections = []
        # Maps a section to the section that preceded it on the same level
        # under the same parent.
        previous_sections = {}

        # 1-based position of +elem+ among the preceding sections of the
        # same style.
        determine_section_id = lambda do |elem|
          position = 0
          section = elem
          while section
            position += 1 if elem.title.style == section.title.style
            section = previous_sections[section]
          end
          position
        end

        determine_style = ->(elem) { "#{elem.title.style || 'section'}-" }

        @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
          title = elem.title if elem.is_a?(Coradoc::Element::Section)

          if title && title.level_int <= max_level
            level = title.level_int

            if dir == :pre
              # In the PRE pass, we build a tree of sections, so that
              # we can compute numbers
              previous_sections[elem] = parent_sections[level]
              parent_sections[level] = elem
              # Forget everything deeper: it belonged to the previous section.
              parent_sections[(level + 1)..] = nil

              elem
            else
              # In the POST pass, we replace the sections with their
              # include tag.
              ancestry = parent_sections[1..level]
              segments = ancestry.map do |parent|
                # A skipped heading level has no section of its own; use a
                # placeholder so that the path stays unique.
                next MISSING_LEVEL_SEGMENT unless parent

                format("%s%02d", determine_style.(parent),
                       determine_section_id.(parent))
              end
              section_file = "sections/#{segments.join('/')}.adoc"

              sections[section_file] = elem
              # The directive ends up in the file of the nearest existing
              # ancestor, so climb one directory per real ancestor.
              up = "../" * (ancestry.compact.length - 1)
              "\ninclude::#{up}#{section_file}[]\n"
            end
          else
            elem
          end
        end

        sections[nil] = @tree
        @tree = sections
      end

      # Runs all post-processing steps and returns the resulting tree
      # (a Hash of file name => tree when sections were split).
      def process
        collapse_meaningless_sections
        generate_meaningful_sections
        # Do it again to simplify the document further.
        # Since the structure is changed, we may have new meaningful
        # sections as only children of some meaningless sections.
        collapse_meaningless_sections

        split_sections

        @tree
      end
    end
  end
end
@@ -9,6 +9,9 @@ require_relative "reverse_adoc/config"
9
9
  require_relative "reverse_adoc/converters"
10
10
  require_relative "reverse_adoc/converters/base"
11
11
  require_relative "reverse_adoc/html_converter"
12
+ require_relative "reverse_adoc/plugin"
13
+ require_relative "reverse_adoc/postprocessor"
14
+
12
15
 
13
16
  module Coradoc::ReverseAdoc
14
17
  def self.convert(input, options = {})
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coradoc
4
- VERSION = "0.3.0"
4
+ VERSION = "1.0.0"
5
5
  end
data/lib/reverse_adoc.rb CHANGED
@@ -8,7 +8,7 @@ warn <<~END
8
8
  | You are referencing an old require here:
9
9
  | #{caller.join("\n| ")}
10
10
  |
11
- | You should also replace 'coradoc' with 'reverse_adoc' in your gem dependencies.
11
+ | You should also replace 'reverse_adoc' with 'coradoc' in your gem dependencies.
12
12
  | reverse_adoc 2.0.0 will be kept with 'coradoc' as the only dependency.
13
13
  |
14
14
  | Please also ensure that you replace all references to ReverseAdoc in your code
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coradoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2024-05-21 00:00:00.000000000 Z
12
+ date: 2024-06-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: marcel
@@ -278,7 +278,6 @@ files:
278
278
  - lib/coradoc/element/inline/cross_reference.rb
279
279
  - lib/coradoc/element/inline/hard_line_break.rb
280
280
  - lib/coradoc/element/inline/highlight.rb
281
- - lib/coradoc/element/inline/image.rb
282
281
  - lib/coradoc/element/inline/italic.rb
283
282
  - lib/coradoc/element/inline/link.rb
284
283
  - lib/coradoc/element/inline/monospace.rb
@@ -291,6 +290,7 @@ files:
291
290
  - lib/coradoc/element/list/ordered.rb
292
291
  - lib/coradoc/element/list/unordered.rb
293
292
  - lib/coradoc/element/list_item.rb
293
+ - lib/coradoc/element/list_item_definition.rb
294
294
  - lib/coradoc/element/paragraph.rb
295
295
  - lib/coradoc/element/revision.rb
296
296
  - lib/coradoc/element/section.rb
@@ -323,6 +323,7 @@ files:
323
323
  - lib/coradoc/reverse_adoc/converters/bypass.rb
324
324
  - lib/coradoc/reverse_adoc/converters/code.rb
325
325
  - lib/coradoc/reverse_adoc/converters/div.rb
326
+ - lib/coradoc/reverse_adoc/converters/dl.rb
326
327
  - lib/coradoc/reverse_adoc/converters/drop.rb
327
328
  - lib/coradoc/reverse_adoc/converters/em.rb
328
329
  - lib/coradoc/reverse_adoc/converters/figure.rb
@@ -333,6 +334,7 @@ files:
333
334
  - lib/coradoc/reverse_adoc/converters/img.rb
334
335
  - lib/coradoc/reverse_adoc/converters/li.rb
335
336
  - lib/coradoc/reverse_adoc/converters/mark.rb
337
+ - lib/coradoc/reverse_adoc/converters/markup.rb
336
338
  - lib/coradoc/reverse_adoc/converters/math.rb
337
339
  - lib/coradoc/reverse_adoc/converters/ol.rb
338
340
  - lib/coradoc/reverse_adoc/converters/p.rb
@@ -350,6 +352,9 @@ files:
350
352
  - lib/coradoc/reverse_adoc/converters/video.rb
351
353
  - lib/coradoc/reverse_adoc/errors.rb
352
354
  - lib/coradoc/reverse_adoc/html_converter.rb
355
+ - lib/coradoc/reverse_adoc/plugin.rb
356
+ - lib/coradoc/reverse_adoc/plugins/plateau.rb
357
+ - lib/coradoc/reverse_adoc/postprocessor.rb
353
358
  - lib/coradoc/transformer.rb
354
359
  - lib/coradoc/version.rb
355
360
  - lib/reverse_adoc.rb