coradoc 0.2.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/.docker/Dockerfile +1 -1
  3. data/.docker/docker-compose.yml +2 -2
  4. data/.editorconfig +15 -0
  5. data/CHANGELOG.md +4 -0
  6. data/README.md +4 -0
  7. data/Rakefile +10 -0
  8. data/coradoc.gemspec +11 -2
  9. data/exe/reverse_adoc +91 -0
  10. data/exe/w2a +72 -0
  11. data/lib/coradoc/document.rb +6 -6
  12. data/lib/coradoc/element/admonition.rb +8 -6
  13. data/lib/coradoc/element/attribute.rb +2 -2
  14. data/lib/coradoc/element/attribute_list.rb +94 -15
  15. data/lib/coradoc/element/audio.rb +14 -3
  16. data/lib/coradoc/element/author.rb +18 -14
  17. data/lib/coradoc/element/base.rb +69 -8
  18. data/lib/coradoc/element/block/core.rb +10 -6
  19. data/lib/coradoc/element/block/literal.rb +1 -1
  20. data/lib/coradoc/element/block/quote.rb +1 -1
  21. data/lib/coradoc/element/block/sourcecode.rb +2 -2
  22. data/lib/coradoc/element/break.rb +1 -1
  23. data/lib/coradoc/element/document_attributes.rb +6 -6
  24. data/lib/coradoc/element/header.rb +4 -2
  25. data/lib/coradoc/element/image/block_image.rb +13 -2
  26. data/lib/coradoc/element/image/core.rb +35 -5
  27. data/lib/coradoc/element/image/inline_image.rb +2 -2
  28. data/lib/coradoc/element/image.rb +0 -1
  29. data/lib/coradoc/element/inline/anchor.rb +4 -2
  30. data/lib/coradoc/element/inline/bold.rb +10 -4
  31. data/lib/coradoc/element/inline/cross_reference.rb +4 -2
  32. data/lib/coradoc/element/inline/hard_line_break.rb +1 -1
  33. data/lib/coradoc/element/inline/highlight.rb +12 -6
  34. data/lib/coradoc/element/inline/italic.rb +10 -4
  35. data/lib/coradoc/element/inline/link.rb +26 -10
  36. data/lib/coradoc/element/inline/monospace.rb +10 -4
  37. data/lib/coradoc/element/inline/quotation.rb +4 -1
  38. data/lib/coradoc/element/inline/subscript.rb +5 -2
  39. data/lib/coradoc/element/inline/superscript.rb +5 -2
  40. data/lib/coradoc/element/inline.rb +0 -1
  41. data/lib/coradoc/element/list/core.rb +10 -8
  42. data/lib/coradoc/element/list/definition.rb +19 -0
  43. data/lib/coradoc/element/list/ordered.rb +1 -1
  44. data/lib/coradoc/element/list/unordered.rb +1 -1
  45. data/lib/coradoc/element/list.rb +1 -1
  46. data/lib/coradoc/element/list_item.rb +9 -4
  47. data/lib/coradoc/element/list_item_definition.rb +32 -0
  48. data/lib/coradoc/element/paragraph.rb +5 -3
  49. data/lib/coradoc/element/revision.rb +20 -16
  50. data/lib/coradoc/element/section.rb +21 -4
  51. data/lib/coradoc/element/table.rb +36 -19
  52. data/lib/coradoc/element/text_element.rb +63 -17
  53. data/lib/coradoc/element/title.rb +27 -7
  54. data/lib/coradoc/element/video.rb +33 -6
  55. data/lib/coradoc/generator.rb +2 -2
  56. data/lib/coradoc/legacy_parser.rb +41 -41
  57. data/lib/coradoc/oscal.rb +2 -4
  58. data/lib/coradoc/parser/asciidoc/content.rb +15 -15
  59. data/lib/coradoc/parser/asciidoc/document_attributes.rb +1 -1
  60. data/lib/coradoc/parser/asciidoc/header.rb +6 -6
  61. data/lib/coradoc/parser/asciidoc/section.rb +1 -1
  62. data/lib/coradoc/reverse_adoc/LICENSE.txt +25 -0
  63. data/lib/coradoc/reverse_adoc/README.adoc +308 -0
  64. data/lib/coradoc/reverse_adoc/cleaner.rb +125 -0
  65. data/lib/coradoc/reverse_adoc/config.rb +73 -0
  66. data/lib/coradoc/reverse_adoc/converters/a.rb +47 -0
  67. data/lib/coradoc/reverse_adoc/converters/aside.rb +12 -0
  68. data/lib/coradoc/reverse_adoc/converters/audio.rb +25 -0
  69. data/lib/coradoc/reverse_adoc/converters/base.rb +104 -0
  70. data/lib/coradoc/reverse_adoc/converters/blockquote.rb +18 -0
  71. data/lib/coradoc/reverse_adoc/converters/br.rb +11 -0
  72. data/lib/coradoc/reverse_adoc/converters/bypass.rb +77 -0
  73. data/lib/coradoc/reverse_adoc/converters/code.rb +19 -0
  74. data/lib/coradoc/reverse_adoc/converters/div.rb +14 -0
  75. data/lib/coradoc/reverse_adoc/converters/dl.rb +55 -0
  76. data/lib/coradoc/reverse_adoc/converters/drop.rb +22 -0
  77. data/lib/coradoc/reverse_adoc/converters/em.rb +17 -0
  78. data/lib/coradoc/reverse_adoc/converters/figure.rb +21 -0
  79. data/lib/coradoc/reverse_adoc/converters/h.rb +38 -0
  80. data/lib/coradoc/reverse_adoc/converters/head.rb +19 -0
  81. data/lib/coradoc/reverse_adoc/converters/hr.rb +11 -0
  82. data/lib/coradoc/reverse_adoc/converters/ignore.rb +16 -0
  83. data/lib/coradoc/reverse_adoc/converters/img.rb +98 -0
  84. data/lib/coradoc/reverse_adoc/converters/li.rb +13 -0
  85. data/lib/coradoc/reverse_adoc/converters/mark.rb +15 -0
  86. data/lib/coradoc/reverse_adoc/converters/markup.rb +27 -0
  87. data/lib/coradoc/reverse_adoc/converters/math.rb +31 -0
  88. data/lib/coradoc/reverse_adoc/converters/ol.rb +60 -0
  89. data/lib/coradoc/reverse_adoc/converters/p.rb +19 -0
  90. data/lib/coradoc/reverse_adoc/converters/pass_through.rb +13 -0
  91. data/lib/coradoc/reverse_adoc/converters/pre.rb +51 -0
  92. data/lib/coradoc/reverse_adoc/converters/q.rb +12 -0
  93. data/lib/coradoc/reverse_adoc/converters/strong.rb +16 -0
  94. data/lib/coradoc/reverse_adoc/converters/sub.rb +18 -0
  95. data/lib/coradoc/reverse_adoc/converters/sup.rb +18 -0
  96. data/lib/coradoc/reverse_adoc/converters/table.rb +280 -0
  97. data/lib/coradoc/reverse_adoc/converters/td.rb +77 -0
  98. data/lib/coradoc/reverse_adoc/converters/text.rb +28 -0
  99. data/lib/coradoc/reverse_adoc/converters/th.rb +14 -0
  100. data/lib/coradoc/reverse_adoc/converters/tr.rb +18 -0
  101. data/lib/coradoc/reverse_adoc/converters/video.rb +25 -0
  102. data/lib/coradoc/reverse_adoc/converters.rb +53 -0
  103. data/lib/coradoc/reverse_adoc/errors.rb +10 -0
  104. data/lib/coradoc/reverse_adoc/html_converter.rb +150 -0
  105. data/lib/coradoc/reverse_adoc/plugin.rb +131 -0
  106. data/lib/coradoc/reverse_adoc/plugins/plateau.rb +174 -0
  107. data/lib/coradoc/reverse_adoc/postprocessor.rb +148 -0
  108. data/lib/coradoc/reverse_adoc.rb +30 -0
  109. data/lib/coradoc/transformer.rb +24 -14
  110. data/lib/coradoc/version.rb +1 -1
  111. data/lib/reverse_adoc.rb +20 -0
  112. metadata +184 -5
  113. data/lib/coradoc/element/inline/image.rb +0 -25
@@ -0,0 +1,25 @@
1
module Coradoc::ReverseAdoc
  module Converters
    # Converts an HTML <video> element into a Coradoc::Element::Video.
    class Video < Base
      # HTML boolean attributes that are carried over into the "options"
      # named attribute of the generated element.
      BOOLEAN_OPTIONS = %w[autoplay loop controls].freeze

      # Builds the Coradoc video element for +node+.
      #
      # @param node HTML node; read through node["src"], node["id"] and the
      #   boolean attributes listed in BOOLEAN_OPTIONS
      # @param _state [Hash] conversion state (unused here)
      # @return [Coradoc::Element::Video]
      def to_coradoc(node, _state = {})
        attributes = Coradoc::Element::AttributeList.new
        video_options = options(node)
        attributes.add_named("options", video_options) if video_options.any?

        Coradoc::Element::Video.new(extract_title(node),
                                    id: node["id"],
                                    src: node["src"],
                                    attributes: attributes)
      end

      # Lists the boolean playback attributes present on +node+.
      #
      # For a boolean attribute only its presence is meaningful; the value is
      # either empty or a repetition of the name. The attribute *names* are
      # therefore returned, so that `<video controls>` yields "controls"
      # instead of an empty string.
      #
      # @return [Array<String>] subset of BOOLEAN_OPTIONS, in that order
      def options(node)
        BOOLEAN_OPTIONS.select { |attribute| node[attribute] }
      end
    end

    register :video, Video.new
  end
end
@@ -0,0 +1,53 @@
1
module Coradoc::ReverseAdoc
  # Registry that maps HTML tag names to converter instances, plus the entry
  # points that dispatch a node (or a collection of nodes) to its converter.
  module Converters
    # Registers +converter+ as the handler for +tag_name+.
    def self.register(tag_name, converter)
      registry[tag_name.to_sym] = converter
    end

    # Removes the handler for +tag_name+.
    #
    # @return the removed converter, or nil if none was registered
    def self.unregister(tag_name)
      registry.delete(tag_name.to_sym)
    end

    # Returns the converter registered for +tag_name+, falling back to the
    # converter selected by the +unknown_tags+ configuration.
    def self.lookup(tag_name)
      registry[tag_name.to_sym] || default_converter(tag_name)
    end

    # Converts +node+ through the converters' +convert+ method and, for a
    # collection of nodes, joins the results.
    #
    # Note: process won't run plugin hooks
    def self.process(node, state)
      node = node.to_a if node.is_a? Nokogiri::XML::NodeSet
      return node.map { |child| process(child, state) }.join if node.is_a? Array

      lookup(node.name).convert(node, state)
    end

    # Converts +node+ into Coradoc element(s), giving every plugin listed in
    # state[:plugin_instances] the chance to run its hooks around the
    # conversion.
    #
    # @return [Array] one result per node when given a NodeSet/Array,
    #   otherwise the single conversion result
    def self.process_coradoc(node, state)
      node = node.to_a if node.is_a? Nokogiri::XML::NodeSet
      return node.map { |child| process_coradoc(child, state) } if node.is_a? Array

      plugins = state[:plugin_instances] || []
      base = proc { lookup(node.name).to_coradoc(node, state) }

      # Wrap the plain conversion in one layer per plugin; the plugin listed
      # last ends up outermost. Plain procs (not lambdas) are required
      # because the hooks yield arguments that these closures ignore.
      pipeline = plugins.inject(base) do |inner, plugin|
        proc { plugin.html_tree_run_hooks(node, state, &inner) }
      end
      pipeline.call
    end

    # Picks the fallback converter for a tag without a registered handler,
    # according to Coradoc::ReverseAdoc.config.unknown_tags.
    #
    # @raise [UnknownTagError] when unknown_tags is :raise
    # @raise [InvalidConfigurationError] when unknown_tags has another value
    def self.default_converter(tag_name)
      mode = Coradoc::ReverseAdoc.config.unknown_tags

      case mode.to_sym
      when :pass_through
        Coradoc::ReverseAdoc::Converters::PassThrough.new
      when :drop
        Coradoc::ReverseAdoc::Converters::Drop.new
      when :bypass
        Coradoc::ReverseAdoc::Converters::Bypass.new
      when :raise
        raise UnknownTagError, "unknown tag: #{tag_name}"
      else
        raise InvalidConfigurationError,
              "unknown value #{mode.inspect} for Coradoc::ReverseAdoc.config.unknown_tags"
      end
    end

    # Lazily initialised tag => converter table. Every access goes through
    # here so that lookup/unregister work even before the first register.
    def self.registry
      @@converters ||= {}
    end
    private_class_method :registry
  end
end
@@ -0,0 +1,10 @@
1
module Coradoc::ReverseAdoc
  # Common ancestor of the errors defined by ReverseAdoc.
  class Error < StandardError; end

  # Signals an HTML tag for which no converter is available.
  class UnknownTagError < Error; end

  # Signals a configuration option holding an unsupported value.
  class InvalidConfigurationError < Error; end
end
@@ -0,0 +1,150 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "converters/markup"
4
+ require_relative "converters/a"
5
+ require_relative "converters/aside"
6
+ require_relative "converters/audio"
7
+ require_relative "converters/blockquote"
8
+ require_relative "converters/br"
9
+ require_relative "converters/bypass"
10
+ require_relative "converters/code"
11
+ require_relative "converters/div"
12
+ require_relative "converters/dl"
13
+ require_relative "converters/drop"
14
+ require_relative "converters/em"
15
+ require_relative "converters/figure"
16
+ require_relative "converters/h"
17
+ require_relative "converters/head"
18
+ require_relative "converters/hr"
19
+ require_relative "converters/ignore"
20
+ require_relative "converters/img"
21
+ require_relative "converters/mark"
22
+ require_relative "converters/li"
23
+ require_relative "converters/ol"
24
+ require_relative "converters/p"
25
+ require_relative "converters/pass_through"
26
+ require_relative "converters/pre"
27
+ require_relative "converters/q"
28
+ require_relative "converters/strong"
29
+ require_relative "converters/sup"
30
+ require_relative "converters/sub"
31
+ require_relative "converters/table"
32
+ require_relative "converters/td"
33
+ require_relative "converters/th"
34
+ require_relative "converters/text"
35
+ require_relative "converters/tr"
36
+ require_relative "converters/video"
37
+ require_relative "converters/math"
38
+
39
module Coradoc
  module ReverseAdoc
    # Drives the HTML -> Coradoc tree -> AsciiDoc pipeline, invoking the
    # optional plugin callbacks at each stage.
    class HtmlConverter
      # Parses +input+ and converts it into a post-processed Coradoc tree.
      #
      # @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node]
      #   HTML source or an already parsed tree; any other type is treated
      #   as an empty document
      # @param options [Hash] configuration overrides applied through
      #   ReverseAdoc.config.with. A :plugin_instances entry, when present,
      #   supplies the plugin objects to use instead of instantiating
      #   config.plugins. The hash is not modified.
      # @return the value of Postprocessor.process (possibly rewritten by
      #   plugins), or "" when there is no root node
      def self.to_coradoc(input, options = {})
        plugin_instances = options[:plugin_instances]
        # Strip the internal key on a copy rather than deleting it from the
        # caller's hash, which may be frozen or reused.
        options = options.reject { |key, _| key == :plugin_instances }

        ReverseAdoc.config.with(options) do
          plugin_instances ||= Coradoc::ReverseAdoc.config.plugins.map(&:new)

          root = track_time "Loading input HTML document" do
            case input
            when String
              Nokogiri::HTML(input).root
            when Nokogiri::XML::Document
              input.root
            when Nokogiri::XML::Node
              input
            end
          end

          return "" unless root

          # Every plugin receives the tree; those defining
          # preprocess_html_tree may rewrite or replace it.
          plugin_instances.each do |plugin|
            plugin.html_tree = root
            if plugin.respond_to?(:preprocess_html_tree)
              track_time "Preprocessing document with #{plugin.name} plugin" do
                plugin.preprocess_html_tree
              end
            end
            root = plugin.html_tree
          end

          coradoc = track_time "Converting input document tree to Coradoc tree" do
            Converters.process_coradoc(root, plugin_instances: plugin_instances)
          end

          coradoc = track_time "Post-process Coradoc tree" do
            Postprocessor.process(coradoc)
          end

          plugin_instances.each do |plugin|
            next unless plugin.respond_to?(:postprocess_coradoc_tree)

            plugin.coradoc_tree = coradoc
            track_time "Postprocessing Coradoc tree with #{plugin.name} plugin" do
              plugin.postprocess_coradoc_tree
            end
            coradoc = plugin.coradoc_tree
          end

          coradoc
        end
      end

      # Converts +input+ all the way to AsciiDoc.
      #
      # @param input see to_coradoc
      # @param options [Hash] configuration overrides
      # @return an AsciiDoc string, or - when to_coradoc produced a Hash of
      #   file name => tree - a Hash of file name => AsciiDoc string
      def self.convert(input, options = {})
        ReverseAdoc.config.with(options) do
          plugin_instances = Coradoc::ReverseAdoc.config.plugins.map(&:new)

          # Share one set of plugin objects between both stages so hooks
          # registered during preprocessing stay available.
          coradoc = to_coradoc(input, options.merge(plugin_instances: plugin_instances))

          if coradoc.is_a?(Hash)
            coradoc.to_h do |file, tree|
              track_time "Converting file #{file || 'main'}" do
                [file, convert_single_coradoc_to_adoc(file, tree, plugin_instances)]
              end
            end
          else
            convert_single_coradoc_to_adoc(nil, coradoc, plugin_instances)
          end
        end
      end

      # Generates AsciiDoc for one Coradoc tree, tidies it and lets plugins
      # defining postprocess_asciidoc_string rewrite the text.
      #
      # @param _file file name the tree belongs to (unused)
      # @param coradoc Coradoc tree to serialise
      # @param plugin_instances [Array] plugin objects
      # @return the resulting AsciiDoc text
      def self.convert_single_coradoc_to_adoc(_file, coradoc, plugin_instances)
        result = track_time "Converting Coradoc tree into Asciidoc" do
          Coradoc::Generator.gen_adoc(coradoc)
        end
        result = track_time "Cleaning up the result" do
          ReverseAdoc.cleaner.tidy(result)
        end
        plugin_instances.each do |plugin|
          next unless plugin.respond_to?(:postprocess_asciidoc_string)

          plugin.asciidoc_string = result
          track_time "Postprocessing AsciiDoc string with #{plugin.name} plugin" do
            plugin.postprocess_asciidoc_string
          end
          result = plugin.asciidoc_string
        end
        result
      end

      # Nesting depth of the track_time calls currently running; used to
      # indent the log lines.
      @track_time_indentation = 0

      # Runs the block and, when config.track_time is enabled, reports on
      # stderr when +task+ starts and how long it took.
      #
      # @param task [String] human-readable task description
      # @return the value of the block
      def self.track_time(task)
        return yield unless ReverseAdoc.config.track_time

        warn "#{' ' * @track_time_indentation}* #{task} is starting..."
        @track_time_indentation += 1
        started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        begin
          ret = yield
        ensure
          # Restore the nesting level even when the task raises, so later
          # log output is not left permanently shifted.
          @track_time_indentation -= 1
        end
        elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
        warn "#{' ' * @track_time_indentation}* #{task} took #{elapsed.round(3)} seconds"
        ret
      end
    end
  end
end
@@ -0,0 +1,131 @@
1
module Coradoc::ReverseAdoc
  # Base class for ReverseAdoc plugins. A plugin may define any of
  # preprocess_html_tree, postprocess_coradoc_tree and
  # postprocess_asciidoc_string; the helpers below support writing them.
  class Plugin
    #### Plugin system general

    # Allow building plugins with a shorthand syntax:
    #   plugin = Coradoc::ReverseAdoc::Plugin.new do
    #     def name = "Test"
    #   end
    #
    # Called on Plugin itself this returns a new anonymous subclass whose
    # body is the given block; called on a subclass it instantiates as usual.
    def self.new(&block)
      if self == Plugin
        Class.new(Plugin, &block)
      else
        super
      end
    end

    def initialize
      # Hooks keyed by HTML node; see html_tree_add_hook_pre/_post.
      @html_tree_hooks_pre = {}
      @html_tree_hooks_post = {}
    end

    # define name to name a Plugin
    #
    # Defaults to the class name.
    def name
      self.class.name
    end

    #### HTML Tree functionalities

    attr_accessor :html_tree

    # Renames every element matching +css+ to +new_name+.
    def html_tree_change_tag_name_by_css(css, new_name)
      html_tree.css(css).each do |element|
        element.name = new_name
      end
    end

    # Sets the given attributes on every element matching +css+.
    #
    # @param properties [Hash] attribute name => value; a nil value removes
    #   the attribute. (Assigning nil through `[]=` would stringify it and
    #   most likely leave an empty attribute behind.)
    def html_tree_change_properties_by_css(css, properties)
      html_tree.css(css).each do |element|
        properties.each do |key, value|
          if value.nil?
            element.remove_attribute(key.to_s)
          else
            element[key.to_s] = value
          end
        end
      end
    end

    # Removes every element matching +css+ from the tree.
    def html_tree_remove_by_css(css)
      html_tree.css(css).each(&:remove)
    end

    # Replaces every element matching +css+ with its own children.
    def html_tree_replace_with_children_by_css(css)
      html_tree.css(css).each do |element|
        element.replace(element.children)
      end
    end

    # Converts +tree+ into Coradoc element(s), running plugin hooks listed
    # in state[:plugin_instances].
    def html_tree_process_to_coradoc(tree, state={})
      Coradoc::ReverseAdoc::Converters.process_coradoc(tree, state)
    end

    # Converts +tree+ through Converters.process (no plugin hooks).
    def html_tree_process_to_adoc(tree, state={})
      Coradoc::ReverseAdoc::Converters.process(tree, state)
    end

    # Debugging aid: dumps the current HTML tree into a temporary file and
    # opens it in chromium-browser.
    def html_tree_preview
      require "tempfile" # loaded lazily; only this debugging helper needs it

      Tempfile.open(%w[coradoc .html]) do |file|
        file << html_tree.to_html
        # Make sure the content is on disk before the browser reads the file.
        file.flush
        system "chromium-browser", "--no-sandbox", file.path
      end
    end

    # define preprocess_html_tree to process HTML trees

    # Creates a hook to be called instead of converting an element
    # to a Coradoc node.
    #
    # proc |html_node, state|
    #   coradoc_node
    # end
    def html_tree_add_hook_pre(element, &block)
      @html_tree_hooks_pre[element] = block
    end

    # Registers the same pre hook for every element matching +css+.
    def html_tree_add_hook_pre_by_css(css, &block)
      html_tree.css(css).each do |element|
        html_tree_add_hook_pre(element, &block)
      end
    end

    # Creates a hook to be called after converting an element
    # to a Coradoc node.
    #
    # proc |html_node, coradoc_node, state|
    #   coradoc_node
    # end
    def html_tree_add_hook_post(element, &block)
      @html_tree_hooks_post[element] = block
    end

    # Registers the same post hook for every element matching +css+.
    def html_tree_add_hook_post_by_css(css, &block)
      html_tree.css(css).each do |element|
        html_tree_add_hook_post(element, &block)
      end
    end

    # Converts +node+, applying the hooks registered for it.
    #
    # The pre hook, if any, runs first; when it is absent or returns
    # nil/false the given block performs the regular conversion. The post
    # hook, if any, then receives the result and may replace it.
    #
    # @yield [node, state] regular conversion of the node
    # @return the resulting Coradoc node
    def html_tree_run_hooks(node, state, &_block)
      hook_pre = @html_tree_hooks_pre[node]
      hook_post = @html_tree_hooks_post[node]

      coradoc = hook_pre.(node, state) if hook_pre
      coradoc ||= yield node, state
      coradoc = hook_post.(node, coradoc, state) if hook_post

      coradoc
    end

    #### Coradoc tree functionalities

    attr_accessor :coradoc_tree

    # define postprocess_coradoc_tree to change coradoc tree

    #### AsciiDoc string functionalities

    attr_accessor :asciidoc_string

    # define postprocess_asciidoc_string to change the AsciiDoc string
  end
end
@@ -0,0 +1,174 @@
1
module Coradoc::ReverseAdoc
  class Plugin
    # This plugin enhances documents from the PLATEAU project
    # to extract more data.
    #
    # Usage:
    # reverse_adoc -rcoradoc/reverse_adoc/plugins/plateau
    #   --external-images -u raise --output _out/index.adoc index.html
    class Plateau < Plugin
      def name
        "PLATEAU"
      end

      # Rewrites the PLATEAU HTML into more semantic markup and registers
      # the conversion hooks for headers and pseudo-lists.
      def preprocess_html_tree
        # Let's simplify the tree by removing what's extraneous
        # html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
        # html_tree_replace_with_children_by_css("div.container_box")
        # html_tree_replace_with_children_by_css("div.col.col-12")
        # html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
        # html_tree_replace_with_children_by_css("div.row")
        #
        # We can remove that, but it messes up the images and paragraphs.

        # Remove side menu, so we can generate TOC ourselves
        html_tree_remove_by_css(".sideMenu")

        # Correct non-semantic classes into semantic HTML tags
        html_tree_change_tag_name_by_css(".titledata", "h1")
        html_tree_change_tag_name_by_css(".subtitledata", "h2")
        html_tree_change_tag_name_by_css(".pitemdata", "h3")
        html_tree_change_tag_name_by_css(".sitemdata", "h4")
        html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', "th")

        # Remove some CSS ids that are not important to us
        html_tree_change_properties_by_css("#__nuxt", id: nil)
        html_tree_change_properties_by_css("#__layout", id: nil)
        html_tree_change_properties_by_css("#app", id: nil)

        # Convert table/img caption to become a caption
        html_tree.css(".imagedata").each do |e|
          # A caption placed before a table becomes that table's <caption>.
          table = e.parent.next&.children&.first
          if table&.name == "table"
            e.name = "caption"
            table.prepend_child(e)
            next
          end

          # A caption placed after an image becomes the image's title.
          img = e.parent.previous&.children&.first
          if img&.name == "img" && img["src"]
            img["title"] = e.text.strip
            e.remove
          end
        end

        # Add hooks for H1, H2, H3, H4
        html_tree_add_hook_post_by_css("h1, h2, h3", &method(:handle_headers))
        html_tree_add_hook_post_by_css("h4", &method(:handle_headers_h4))

        # Table cells aligned to center
        html_tree_change_properties_by_css(".tableTopCenter", align: "center")

        # Handle non-semantic lists and indentation
        html_tree_add_hook_pre_by_css ".text2data" do |node,|
          text = html_tree_process_to_adoc(node).strip
          next "" if text.empty? || text == "\u3000"

          if text.start_with?(/\d+\./)
            # Numbered entry: drop the literal "N." prefix (the dot is
            # escaped so it cannot match an arbitrary character).
            text = text.sub(/\A\d+\.\s*/, "")
            ".. #{text}\n"
          else
            text = text.gsub(/^/, "** ")
            "\n\n//-PT2D\n#{text}\n//-ENDPT2D\n\n"
          end
        end

        # .text3data / .text4data only differ by their nesting level. The
        # //-PTnD and //-ENDPTnD markers are resolved later in
        # postprocess_asciidoc_string.
        { ".text3data" => 3, ".text4data" => 4 }.each do |css, level|
          html_tree_add_hook_pre_by_css css do |node,|
            text = html_tree_process_to_adoc(node).strip
            next "" if text.empty? || text == "\u3000"

            text = text.gsub(/^/, "#{'*' * level} ")
            "\n\n//-PT#{level}D\n#{text}\n//-ENDPT#{level}D\n\n"
          end
        end

        html_tree_add_hook_pre_by_css ".text2data_point ul" do |node,|
          text = html_tree_process_to_adoc(node.children.first.children).strip

          "** #{text}\n"
        end

        html_tree_add_hook_pre_by_css ".text3data_point ul" do |node,|
          text = html_tree_process_to_adoc(node.children.first.children).strip

          "*** #{text}\n"
        end

        # html_tree_preview
      end

      # Post hook for h1-h3: assigns a style to the special sections whose
      # id starts with "toc0_", forces those to level 1 and strips the
      # leading numbering from the title.
      #
      # @param node HTML node (unused)
      # @param coradoc converted title element; modified in place
      # @param state conversion state (unused)
      # @return the modified +coradoc+
      def handle_headers(node, coradoc, state)
        # Headers may lack an id altogether, hence the safe navigation.
        if coradoc.id&.start_with?("toc0_")
          content = coradoc.content.map(&:content).join
          # Special content
          case content.strip
          when "はじめに", # Introduction
               "改定の概要" # Revision overview
            coradoc.style = "abstract" # The older version document has ".preface"
          when "参考文献" # Bibliography
            coradoc.style = "bibliography"
          when "改訂履歴" # Document history
            coradoc.style = "appendix"
          else
            warn "Unknown section #{content.inspect}"
          end

          # Ensure they are generated as level 1
          coradoc.level_int = 1
        end

        # Remove numbers
        coradoc.content.first.content.sub!(/\A[\d\s.]+/, "")

        coradoc
      end

      # Post hook for h4: derives the real level from the numbering style,
      # "(1) ..." -> level 4 and "1) ..." -> level 5, and strips that prefix.
      #
      # @return +coradoc+, or ["// FIXME\n", coradoc] when the title matches
      #   neither numbering style
      def handle_headers_h4(node, coradoc, state)
        case coradoc.content.first.content
        when /\A\(\d+\)(.*)/
          coradoc.level_int = 4
          coradoc.content.first.content = Regexp.last_match(1).strip
          coradoc
        when /\A\d+\)(.*)/
          coradoc.level_int = 5
          coradoc.content.first.content = Regexp.last_match(1).strip
          coradoc
        else
          ["// FIXME\n", coradoc]
        end
      end

      # Resolves the //-PTnD ... //-ENDPTnD markers emitted by the pre hooks
      # into list styling. The substitutions depend on their order.
      def postprocess_asciidoc_string
        str = self.asciidoc_string

        ### Custom indentation handling
        # If there's a step up, add [none]
        str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n")
        str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n")
        str = str.gsub(%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n")
        # Collapse blocks of text[2,3,4]data
        str = str.gsub(%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n")
        # In the beginning, add [none]
        str = str.gsub(%r{\s+//-PT[234]D\s+}, "\n\n[none]\n")
        # If following with another list, ensure we readd styling
        str = str.gsub(%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*")
        # Otherwise, clean up
        str = str.gsub(%r{\s+//-ENDPT[234]D\s+}, "\n\n")

        self.asciidoc_string = str
      end
    end
  end
end

# Loading this file enables the plugin.
Coradoc::ReverseAdoc.config.plugins << Coradoc::ReverseAdoc::Plugin::Plateau
@@ -0,0 +1,148 @@
1
module Coradoc::ReverseAdoc
  # Reshapes the Coradoc tree obtained from HTML so that it resembles the
  # tree Coradoc would have produced by parsing equivalent AsciiDoc.
  class Postprocessor
    # Convenience wrapper: builds a Postprocessor for +coradoc+ and runs it.
    def self.process(coradoc)
      new(coradoc).process
    end

    def initialize(coradoc)
      @tree = coradoc
    end

    # Replaces every collapsible Section whose contents consist solely of
    # titles and/or other sections with those contents.
    def collapse_meaningless_sections
      transparent = [Coradoc::Element::Section, Coradoc::Element::Title]

      @tree = Coradoc::Element::Base.visit(@tree) do |elem, _dir|
        next elem unless elem.is_a?(Coradoc::Element::Section) && elem.safe_to_collapse?

        child_classes = Array(elem.contents).map(&:class)
        # An empty section is kept: some documents use <div> as a <br>.
        next elem if child_classes.empty?
        next elem unless child_classes.all? { |klass| transparent.include?(klass) }

        elem.contents
      end
    end

    # With the tree cleaned up, turns every array that contains titles into
    # nested Section elements: each title opens a section, following
    # non-title elements become its contents, and deeper titles become its
    # subsections.
    def generate_meaningful_sections
      @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
        is_candidate = dir == :post &&
          elem.is_a?(Array) &&
          elem.any?(Coradoc::Element::Title)
        next elem unless is_candidate

        top_level = []
        current_contents = top_level
        # slots[n] is the array that receives sections of level n + 1;
        # initially every level lands in the top-level array.
        slots = Array.new(8, top_level)

        elem.each do |item|
          unless item.is_a? Coradoc::Element::Title
            current_contents << item
            next
          end

          current_contents = []
          subsections = []
          depth = item.level_int
          section = Coradoc::Element::Section.new(
            item, contents: current_contents, sections: subsections
          )
          # Documents are not always consistent (e.g. H4 right after H2), so
          # every deeper level is pointed at this section.
          (depth...8).each { |slot| slots[slot] = subsections }
          slots[depth - 1] << section
        end

        top_level
      end
    end

    # When config.split_sections is set, replaces each section up to that
    # level with an include directive and turns @tree into a Hash of
    # file name => section, the remaining main tree being stored under nil.
    def split_sections
      max_level = Coradoc::ReverseAdoc.config.split_sections
      return unless max_level

      files = {}
      open_sections = []
      preceding = {}

      # Position of +sec+ among the sections of its level sharing its title
      # style, found by walking the chain of previously recorded sections.
      ordinal_of = lambda do |sec|
        ordinal = 0
        cursor = sec
        while cursor
          ordinal += 1 if sec.title.style == cursor.title.style
          cursor = preceding[cursor]
        end
        ordinal
      end

      # File name prefix: the title style ("section" by default) and a dash.
      prefix_of = lambda do |sec|
        (sec.title.style || "section") + "-"
      end

      @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
        title = elem.title if elem.is_a?(Coradoc::Element::Section)
        next elem unless title && title.level_int <= max_level

        depth = title.level_int

        if dir == :pre
          # PRE pass: record the section chain so numbers can be computed.
          preceding[elem] = open_sections[depth]
          open_sections[depth] = elem
          open_sections[(depth + 1)..nil] = nil
          next elem
        end

        # POST pass: the section is replaced by its include directive.
        path = open_sections[1..depth].map do |parent|
          format("%s%02d", prefix_of.(parent), ordinal_of.(parent))
        end.join("/")
        section_file = "sections/" + path + ".adoc"

        files[section_file] = elem
        "\ninclude::#{'../' * (depth - 1)}#{section_file}[]\n"
      end

      files[nil] = @tree
      @tree = files
    end

    # Runs the whole pipeline and returns the resulting tree.
    def process
      collapse_meaningless_sections
      generate_meaningful_sections
      # Collapsing again simplifies further: the restructuring may have left
      # new meaningful sections as only children of meaningless ones.
      collapse_meaningless_sections

      split_sections

      @tree
    end
  end
end