coradoc 0.2.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/.docker/Dockerfile +1 -1
  3. data/.docker/docker-compose.yml +2 -2
  4. data/.editorconfig +15 -0
  5. data/CHANGELOG.md +4 -0
  6. data/README.md +4 -0
  7. data/Rakefile +10 -0
  8. data/coradoc.gemspec +11 -2
  9. data/exe/reverse_adoc +91 -0
  10. data/exe/w2a +72 -0
  11. data/lib/coradoc/document.rb +6 -6
  12. data/lib/coradoc/element/admonition.rb +8 -6
  13. data/lib/coradoc/element/attribute.rb +2 -2
  14. data/lib/coradoc/element/attribute_list.rb +94 -15
  15. data/lib/coradoc/element/audio.rb +14 -3
  16. data/lib/coradoc/element/author.rb +18 -14
  17. data/lib/coradoc/element/base.rb +69 -8
  18. data/lib/coradoc/element/block/core.rb +10 -6
  19. data/lib/coradoc/element/block/literal.rb +1 -1
  20. data/lib/coradoc/element/block/quote.rb +1 -1
  21. data/lib/coradoc/element/block/sourcecode.rb +2 -2
  22. data/lib/coradoc/element/break.rb +1 -1
  23. data/lib/coradoc/element/document_attributes.rb +6 -6
  24. data/lib/coradoc/element/header.rb +4 -2
  25. data/lib/coradoc/element/image/block_image.rb +13 -2
  26. data/lib/coradoc/element/image/core.rb +35 -5
  27. data/lib/coradoc/element/image/inline_image.rb +2 -2
  28. data/lib/coradoc/element/image.rb +0 -1
  29. data/lib/coradoc/element/inline/anchor.rb +4 -2
  30. data/lib/coradoc/element/inline/bold.rb +10 -4
  31. data/lib/coradoc/element/inline/cross_reference.rb +4 -2
  32. data/lib/coradoc/element/inline/hard_line_break.rb +1 -1
  33. data/lib/coradoc/element/inline/highlight.rb +12 -6
  34. data/lib/coradoc/element/inline/italic.rb +10 -4
  35. data/lib/coradoc/element/inline/link.rb +26 -10
  36. data/lib/coradoc/element/inline/monospace.rb +10 -4
  37. data/lib/coradoc/element/inline/quotation.rb +4 -1
  38. data/lib/coradoc/element/inline/subscript.rb +5 -2
  39. data/lib/coradoc/element/inline/superscript.rb +5 -2
  40. data/lib/coradoc/element/inline.rb +0 -1
  41. data/lib/coradoc/element/list/core.rb +10 -8
  42. data/lib/coradoc/element/list/definition.rb +19 -0
  43. data/lib/coradoc/element/list/ordered.rb +1 -1
  44. data/lib/coradoc/element/list/unordered.rb +1 -1
  45. data/lib/coradoc/element/list.rb +1 -1
  46. data/lib/coradoc/element/list_item.rb +9 -4
  47. data/lib/coradoc/element/list_item_definition.rb +32 -0
  48. data/lib/coradoc/element/paragraph.rb +5 -3
  49. data/lib/coradoc/element/revision.rb +20 -16
  50. data/lib/coradoc/element/section.rb +21 -4
  51. data/lib/coradoc/element/table.rb +36 -19
  52. data/lib/coradoc/element/text_element.rb +63 -17
  53. data/lib/coradoc/element/title.rb +27 -7
  54. data/lib/coradoc/element/video.rb +33 -6
  55. data/lib/coradoc/generator.rb +2 -2
  56. data/lib/coradoc/legacy_parser.rb +41 -41
  57. data/lib/coradoc/oscal.rb +2 -4
  58. data/lib/coradoc/parser/asciidoc/content.rb +15 -15
  59. data/lib/coradoc/parser/asciidoc/document_attributes.rb +1 -1
  60. data/lib/coradoc/parser/asciidoc/header.rb +6 -6
  61. data/lib/coradoc/parser/asciidoc/section.rb +1 -1
  62. data/lib/coradoc/reverse_adoc/LICENSE.txt +25 -0
  63. data/lib/coradoc/reverse_adoc/README.adoc +308 -0
  64. data/lib/coradoc/reverse_adoc/cleaner.rb +125 -0
  65. data/lib/coradoc/reverse_adoc/config.rb +73 -0
  66. data/lib/coradoc/reverse_adoc/converters/a.rb +47 -0
  67. data/lib/coradoc/reverse_adoc/converters/aside.rb +12 -0
  68. data/lib/coradoc/reverse_adoc/converters/audio.rb +25 -0
  69. data/lib/coradoc/reverse_adoc/converters/base.rb +104 -0
  70. data/lib/coradoc/reverse_adoc/converters/blockquote.rb +18 -0
  71. data/lib/coradoc/reverse_adoc/converters/br.rb +11 -0
  72. data/lib/coradoc/reverse_adoc/converters/bypass.rb +77 -0
  73. data/lib/coradoc/reverse_adoc/converters/code.rb +19 -0
  74. data/lib/coradoc/reverse_adoc/converters/div.rb +14 -0
  75. data/lib/coradoc/reverse_adoc/converters/dl.rb +55 -0
  76. data/lib/coradoc/reverse_adoc/converters/drop.rb +22 -0
  77. data/lib/coradoc/reverse_adoc/converters/em.rb +17 -0
  78. data/lib/coradoc/reverse_adoc/converters/figure.rb +21 -0
  79. data/lib/coradoc/reverse_adoc/converters/h.rb +38 -0
  80. data/lib/coradoc/reverse_adoc/converters/head.rb +19 -0
  81. data/lib/coradoc/reverse_adoc/converters/hr.rb +11 -0
  82. data/lib/coradoc/reverse_adoc/converters/ignore.rb +16 -0
  83. data/lib/coradoc/reverse_adoc/converters/img.rb +98 -0
  84. data/lib/coradoc/reverse_adoc/converters/li.rb +13 -0
  85. data/lib/coradoc/reverse_adoc/converters/mark.rb +15 -0
  86. data/lib/coradoc/reverse_adoc/converters/markup.rb +27 -0
  87. data/lib/coradoc/reverse_adoc/converters/math.rb +31 -0
  88. data/lib/coradoc/reverse_adoc/converters/ol.rb +60 -0
  89. data/lib/coradoc/reverse_adoc/converters/p.rb +19 -0
  90. data/lib/coradoc/reverse_adoc/converters/pass_through.rb +13 -0
  91. data/lib/coradoc/reverse_adoc/converters/pre.rb +51 -0
  92. data/lib/coradoc/reverse_adoc/converters/q.rb +12 -0
  93. data/lib/coradoc/reverse_adoc/converters/strong.rb +16 -0
  94. data/lib/coradoc/reverse_adoc/converters/sub.rb +18 -0
  95. data/lib/coradoc/reverse_adoc/converters/sup.rb +18 -0
  96. data/lib/coradoc/reverse_adoc/converters/table.rb +280 -0
  97. data/lib/coradoc/reverse_adoc/converters/td.rb +77 -0
  98. data/lib/coradoc/reverse_adoc/converters/text.rb +28 -0
  99. data/lib/coradoc/reverse_adoc/converters/th.rb +14 -0
  100. data/lib/coradoc/reverse_adoc/converters/tr.rb +18 -0
  101. data/lib/coradoc/reverse_adoc/converters/video.rb +25 -0
  102. data/lib/coradoc/reverse_adoc/converters.rb +53 -0
  103. data/lib/coradoc/reverse_adoc/errors.rb +10 -0
  104. data/lib/coradoc/reverse_adoc/html_converter.rb +150 -0
  105. data/lib/coradoc/reverse_adoc/plugin.rb +131 -0
  106. data/lib/coradoc/reverse_adoc/plugins/plateau.rb +174 -0
  107. data/lib/coradoc/reverse_adoc/postprocessor.rb +148 -0
  108. data/lib/coradoc/reverse_adoc.rb +30 -0
  109. data/lib/coradoc/transformer.rb +24 -14
  110. data/lib/coradoc/version.rb +1 -1
  111. data/lib/reverse_adoc.rb +20 -0
  112. metadata +184 -5
  113. data/lib/coradoc/element/inline/image.rb +0 -25
@@ -0,0 +1,25 @@
1
module Coradoc::ReverseAdoc
  module Converters
    # Converter that turns an HTML <video> node into a
    # Coradoc::Element::Video.
    class Video < Base
      # Builds the Coradoc video element for +node+.
      #
      # The "src" and "id" attributes are read from the node, the title comes
      # from extract_title, and an "options" named attribute is attached when
      # at least one of autoplay/loop/controls is present on the node.
      def to_coradoc(node, _state = {})
        source = node["src"]
        anchor = node["id"]
        caption = extract_title(node)

        attribute_list = Coradoc::Element::AttributeList.new
        playback = options(node)
        attribute_list.add_named("options", playback) if playback.any?

        Coradoc::Element::Video.new(
          caption, id: anchor, src: source, attributes: attribute_list
        )
      end

      # Returns the values of the autoplay, loop and controls attributes
      # of +node+, in that order, leaving out the ones that are absent.
      def options(node)
        %w[autoplay loop controls].map { |attr| node[attr] }.compact
      end
    end

    register :video, Video.new
  end
end
@@ -0,0 +1,53 @@
1
+ module Coradoc::ReverseAdoc
2
# Registry that maps HTML tag names to converter instances and dispatches
# HTML nodes to them.
module Converters
  # Associates +converter+ with +tag_name+ (stored as a Symbol), replacing
  # any converter previously registered for that tag.
  # Returns the converter.
  def self.register(tag_name, converter)
    registry[tag_name.to_sym] = converter
  end

  # Removes the converter registered for +tag_name+.
  # Returns the removed converter, or nil when none was registered
  # (including when nothing has been registered at all yet).
  def self.unregister(tag_name)
    registry.delete(tag_name.to_sym)
  end

  # Returns the converter registered for +tag_name+, falling back to
  # default_converter for tags without one.
  def self.lookup(tag_name)
    registry[tag_name.to_sym] || default_converter(tag_name)
  end

  # Converts +node+ (a single node, an Array of nodes or a
  # Nokogiri::XML::NodeSet) through each converter's #convert; the results
  # for a collection are joined into one string.
  #
  # Note: process won't run plugin hooks
  def self.process(node, state)
    node = node.to_a if node.is_a?(Nokogiri::XML::NodeSet)
    return node.map { |child| process(child, state) }.join if node.is_a?(Array)

    lookup(node.name).convert(node, state)
  end

  # Converts +node+ through each converter's #to_coradoc. A collection
  # yields an Array with one result per node.
  #
  # Every plugin listed in state[:plugin_instances] gets to wrap the
  # conversion with its html_tree_run_hooks; the plugin listed last is the
  # outermost wrapper.
  def self.process_coradoc(node, state)
    node = node.to_a if node.is_a?(Nokogiri::XML::NodeSet)
    return node.map { |child| process_coradoc(child, state) } if node.is_a?(Array)

    plugins = state[:plugin_instances] || []
    conversion = proc { lookup(node.name).to_coradoc(node, state) }
    plugins.each do |plugin|
      inner = conversion
      conversion = proc { plugin.html_tree_run_hooks(node, state, &inner) }
    end
    conversion.call
  end

  # Picks the converter for a tag that has no registered converter, based
  # on Coradoc::ReverseAdoc.config.unknown_tags.
  #
  # Raises UnknownTagError when the setting is :raise, and
  # InvalidConfigurationError when the setting is not one of
  # :pass_through, :drop, :bypass or :raise.
  def self.default_converter(tag_name)
    policy = Coradoc::ReverseAdoc.config.unknown_tags

    case policy.to_sym
    when :pass_through
      Coradoc::ReverseAdoc::Converters::PassThrough.new
    when :drop
      Coradoc::ReverseAdoc::Converters::Drop.new
    when :bypass
      Coradoc::ReverseAdoc::Converters::Bypass.new
    when :raise
      raise UnknownTagError, "unknown tag: #{tag_name}"
    else
      raise InvalidConfigurationError,
            "unknown value #{policy.inspect} for Coradoc::ReverseAdoc.config.unknown_tags"
    end
  end

  # Tag => converter table, created on first use so that unregister and
  # lookup are safe to call before anything has been registered.
  def self.registry
    @@converters ||= {}
  end
  private_class_method :registry
end
53
+ end
@@ -0,0 +1,10 @@
1
module Coradoc::ReverseAdoc
  # Common ancestor of the ReverseAdoc error classes defined below.
  class Error < StandardError; end

  # Signals an HTML tag for which no converter is available.
  class UnknownTagError < Error; end

  # Signals a configuration value that is not supported.
  class InvalidConfigurationError < Error; end
end
@@ -0,0 +1,150 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "converters/markup"
4
+ require_relative "converters/a"
5
+ require_relative "converters/aside"
6
+ require_relative "converters/audio"
7
+ require_relative "converters/blockquote"
8
+ require_relative "converters/br"
9
+ require_relative "converters/bypass"
10
+ require_relative "converters/code"
11
+ require_relative "converters/div"
12
+ require_relative "converters/dl"
13
+ require_relative "converters/drop"
14
+ require_relative "converters/em"
15
+ require_relative "converters/figure"
16
+ require_relative "converters/h"
17
+ require_relative "converters/head"
18
+ require_relative "converters/hr"
19
+ require_relative "converters/ignore"
20
+ require_relative "converters/img"
21
+ require_relative "converters/mark"
22
+ require_relative "converters/li"
23
+ require_relative "converters/ol"
24
+ require_relative "converters/p"
25
+ require_relative "converters/pass_through"
26
+ require_relative "converters/pre"
27
+ require_relative "converters/q"
28
+ require_relative "converters/strong"
29
+ require_relative "converters/sup"
30
+ require_relative "converters/sub"
31
+ require_relative "converters/table"
32
+ require_relative "converters/td"
33
+ require_relative "converters/th"
34
+ require_relative "converters/text"
35
+ require_relative "converters/tr"
36
+ require_relative "converters/video"
37
+ require_relative "converters/math"
38
+
39
+ module Coradoc
40
+ module ReverseAdoc
41
# Converts HTML input into a Coradoc tree and, from there, into AsciiDoc,
# running the configured plugins at each stage.
class HtmlConverter
  # Builds a Coradoc tree from HTML.
  #
  # input   - a String of HTML, a Nokogiri::XML::Document or a
  #           Nokogiri::XML::Node.
  # options - configuration overrides applied through
  #           ReverseAdoc.config.with for the duration of the call. An
  #           optional :plugin_instances entry supplies ready-made plugin
  #           objects; without it, one instance of every configured plugin
  #           class is created. The caller's hash is left untouched.
  #
  # Returns the post-processed Coradoc tree (after plugins had a chance to
  # replace it), or "" when no root node could be obtained from the input.
  def self.to_coradoc(input, options = {})
    # Work on a copy: removing :plugin_instances must not leak back into
    # the hash owned by the caller.
    options = options.dup
    plugin_instances = options.delete(:plugin_instances)

    ReverseAdoc.config.with(options) do
      plugin_instances ||= Coradoc::ReverseAdoc.config.plugins.map(&:new)

      root = track_time "Loading input HTML document" do
        case input
        when String
          Nokogiri::HTML(input).root
        when Nokogiri::XML::Document
          input.root
        when Nokogiri::XML::Node
          input
        end
      end

      return "" unless root

      # Each plugin receives the current tree and may hand back a
      # different one through its html_tree accessor.
      plugin_instances.each do |plugin|
        plugin.html_tree = root
        if plugin.respond_to?(:preprocess_html_tree)
          track_time "Preprocessing document with #{plugin.name} plugin" do
            plugin.preprocess_html_tree
          end
        end
        root = plugin.html_tree
      end

      coradoc = track_time "Converting input document tree to Coradoc tree" do
        Converters.process_coradoc(root, plugin_instances: plugin_instances)
      end

      coradoc = track_time "Post-process Coradoc tree" do
        Postprocessor.process(coradoc)
      end

      plugin_instances.each do |plugin|
        next unless plugin.respond_to?(:postprocess_coradoc_tree)

        plugin.coradoc_tree = coradoc
        track_time "Postprocessing Coradoc tree with #{plugin.name} plugin" do
          plugin.postprocess_coradoc_tree
        end
        coradoc = plugin.coradoc_tree
      end

      coradoc
    end
  end

  # Converts HTML into AsciiDoc.
  #
  # input and options are the same as for to_coradoc; the plugin instances
  # are always created here from the configured plugin classes.
  #
  # Returns an AsciiDoc String, or, when to_coradoc yields a Hash, a Hash
  # with the same keys whose values are the AsciiDoc for each tree.
  def self.convert(input, options = {})
    ReverseAdoc.config.with(options) do
      plugin_instances = Coradoc::ReverseAdoc.config.plugins.map(&:new)

      options = options.merge(plugin_instances: plugin_instances)

      coradoc = to_coradoc(input, options)

      if coradoc.is_a?(Hash)
        coradoc.to_h do |file, tree|
          track_time "Converting file #{file || 'main'}" do
            [file, convert_single_coradoc_to_adoc(file, tree, plugin_instances)]
          end
        end
      else
        convert_single_coradoc_to_adoc(nil, coradoc, plugin_instances)
      end
    end
  end

  # Renders one Coradoc tree as AsciiDoc, tidies the text with
  # ReverseAdoc.cleaner and lets every plugin that defines
  # postprocess_asciidoc_string rewrite the result.
  #
  # Returns the final AsciiDoc string.
  def self.convert_single_coradoc_to_adoc(_file, coradoc, plugin_instances)
    result = track_time "Converting Coradoc tree into Asciidoc" do
      Coradoc::Generator.gen_adoc(coradoc)
    end
    result = track_time "Cleaning up the result" do
      ReverseAdoc.cleaner.tidy(result)
    end
    plugin_instances.each do |plugin|
      next unless plugin.respond_to?(:postprocess_asciidoc_string)

      plugin.asciidoc_string = result
      track_time "Postprocessing AsciiDoc string with #{plugin.name} plugin" do
        plugin.postprocess_asciidoc_string
      end
      result = plugin.asciidoc_string
    end
    result
  end

  # Nesting depth of the track_time calls currently in progress; used to
  # indent the progress messages.
  @track_time_indentation = 0

  # Runs the given block and returns its value. When
  # ReverseAdoc.config.track_time is set, a "starting" and a "took N
  # seconds" message are written with warn, indented by nesting depth.
  #
  # The nesting depth is restored even when the block raises or exits
  # non-locally, so later messages keep their correct indentation. In that
  # case no "took" message is printed.
  def self.track_time(task)
    return yield unless ReverseAdoc.config.track_time

    warn "#{' ' * @track_time_indentation}* #{task} is starting..."
    @track_time_indentation += 1
    # Monotonic clock: unaffected by system clock adjustments.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    begin
      result = yield
    ensure
      @track_time_indentation -= 1
    end
    time_elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    warn "#{' ' * @track_time_indentation}* #{task} took #{time_elapsed.round(3)} seconds"
    result
  end
end
149
+ end
150
+ end
@@ -0,0 +1,131 @@
1
+ module Coradoc::ReverseAdoc
2
# Base class for ReverseAdoc plugins. A plugin can preprocess the HTML
# tree, hook into the conversion of individual HTML nodes, postprocess the
# Coradoc tree and postprocess the generated AsciiDoc string.
class Plugin
  #### Plugin system general

  # Allow building plugins with a shorthand syntax:
  # plugin = Coradoc::ReverseAdoc::Plugin.new do
  #   def name = "Test"
  # end
  #
  # Called on Plugin itself this returns a new anonymous subclass whose
  # body is the given block; called on a subclass it creates an instance.
  def self.new(&block)
    return Class.new(Plugin, &block) if self == Plugin

    super
  end

  def initialize
    # Hooks keyed by HTML node; see html_tree_add_hook_pre/_post.
    @html_tree_hooks_pre = {}
    @html_tree_hooks_post = {}
  end

  # define name to name a Plugin
  def name
    self.class.name
  end

  #### HTML Tree functionalities

  attr_accessor :html_tree

  # Renames every element matching +css+ to +new_name+.
  def html_tree_change_tag_name_by_css(css, new_name)
    html_tree.css(css).each do |element|
      element.name = new_name
    end
  end

  # Sets the given attributes on every element matching +css+.
  #
  # properties - Hash of attribute name => value. A nil value removes the
  #              attribute instead of assigning it, so that e.g. `id: nil`
  #              really drops the id rather than leaving an empty one.
  def html_tree_change_properties_by_css(css, properties)
    html_tree.css(css).each do |element|
      properties.each do |key, value|
        if value.nil?
          element.remove_attribute(key.to_s)
        else
          element[key.to_s] = value
        end
      end
    end
  end

  # Removes every element matching +css+ from the tree.
  def html_tree_remove_by_css(css)
    html_tree.css(css).each(&:remove)
  end

  # Replaces every element matching +css+ with its own children.
  def html_tree_replace_with_children_by_css(css)
    html_tree.css(css).each do |element|
      element.replace(element.children)
    end
  end

  # Converts +tree+ to Coradoc nodes via Converters.process_coradoc.
  def html_tree_process_to_coradoc(tree, state={})
    Coradoc::ReverseAdoc::Converters.process_coradoc(tree, state)
  end

  # Converts +tree+ to AsciiDoc text via Converters.process.
  def html_tree_process_to_adoc(tree, state={})
    Coradoc::ReverseAdoc::Converters.process(tree, state)
  end

  # Debugging aid: writes the current HTML tree to a temporary file and
  # opens it with chromium-browser.
  def html_tree_preview
    # Loaded lazily; only this debugging helper needs it.
    require "tempfile"

    Tempfile.open(%w[coradoc .html]) do |file|
      file << html_tree.to_html
      # Make sure the browser does not read a still-buffered, empty file.
      file.flush
      system "chromium-browser", "--no-sandbox", file.path
    end
  end

  # define preprocess_html_tree to process HTML trees

  # Creates a hook to be called instead of converting an element
  # to a Coradoc node.
  #
  # proc |html_node, state|
  #   coradoc_node
  # end
  def html_tree_add_hook_pre(element, &block)
    @html_tree_hooks_pre[element] = block
  end

  # Registers the same pre hook for every element matching +css+.
  def html_tree_add_hook_pre_by_css(css, &block)
    html_tree.css(css).each do |element|
      html_tree_add_hook_pre(element, &block)
    end
  end

  # Creates a hook to be called after converting an element
  # to a Coradoc node.
  #
  # proc |html_node, coradoc_node, state|
  #   coradoc_node
  # end
  def html_tree_add_hook_post(element, &block)
    @html_tree_hooks_post[element] = block
  end

  # Registers the same post hook for every element matching +css+.
  def html_tree_add_hook_post_by_css(css, &block)
    html_tree.css(css).each do |element|
      html_tree_add_hook_post(element, &block)
    end
  end

  # Converts +node+, applying the hooks registered for it.
  #
  # The pre hook, if any, runs first; when it is absent or returns
  # nil/false, the given block performs the regular conversion. The post
  # hook, if any, then receives the result and may replace it.
  def html_tree_run_hooks(node, state, &_block)
    pre_hook = @html_tree_hooks_pre[node]
    post_hook = @html_tree_hooks_post[node]

    coradoc = pre_hook.(node, state) if pre_hook
    coradoc ||= yield node, state
    coradoc = post_hook.(node, coradoc, state) if post_hook

    coradoc
  end

  #### Coradoc tree functionalities

  attr_accessor :coradoc_tree

  # define postprocess_coradoc_tree to change coradoc tree

  #### AsciiDoc string functionalities

  attr_accessor :asciidoc_string

  # define postprocess_asciidoc_string to change the AsciiDoc string
end
131
+ end
@@ -0,0 +1,174 @@
1
+ module Coradoc::ReverseAdoc
2
+ class Plugin
3
+ # This plugin enhances documents from the PLATEAU project
4
+ # to extract more data.
5
+ #
6
+ # Usage:
7
+ # reverse_adoc -rcoradoc/reverse_adoc/plugins/plateau
8
+ # --external-images -u raise --output _out/index.adoc index.html
9
+ class Plateau < Plugin
10
# Fixed display name of this plugin (overrides the class-name default).
def name
  "PLATEAU"
end
13
+
14
# Rewrites the PLATEAU HTML tree before conversion: drops the side menu,
# turns presentational classes into semantic tags, attaches captions to
# tables and images, and registers the conversion hooks for headings and
# for the pseudo-list paragraphs.
def preprocess_html_tree
  # Let's simplify the tree by removing what's extraneous
  # html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
  # html_tree_replace_with_children_by_css("div.container_box")
  # html_tree_replace_with_children_by_css("div.col.col-12")
  # html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
  # html_tree_replace_with_children_by_css("div.row")
  #
  # We can remove that, but it messes up the images and paragraphs.

  # Remove side menu, so we can generate TOC ourselves
  html_tree_remove_by_css(".sideMenu")

  # Correct non-semantic classes into semantic HTML tags
  {
    ".titledata" => "h1",
    ".subtitledata" => "h2",
    ".pitemdata" => "h3",
    ".sitemdata" => "h4",
    'td[bgcolor="#D0CECE"]' => "th",
  }.each do |selector, tag|
    html_tree_change_tag_name_by_css(selector, tag)
  end

  # Remove some CSS ids that are not important to us
  ["#__nuxt", "#__layout", "#app"].each do |selector|
    html_tree_change_properties_by_css(selector, id: nil)
  end

  # Convert table/img caption to become a caption
  html_tree.css(".imagedata").each do |caption|
    # A caption placed right before a table moves into that table.
    following = caption.parent.next&.children&.first
    if following&.name == "table"
      caption.name = "caption"
      following.prepend_child(caption)
      next
    end

    # A caption placed right after an image becomes the image title.
    preceding = caption.parent.previous&.children&.first
    if preceding&.name == "img" && preceding["src"]
      preceding["title"] = caption.text.strip
      caption.remove
      next
    end
  end

  # Add hooks for H1, H2, H3, H4
  html_tree_add_hook_post_by_css("h1, h2, h3", &method(:handle_headers))
  html_tree_add_hook_post_by_css("h4", &method(:handle_headers_h4))

  # Table cells aligned to center
  html_tree_change_properties_by_css(".tableTopCenter", align: "center")

  # Handle non-semantic lists and indentation
  html_tree_add_hook_pre_by_css(".text2data") do |node, _state|
    text = html_tree_process_to_adoc(node).strip
    next "" if text.empty? || text == "\u3000"

    if text.start_with?(/\d+\./)
      # Numbered paragraph: emit it as an ordered list item.
      text = text.sub(/\A\d+.\s*/, "")
      ".. #{text}\n"
    else
      # The //-PT2D markers are resolved in postprocess_asciidoc_string.
      text = text.gsub(/^/, "** ")
      "\n\n//-PT2D\n#{text}\n//-ENDPT2D\n\n"
    end
  end

  # Deeper indentation levels only differ in bullet depth and marker name.
  [3, 4].each do |depth|
    bullet = "*" * depth
    html_tree_add_hook_pre_by_css(".text#{depth}data") do |node, _state|
      text = html_tree_process_to_adoc(node).strip
      next "" if text.empty? || text == "\u3000"

      text = text.strip.gsub(/^/, "#{bullet} ")
      "\n\n//-PT#{depth}D\n#{text}\n//-ENDPT#{depth}D\n\n"
    end
  end

  html_tree_add_hook_pre_by_css(".text2data_point ul") do |node, _state|
    item = html_tree_process_to_adoc(node.children.first.children).strip

    "** #{item}\n"
  end

  html_tree_add_hook_pre_by_css(".text3data_point ul") do |node, _state|
    item = html_tree_process_to_adoc(node.children.first.children).strip

    "*** #{item}\n"
  end

  # html_tree_preview
end
108
+
109
# Post hook for H1-H3 headings.
#
# Headings whose id starts with "toc0_" are treated as special sections:
# they get a style derived from their (Japanese) title and are forced to
# level 1. For every heading, a leading run of digits, whitespace and dots
# is stripped from the first content element.
#
# Headings without an id or without any content are returned as they are
# instead of raising NoMethodError.
#
# node    - the HTML node the heading was converted from (unused).
# coradoc - the converted heading; modified in place.
# state   - the conversion state (unused).
#
# Returns coradoc.
def handle_headers(node, coradoc, state)
  if coradoc.id&.start_with?("toc0_")
    content = coradoc.content.map(&:content).join
    # Special content
    case content.strip
    when "はじめに" # Introduction
      coradoc.style = "abstract" # The older version document has ".preface"
    when "改定の概要" # Revision overview
      coradoc.style = "abstract" # The older version document has ".preface"
    when "参考文献" # Bibliography
      coradoc.style = "bibliography"
    when "改訂履歴" # Document history
      coradoc.style = "appendix"
    else
      warn "Unknown section #{content.inspect}"
    end

    # Ensure they are generated as level 1
    coradoc.level_int = 1
  end

  # Remove numbers
  coradoc.content.first&.content&.sub!(/\A[\d\s.]+/, "")

  coradoc
end
135
+
136
# Post hook for H4 headings. The numbering prefix of the first content
# element decides the real level: "(1) ..." becomes level 4 and "1) ..."
# becomes level 5, with the prefix removed from the text. Any other
# heading is returned preceded by a "// FIXME" comment line.
def handle_headers_h4(node, coradoc, state)
  level =
    case coradoc.content.first.content
    when /\A\(\d+\)(.*)/ then 4
    when /\A\d+\)(.*)/ then 5
    end

  return ["// FIXME\n", coradoc] unless level

  # The capture group of whichever pattern matched holds the bare title.
  bare_title = Regexp.last_match(1)
  coradoc.level_int = level
  coradoc.content.first.content = bare_title.strip
  coradoc
end
150
+
151
# Resolves the //-PTnD ... //-ENDPTnD markers that the pre hooks left in
# the generated AsciiDoc, turning them into list styling. The rewrites
# are applied one after another, in the order listed.
def postprocess_asciidoc_string
  rewrites = [
    ### Custom indentation handling
    # If there's a step up, add [none]
    [%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n"],
    [%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n"],
    [%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n"],
    # Collapse adjacent marked blocks of any level
    [%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n"],
    # In the beginning, add [none]
    [%r{\s+//-PT[234]D\s+}, "\n\n[none]\n"],
    # If following with another list, ensure we readd styling
    [%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*"],
    # Otherwise, clean up
    [%r{\s+//-ENDPT[234]D\s+}, "\n\n"],
  ]

  self.asciidoc_string = rewrites.reduce(asciidoc_string) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
170
+ end
171
+ end
172
+ end
173
+
174
+ Coradoc::ReverseAdoc.config.plugins << Coradoc::ReverseAdoc::Plugin::Plateau
@@ -0,0 +1,148 @@
1
+ module Coradoc::ReverseAdoc
2
+ # Postprocessor's aim is to convert a Coradoc tree from
3
+ # a mess that has been created from HTML into a tree that
4
+ # is compatible with what we would get out of Coradoc, if
5
+ # it parsed it directly.
6
class Postprocessor
  # Convenience entry point: post-processes +coradoc+ and returns the
  # resulting tree.
  def self.process(coradoc)
    new(coradoc).process
  end

  def initialize(coradoc)
    @tree = coradoc
  end

  # Replaces sections that carry nothing but titles and/or nested sections
  # by their contents (these come from DIVs that only wrap a title or
  # another DIV).
  def collapse_meaningless_sections
    collapsible = [Coradoc::Element::Section, Coradoc::Element::Title]

    @tree = Coradoc::Element::Base.visit(@tree) do |elem, _dir|
      next elem unless elem.is_a?(Coradoc::Element::Section) && elem.safe_to_collapse?

      child_classes = Array(elem.contents).map(&:class)

      # Empty sections are kept because some documents use <div> as a <br>.
      if !child_classes.empty? && child_classes.all? { |klass| collapsible.include?(klass) }
        elem.contents
      else
        elem
      end
    end
  end

  # With the tree cleaned up, turns flat arrays that contain titles into
  # nested sections.
  def generate_meaningful_sections
    @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
      # Only arrays holding at least one title are candidates, and they
      # are rewritten on the way back up.
      candidate = dir == :post &&
        elem.is_a?(Array) &&
        elem.grep(Coradoc::Element::Title).any?
      next elem unless candidate

      top_level = []
      current_content = top_level
      # Index n holds the array that receives a section of level n + 1.
      # Initially every level lands in the top-level array.
      targets_by_level = Array.new(8, top_level)

      elem.each do |child|
        # Anything that is not a title is content of the section that was
        # opened last.
        unless child.is_a?(Coradoc::Element::Title)
          current_content << child
          next
        end

        # A title opens a new section.
        current_content = []
        subsections = []
        level = child.level_int
        section = Coradoc::Element::Section.new(
          child, contents: current_content, sections: subsections
        )

        # Documents may skip levels (e.g. H4 right after H2), so every
        # deeper level is pointed at this section's subsections.
        (level...8).each { |index| targets_by_level[index] = subsections }
        targets_by_level[level - 1] << section
      end

      top_level
    end
  end

  # When config.split_sections is set, replaces every section up to that
  # level by an include directive and collects the sections in a Hash of
  # file name => section. The remaining main tree is stored under the
  # nil key, and the Hash becomes the new tree.
  def split_sections
    max_level = Coradoc::ReverseAdoc.config.split_sections

    return unless max_level

    sections = {}
    parent_sections = []
    previous_sections = {}

    # Position of a section among the sections of the same title style
    # recorded before it at the same level (1-based).
    determine_section_id = lambda do |elem|
      ordinal = 0
      cursor = elem
      while cursor
        ordinal += 1 if elem.title.style == cursor.title.style
        cursor = previous_sections[cursor]
      end
      ordinal
    end

    # File name prefix: the title style, or "section" without one.
    determine_style = lambda do |elem|
      (elem.title.style || "section") + "-"
    end

    @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
      title = elem.title if elem.is_a?(Coradoc::Element::Section)
      next elem unless title && title.level_int <= max_level

      level = title.level_int

      if dir == :pre
        # In the PRE pass, we build a tree of sections, so that
        # we can compute numbers
        previous_sections[elem] = parent_sections[level]
        parent_sections[level] = elem
        parent_sections[(level + 1)..nil] = nil

        elem
      else
        # In the POST pass, we replace the sections with their
        # include tag.
        path_parts = parent_sections[1..level].map do |parent|
          format("%s%02d", determine_style.(parent), determine_section_id.(parent))
        end
        section_file = "sections/#{path_parts.join('/')}.adoc"

        sections[section_file] = elem
        "\ninclude::#{'../' * (level - 1)}#{section_file}[]\n"
      end
    end

    sections[nil] = @tree
    @tree = sections
  end

  # Runs all passes and returns the resulting tree.
  def process
    collapse_meaningless_sections
    generate_meaningful_sections
    # Collapse once more: restructuring may have left meaningful sections
    # as the only children of meaningless ones.
    collapse_meaningless_sections

    split_sections

    @tree
  end
end
148
+ end