coradoc 0.3.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/exe/reverse_adoc +24 -3
  4. data/lib/coradoc/document.rb +1 -0
  5. data/lib/coradoc/element/admonition.rb +2 -2
  6. data/lib/coradoc/element/attribute.rb +2 -2
  7. data/lib/coradoc/element/attribute_list.rb +94 -15
  8. data/lib/coradoc/element/audio.rb +13 -2
  9. data/lib/coradoc/element/author.rb +4 -2
  10. data/lib/coradoc/element/base.rb +70 -7
  11. data/lib/coradoc/element/block/core.rb +8 -4
  12. data/lib/coradoc/element/block/quote.rb +1 -1
  13. data/lib/coradoc/element/block/side.rb +1 -1
  14. data/lib/coradoc/element/break.rb +1 -1
  15. data/lib/coradoc/element/document_attributes.rb +6 -6
  16. data/lib/coradoc/element/header.rb +4 -2
  17. data/lib/coradoc/element/image/block_image.rb +13 -2
  18. data/lib/coradoc/element/image/core.rb +37 -6
  19. data/lib/coradoc/element/image/inline_image.rb +2 -2
  20. data/lib/coradoc/element/inline/anchor.rb +4 -2
  21. data/lib/coradoc/element/inline/bold.rb +9 -4
  22. data/lib/coradoc/element/inline/cross_reference.rb +4 -2
  23. data/lib/coradoc/element/inline/hard_line_break.rb +1 -1
  24. data/lib/coradoc/element/inline/highlight.rb +11 -6
  25. data/lib/coradoc/element/inline/italic.rb +9 -4
  26. data/lib/coradoc/element/inline/link.rb +22 -6
  27. data/lib/coradoc/element/inline/monospace.rb +9 -4
  28. data/lib/coradoc/element/inline/quotation.rb +3 -1
  29. data/lib/coradoc/element/inline/subscript.rb +4 -2
  30. data/lib/coradoc/element/inline/superscript.rb +4 -2
  31. data/lib/coradoc/element/list/core.rb +15 -7
  32. data/lib/coradoc/element/list/definition.rb +22 -1
  33. data/lib/coradoc/element/list/ordered.rb +1 -1
  34. data/lib/coradoc/element/list/unordered.rb +1 -1
  35. data/lib/coradoc/element/list.rb +1 -0
  36. data/lib/coradoc/element/list_item.rb +16 -3
  37. data/lib/coradoc/element/list_item_definition.rb +32 -0
  38. data/lib/coradoc/element/paragraph.rb +6 -4
  39. data/lib/coradoc/element/revision.rb +4 -2
  40. data/lib/coradoc/element/section.rb +27 -4
  41. data/lib/coradoc/element/table.rb +32 -10
  42. data/lib/coradoc/element/text_element.rb +48 -8
  43. data/lib/coradoc/element/title.rb +27 -7
  44. data/lib/coradoc/element/video.rb +32 -5
  45. data/lib/coradoc/reverse_adoc/README.adoc +14 -8
  46. data/lib/coradoc/reverse_adoc/cleaner.rb +21 -10
  47. data/lib/coradoc/reverse_adoc/config.rb +35 -16
  48. data/lib/coradoc/reverse_adoc/converters/a.rb +17 -12
  49. data/lib/coradoc/reverse_adoc/converters/aside.rb +0 -4
  50. data/lib/coradoc/reverse_adoc/converters/audio.rb +0 -4
  51. data/lib/coradoc/reverse_adoc/converters/base.rb +48 -44
  52. data/lib/coradoc/reverse_adoc/converters/blockquote.rb +2 -11
  53. data/lib/coradoc/reverse_adoc/converters/br.rb +0 -4
  54. data/lib/coradoc/reverse_adoc/converters/bypass.rb +0 -4
  55. data/lib/coradoc/reverse_adoc/converters/code.rb +5 -42
  56. data/lib/coradoc/reverse_adoc/converters/div.rb +0 -4
  57. data/lib/coradoc/reverse_adoc/converters/dl.rb +55 -0
  58. data/lib/coradoc/reverse_adoc/converters/em.rb +5 -43
  59. data/lib/coradoc/reverse_adoc/converters/figure.rb +0 -4
  60. data/lib/coradoc/reverse_adoc/converters/h.rb +0 -4
  61. data/lib/coradoc/reverse_adoc/converters/head.rb +0 -4
  62. data/lib/coradoc/reverse_adoc/converters/hr.rb +0 -4
  63. data/lib/coradoc/reverse_adoc/converters/img.rb +30 -18
  64. data/lib/coradoc/reverse_adoc/converters/li.rb +0 -4
  65. data/lib/coradoc/reverse_adoc/converters/mark.rb +5 -11
  66. data/lib/coradoc/reverse_adoc/converters/markup.rb +27 -0
  67. data/lib/coradoc/reverse_adoc/converters/ol.rb +0 -4
  68. data/lib/coradoc/reverse_adoc/converters/p.rb +0 -4
  69. data/lib/coradoc/reverse_adoc/converters/pre.rb +0 -4
  70. data/lib/coradoc/reverse_adoc/converters/q.rb +0 -4
  71. data/lib/coradoc/reverse_adoc/converters/strong.rb +5 -41
  72. data/lib/coradoc/reverse_adoc/converters/sub.rb +6 -4
  73. data/lib/coradoc/reverse_adoc/converters/sup.rb +7 -5
  74. data/lib/coradoc/reverse_adoc/converters/table.rb +240 -4
  75. data/lib/coradoc/reverse_adoc/converters/td.rb +1 -7
  76. data/lib/coradoc/reverse_adoc/converters/text.rb +1 -38
  77. data/lib/coradoc/reverse_adoc/converters/tr.rb +0 -4
  78. data/lib/coradoc/reverse_adoc/converters/video.rb +0 -4
  79. data/lib/coradoc/reverse_adoc/converters.rb +24 -1
  80. data/lib/coradoc/reverse_adoc/html_converter.rb +109 -20
  81. data/lib/coradoc/reverse_adoc/plugin.rb +131 -0
  82. data/lib/coradoc/reverse_adoc/plugins/plateau.rb +206 -0
  83. data/lib/coradoc/reverse_adoc/postprocessor.rb +152 -0
  84. data/lib/coradoc/reverse_adoc.rb +3 -0
  85. data/lib/coradoc/util.rb +10 -0
  86. data/lib/coradoc/version.rb +1 -1
  87. data/lib/coradoc.rb +1 -0
  88. data/lib/reverse_adoc.rb +1 -1
  89. metadata +9 -3
  90. data/lib/coradoc/element/inline/image.rb +0 -26
@@ -4,11 +4,7 @@ module Coradoc::ReverseAdoc
4
4
  def to_coradoc(node, state = {})
5
5
  return treat_empty(node, state) if node.text.strip.empty?
6
6
 
7
- Coradoc::Element::TextElement.new(treat_text(node))
8
- end
9
-
10
- def convert(node, state = {})
11
- Coradoc::Generator.gen_adoc(to_coradoc(node, state))
7
+ Coradoc::Element::TextElement.new(node.text)
12
8
  end
13
9
 
14
10
  private
@@ -25,39 +21,6 @@ module Coradoc::ReverseAdoc
25
21
  ""
26
22
  end
27
23
  end
28
-
29
- def treat_text(node)
30
- text = node.text
31
- text = preserve_nbsp(text)
32
- text = remove_border_newlines(text)
33
- text = remove_inner_newlines(text)
34
- text = escape_keychars(text)
35
-
36
- text = preserve_keychars_within_backticks(text)
37
- escape_links(text)
38
- end
39
-
40
- def preserve_nbsp(text)
41
- text.gsub(/\u00A0/, " ")
42
- end
43
-
44
- def escape_links(text)
45
- text.gsub(/<<([^>]*)>>/, "\\<<\\1>>")
46
- end
47
-
48
- def remove_border_newlines(text)
49
- text.gsub(/\A\n+/, "").gsub(/\n+\z/, "")
50
- end
51
-
52
- def remove_inner_newlines(text)
53
- text.tr("\n\t", " ").squeeze(" ")
54
- end
55
-
56
- def preserve_keychars_within_backticks(text)
57
- text.gsub(/`.*?`/) do |match|
58
- match.gsub('\_', "_").gsub('\*', "*")
59
- end
60
- end
61
24
  end
62
25
 
63
26
  register :text, Text.new
@@ -7,10 +7,6 @@ module Coradoc::ReverseAdoc
7
7
  Coradoc::Element::Table::Row.new(content, header)
8
8
  end
9
9
 
10
- def convert(node, state = {})
11
- Coradoc::Generator.gen_adoc(to_coradoc(node, state))
12
- end
13
-
14
10
  def table_header_row?(node)
15
11
  # node.element_children.all? {|child| child.name.to_sym == :th}
16
12
  node.previous_element.nil?
@@ -12,10 +12,6 @@ module Coradoc::ReverseAdoc
12
12
  attributes: attributes)
13
13
  end
14
14
 
15
- def convert(node, state = {})
16
- Coradoc::Generator.gen_adoc(to_coradoc(node, state))
17
- end
18
-
19
15
  def options(node)
20
16
  autoplay = node["autoplay"]
21
17
  loop_attr = node["loop"]
@@ -10,7 +10,30 @@ module Coradoc::ReverseAdoc
10
10
  end
11
11
 
12
12
  def self.lookup(tag_name)
13
- @@converters[tag_name.to_sym] or default_converter(tag_name)
13
+ converter = @@converters[tag_name.to_sym] || default_converter(tag_name)
14
+ converter = converter.new if converter.respond_to? :new
15
+ converter
16
+ end
17
+
18
+ # Note: process won't run plugin hooks
19
+ def self.process(node, state)
20
+ node = node.to_a if node.is_a? Nokogiri::XML::NodeSet
21
+ return node.map { |i| process(i, state) }.join if node.is_a? Array
22
+
23
+ lookup(node.name).convert(node, state)
24
+ end
25
+
26
+ def self.process_coradoc(node, state)
27
+ node = node.to_a if node.is_a? Nokogiri::XML::NodeSet
28
+ return node.map { |i| process_coradoc(i, state) } if node.is_a? Array
29
+
30
+ plugins = state[:plugin_instances] || {}
31
+ process = proc { lookup(node.name).to_coradoc(node, state) }
32
+ plugins.each do |i|
33
+ prev_process = process
34
+ process = proc { i.html_tree_run_hooks(node, state, &prev_process) }
35
+ end
36
+ process.(node, state)
14
37
  end
15
38
 
16
39
  def self.default_converter(tag_name)
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "converters/markup"
3
4
  require_relative "converters/a"
4
5
  require_relative "converters/aside"
5
6
  require_relative "converters/audio"
@@ -8,6 +9,7 @@ require_relative "converters/br"
8
9
  require_relative "converters/bypass"
9
10
  require_relative "converters/code"
10
11
  require_relative "converters/div"
12
+ require_relative "converters/dl"
11
13
  require_relative "converters/drop"
12
14
  require_relative "converters/em"
13
15
  require_relative "converters/figure"
@@ -34,28 +36,115 @@ require_relative "converters/tr"
34
36
  require_relative "converters/video"
35
37
  require_relative "converters/math"
36
38
 
37
- module Coradoc::ReverseAdoc
38
- class HtmlConverter
39
- def self.to_coradoc(input, options = {})
40
- root = case input
41
- when String
42
- Nokogiri::HTML(input).root
43
- when Nokogiri::XML::Document
44
- input.root
45
- when Nokogiri::XML::Node
46
- input
47
- end
48
-
49
- return "" unless root
50
-
51
- Coradoc::ReverseAdoc.config.with(options) do
52
- Coradoc::ReverseAdoc::Converters.lookup(root.name).to_coradoc(root)
39
+ module Coradoc
40
+ module ReverseAdoc
41
+ class HtmlConverter
42
+ def self.to_coradoc(input, options = {})
43
+ plugin_instances = options.delete(:plugin_instances)
44
+ ReverseAdoc.config.with(options) do
45
+ plugin_instances ||= Coradoc::ReverseAdoc.config.plugins.map(&:new)
46
+
47
+ root = track_time "Loading input HTML document" do
48
+ case input
49
+ when String
50
+ Nokogiri::HTML(input).root
51
+ when Nokogiri::XML::Document
52
+ input.root
53
+ when Nokogiri::XML::Node
54
+ input
55
+ end
56
+ end
57
+
58
+ return "" unless root
59
+
60
+ plugin_instances.each do |plugin|
61
+ plugin.html_tree = root
62
+ if plugin.respond_to?(:preprocess_html_tree)
63
+ track_time "Preprocessing document with #{plugin.name} plugin" do
64
+ plugin.preprocess_html_tree
65
+ end
66
+ end
67
+ root = plugin.html_tree
68
+ end
69
+
70
+ coradoc = track_time "Converting input document tree to Coradoc tree" do
71
+ Converters.process_coradoc(root, plugin_instances: plugin_instances)
72
+ end
73
+
74
+ coradoc = track_time "Post-process Coradoc tree" do
75
+ Postprocessor.process(coradoc)
76
+ end
77
+
78
+ plugin_instances.each do |plugin|
79
+ if plugin.respond_to?(:postprocess_coradoc_tree)
80
+ plugin.coradoc_tree = coradoc
81
+ track_time "Postprocessing Coradoc tree with #{plugin.name} plugin" do
82
+ plugin.postprocess_coradoc_tree
83
+ end
84
+ coradoc = plugin.coradoc_tree
85
+ end
86
+ end
87
+
88
+ coradoc
89
+ end
53
90
  end
54
- end
55
91
 
56
- def self.convert(input, options = {})
57
- result = Coradoc::Generator.gen_adoc(to_coradoc(input, options))
58
- Coradoc::ReverseAdoc.cleaner.tidy(result)
92
+ def self.convert(input, options = {})
93
+ ReverseAdoc.config.with(options) do
94
+ plugin_instances = Coradoc::ReverseAdoc.config.plugins.map(&:new)
95
+
96
+ options = options.merge(plugin_instances: plugin_instances)
97
+
98
+ coradoc = to_coradoc(input, options)
99
+
100
+ if coradoc.is_a?(Hash)
101
+ coradoc.to_h do |file, tree|
102
+ track_time "Converting file #{file || 'main'}" do
103
+ [file, convert_single_coradoc_to_adoc(file, tree, plugin_instances)]
104
+ end
105
+ end
106
+ else
107
+ convert_single_coradoc_to_adoc(nil, coradoc, plugin_instances)
108
+ end
109
+ end
110
+ end
111
+
112
+ def self.convert_single_coradoc_to_adoc(_file, coradoc, plugin_instances)
113
+ result = track_time "Converting Coradoc tree into Asciidoc" do
114
+ Coradoc::Generator.gen_adoc(coradoc)
115
+ end
116
+ result = track_time "Cleaning up the result" do
117
+ ReverseAdoc.cleaner.tidy(result)
118
+ end
119
+ plugin_instances.each do |plugin|
120
+ if plugin.respond_to?(:postprocess_asciidoc_string)
121
+ plugin.asciidoc_string = result
122
+ track_time "Postprocessing AsciiDoc string with #{plugin.name} plugin" do
123
+ plugin.postprocess_asciidoc_string
124
+ end
125
+ result = plugin.asciidoc_string
126
+ end
127
+ end
128
+ result
129
+ end
130
+
131
+ @track_time_indentation = 0
132
+ def self.track_time(task)
133
+ if ReverseAdoc.config.track_time
134
+ warn " " * @track_time_indentation +
135
+ "* #{task} is starting..."
136
+ @track_time_indentation += 1
137
+ t0 = Time.now
138
+ ret = yield
139
+ time_elapsed = Time.now - t0
140
+ @track_time_indentation -= 1
141
+ warn " " * @track_time_indentation +
142
+ "* #{task} took #{time_elapsed.round(3)} seconds"
143
+ ret
144
+ else
145
+ yield
146
+ end
147
+ end
59
148
  end
60
149
  end
61
150
  end
@@ -0,0 +1,131 @@
1
+ module Coradoc::ReverseAdoc
2
+ class Plugin
3
+ #### Plugin system general
4
+
5
+ # Allow building plugins with a shorthand syntax:
6
+ # plugin = Coradoc::ReverseAdoc::Plugin.new do
7
+ # def name = "Test"
8
+ # end
9
+
10
+ def self.new(&block)
11
+ if self == Plugin
12
+ Class.new(Plugin, &block)
13
+ else
14
+ super
15
+ end
16
+ end
17
+
18
+ def initialize
19
+ @html_tree_hooks_pre = {}
20
+ @html_tree_hooks_post = {}
21
+ end
22
+
23
+ # define name to name a Plugin
24
+ def name
25
+ self.class.name
26
+ end
27
+
28
+ #### HTML Tree functionalities
29
+
30
+ attr_accessor :html_tree
31
+
32
+ def html_tree_change_tag_name_by_css(css, new_name)
33
+ html_tree.css(css).each do |e|
34
+ e.name = new_name
35
+ end
36
+ end
37
+
38
+ def html_tree_change_properties_by_css(css, properties)
39
+ html_tree.css(css).each do |e|
40
+ properties.each do |k,v|
41
+ e[k.to_s] = v
42
+ end
43
+ end
44
+ end
45
+
46
+ def html_tree_remove_by_css(css)
47
+ html_tree.css(css).each(&:remove)
48
+ end
49
+
50
+ def html_tree_replace_with_children_by_css(css)
51
+ html_tree.css(css).each do |e|
52
+ e.replace(e.children)
53
+ end
54
+ end
55
+
56
+ def html_tree_process_to_coradoc(tree, state={})
57
+ Coradoc::ReverseAdoc::Converters.process_coradoc(tree, state)
58
+ end
59
+
60
+ def html_tree_process_to_adoc(tree, state={})
61
+ Coradoc::ReverseAdoc::Converters.process(tree, state)
62
+ end
63
+
64
+ def html_tree_preview
65
+ Tempfile.open(%w"coradoc .html") do |i|
66
+ i << html_tree.to_html
67
+ system "chromium-browser", "--no-sandbox", i.path
68
+ end
69
+ end
70
+
71
+ # define preprocess_html_tree to process HTML trees
72
+
73
+ # Creates a hook to be called instead of converting an element
74
+ # to a Coradoc node.
75
+ #
76
+ # proc |html_node, state|
77
+ # coradoc_node
78
+ # end
79
+ def html_tree_add_hook_pre(element, &block)
80
+ @html_tree_hooks_pre[element] = block
81
+ end
82
+
83
+ def html_tree_add_hook_pre_by_css(css, &block)
84
+ html_tree.css(css).each do |e|
85
+ html_tree_add_hook_pre(e, &block)
86
+ end
87
+ end
88
+
89
+ # Creates a hook to be called after converting an element
90
+ # to a Coradoc node.
91
+ #
92
+ # proc |html_node, coradoc_node, state|
93
+ # coradoc_node
94
+ # end
95
+ def html_tree_add_hook_post(element, &block)
96
+ @html_tree_hooks_post[element] = block
97
+ end
98
+
99
+ def html_tree_add_hook_post_by_css(css, &block)
100
+ html_tree.css(css).each do |e|
101
+ html_tree_add_hook_post(e, &block)
102
+ end
103
+ end
104
+
105
+ def html_tree_run_hooks(node, state, &_block)
106
+ hook_pre = @html_tree_hooks_pre[node]
107
+ hook_post = @html_tree_hooks_post[node]
108
+
109
+ coradoc = hook_pre.(node, state) if hook_pre
110
+ coradoc ||= yield node, state
111
+
112
+ if hook_post
113
+ coradoc = hook_post.(node, coradoc, state)
114
+ end
115
+
116
+ coradoc
117
+ end
118
+
119
+ #### Coradoc tree functionalities
120
+
121
+ attr_accessor :coradoc_tree
122
+
123
+ # define postprocess_coradoc_tree to change coradoc tree
124
+
125
+ #### AsciiDoc string functionalities
126
+
127
+ attr_accessor :asciidoc_string
128
+
129
+ # define postprocess_asciidoc_string to change the AsciiDoc string
130
+ end
131
+ end
@@ -0,0 +1,206 @@
1
+ module Coradoc::ReverseAdoc
2
+ class Plugin
3
+ # This plugin enhances documents from the PLATEAU project
4
+ # to extract more data.
5
+ #
6
+ # Usage:
7
+ # reverse_adoc -rcoradoc/reverse_adoc/plugins/plateau
8
+ # --external-images -u raise --output _out/index.adoc index.html
9
+ class Plateau < Plugin
10
+ def name
11
+ "PLATEAU"
12
+ end
13
+
14
+ def preprocess_html_tree
15
+ # Let's simplify the tree by removing what's extraneous
16
+ # html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
17
+ # html_tree_replace_with_children_by_css("div.container_box")
18
+ # html_tree_replace_with_children_by_css("div.col.col-12")
19
+ # html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
20
+ # html_tree_replace_with_children_by_css("div.row")
21
+ #
22
+ # We can remove that, but it messes up the images and paragraphs.
23
+
24
+ # Remove side menu, so we can generate TOC ourselves
25
+ html_tree_remove_by_css(".sideMenu")
26
+
27
+ # Correct non-semantic classes into semantic HTML tags
28
+ html_tree_change_tag_name_by_css(".titledata", "h1")
29
+ html_tree_change_tag_name_by_css(".subtitledata", "h2")
30
+ html_tree_change_tag_name_by_css(".pitemdata", "h3")
31
+ html_tree_change_tag_name_by_css(".sitemdata", "h4")
32
+ html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', "th")
33
+ html_tree_change_tag_name_by_css('td[bgcolor="#d0cece"]', "th")
34
+ html_tree_change_tag_name_by_css('.framedata, .frame_container_box', 'aside')
35
+ html_tree_change_tag_name_by_css('.frame2data', 'pre')
36
+ # Assumption that all code snippets in those documents are XML...
37
+ html_tree_change_properties_by_css(".frame2data", class: "brush:xml;")
38
+
39
+ # Remove some CSS ids that are not important to us
40
+ html_tree_change_properties_by_css("#__nuxt", id: nil)
41
+ html_tree_change_properties_by_css("#__layout", id: nil)
42
+ html_tree_change_properties_by_css("#app", id: nil)
43
+
44
+ # Handle lists of document 02
45
+ html_tree_replace_with_children_by_css(".list_num-wrap")
46
+
47
+ # Convert table/img caption to become a caption
48
+ html_tree.css(".imagedata").each do |e|
49
+ table = e.parent.next&.children&.first
50
+ if table&.name == "table"
51
+ e.name = "caption"
52
+ table.prepend_child(e)
53
+ next
54
+ end
55
+
56
+ img = e.parent.previous&.children&.first
57
+ if img&.name == "img" && img["src"]
58
+ title = e.text.strip
59
+ img["title"] = title
60
+ e.remove
61
+ next
62
+ end
63
+ end
64
+
65
+ # Add hooks for H1, H2, H3, H4
66
+ html_tree_add_hook_post_by_css("h1, h2, h3", &method(:handle_headers))
67
+ html_tree_add_hook_post_by_css("h4", &method(:handle_headers_h4))
68
+
69
+ # Table cells aligned to center
70
+ html_tree_change_properties_by_css(".tableTopCenter", align: "center")
71
+
72
+ # Handle non-semantic lists and indentation
73
+ html_tree_add_hook_pre_by_css ".text2data" do |node,|
74
+ text = html_tree_process_to_adoc(node).strip
75
+ next "" if text.empty? || text == "\u3000"
76
+
77
+ if text.start_with?(/\d+\./)
78
+ text = text.sub(/\A\d+.\s*/, "")
79
+ ".. #{text}\n"
80
+ else
81
+ text = text.gsub(/^/, "** ")
82
+ "\n\n//-PT2D\n#{text}\n//-ENDPT2D\n\n"
83
+ end
84
+ end
85
+
86
+ (3..4).each do |i|
87
+ html_tree_add_hook_pre_by_css ".text#{i}data" do |node,|
88
+ text = html_tree_process_to_adoc(node).strip
89
+ next "" if text.empty? || text == "\u3000"
90
+
91
+ text = text.strip.gsub(/^/, "#{'*' * i} ")
92
+ "\n\n//-PT#{i}D\n#{text}\n//-ENDPT#{i}D\n\n"
93
+ end
94
+ end
95
+
96
+ (2..3).each do |i|
97
+ html_tree_add_hook_pre_by_css ".text#{i}data_point ul" do |node,|
98
+ text = html_tree_process_to_adoc(node.children.first.children).strip
99
+
100
+ "#{'*' * i} #{text}\n"
101
+ end
102
+ end
103
+
104
+ (1..20).each do |i|
105
+ html_tree_add_hook_pre_by_css ".numtextdata_num .list_num#{i}" do |node,|
106
+ text = html_tree_process_to_adoc(node).strip
107
+
108
+ "[start=#{i}]\n. #{text}\n"
109
+ end
110
+ end
111
+
112
+ # html_tree_preview
113
+ end
114
+
115
+ IM = /[A-Z0-9]{1,3}/
116
+
117
+ def handle_headers(node, coradoc, state)
118
+ content = coradoc.content.map(&:content).join
119
+
120
+ if %w[toc0 toc_0].any? { |i| coradoc.id&.start_with?(i) }
121
+ # Special content
122
+ case content.strip
123
+ when "はじめに" # Introduction
124
+ coradoc.style = "abstract" # The older version document has ".preface"
125
+ coradoc.level_int = 1
126
+ when "改定の概要" # Revision overview
127
+ coradoc.style = "abstract" # The older version document has ".preface"
128
+ coradoc.level_int = 1
129
+ when "参考文献" # Bibliography
130
+ coradoc.style = "bibliography"
131
+ coradoc.level_int = 1
132
+ when "改訂履歴" # Document history
133
+ coradoc.style = "appendix"
134
+ coradoc.level_int = 1
135
+ when "0 概要" # Overview
136
+ coradoc.style = "abstract" # I'm not sure this is correct
137
+ coradoc.level_int = 1
138
+ when "索引" # Index
139
+ coradoc.style = "index" # I'm not sure this is correct
140
+ coradoc.level_int = 1
141
+ else
142
+ warn "Unknown section #{content.inspect}"
143
+ end
144
+ end
145
+
146
+ if node.name == "h1"
147
+ if content.start_with?("Annex")
148
+ coradoc.style = "appendix"
149
+ coradoc.content.first.content.sub!(/\AAnnex [A-Z]/, "")
150
+ end
151
+ end
152
+
153
+ # Remove numbers
154
+ coradoc.content.first.content.sub!(/\A(#{IM}\.)*#{IM}[[:space:]]/, "")
155
+
156
+ coradoc
157
+ end
158
+
159
+ def handle_headers_h4(node, coradoc, state)
160
+ title = Coradoc.strip_unicode(coradoc.content.first.content)
161
+ case title
162
+ when /\A\(\d+\)(.*)/
163
+ coradoc.level_int = 4
164
+ coradoc.content.first.content = $1.strip
165
+ coradoc
166
+ when /\A\d+\)(.*)/
167
+ coradoc.level_int = 5
168
+ coradoc.content.first.content = $1.strip
169
+ coradoc
170
+ when /\A#{IM}\.#{IM}\.#{IM}\.#{IM}(.*)/
171
+ coradoc.level_int = 4
172
+ coradoc.content.first.content = $1.strip
173
+ else
174
+ if title.empty?
175
+ # Strip instances of faulty empty paragraphs
176
+ nil
177
+ else
178
+ ["// FIXME\n", coradoc]
179
+ end
180
+ end
181
+ end
182
+
183
+ def postprocess_asciidoc_string
184
+ str = self.asciidoc_string
185
+
186
+ ### Custom indentation handling
187
+ # If there's a step up, add [none]
188
+ str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n")
189
+ str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n")
190
+ str = str.gsub(%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n")
191
+ # Collapse adjacent blocks of text[2,3,4]data
192
+ str = str.gsub(%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n")
193
+ # In the beginning, add [none]
194
+ str = str.gsub(%r{\s+//-PT[234]D\s+}, "\n\n[none]\n")
195
+ # If followed by another list, ensure we re-add styling
196
+ str = str.gsub(%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*")
197
+ # Otherwise, clean up
198
+ str = str.gsub(%r{\s+//-ENDPT[234]D\s+}, "\n\n")
199
+
200
+ self.asciidoc_string = str
201
+ end
202
+ end
203
+ end
204
+ end
205
+
206
+ Coradoc::ReverseAdoc.config.plugins << Coradoc::ReverseAdoc::Plugin::Plateau