coradoc 0.3.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/exe/reverse_adoc +24 -3
- data/lib/coradoc/document.rb +1 -0
- data/lib/coradoc/element/admonition.rb +2 -2
- data/lib/coradoc/element/attribute.rb +2 -2
- data/lib/coradoc/element/attribute_list.rb +94 -15
- data/lib/coradoc/element/audio.rb +13 -2
- data/lib/coradoc/element/author.rb +4 -2
- data/lib/coradoc/element/base.rb +70 -7
- data/lib/coradoc/element/block/core.rb +8 -4
- data/lib/coradoc/element/block/quote.rb +1 -1
- data/lib/coradoc/element/block/side.rb +1 -1
- data/lib/coradoc/element/break.rb +1 -1
- data/lib/coradoc/element/document_attributes.rb +6 -6
- data/lib/coradoc/element/header.rb +4 -2
- data/lib/coradoc/element/image/block_image.rb +13 -2
- data/lib/coradoc/element/image/core.rb +37 -6
- data/lib/coradoc/element/image/inline_image.rb +2 -2
- data/lib/coradoc/element/inline/anchor.rb +4 -2
- data/lib/coradoc/element/inline/bold.rb +9 -4
- data/lib/coradoc/element/inline/cross_reference.rb +4 -2
- data/lib/coradoc/element/inline/hard_line_break.rb +1 -1
- data/lib/coradoc/element/inline/highlight.rb +11 -6
- data/lib/coradoc/element/inline/italic.rb +9 -4
- data/lib/coradoc/element/inline/link.rb +22 -6
- data/lib/coradoc/element/inline/monospace.rb +9 -4
- data/lib/coradoc/element/inline/quotation.rb +3 -1
- data/lib/coradoc/element/inline/subscript.rb +4 -2
- data/lib/coradoc/element/inline/superscript.rb +4 -2
- data/lib/coradoc/element/list/core.rb +15 -7
- data/lib/coradoc/element/list/definition.rb +22 -1
- data/lib/coradoc/element/list/ordered.rb +1 -1
- data/lib/coradoc/element/list/unordered.rb +1 -1
- data/lib/coradoc/element/list.rb +1 -0
- data/lib/coradoc/element/list_item.rb +16 -3
- data/lib/coradoc/element/list_item_definition.rb +32 -0
- data/lib/coradoc/element/paragraph.rb +6 -4
- data/lib/coradoc/element/revision.rb +4 -2
- data/lib/coradoc/element/section.rb +27 -4
- data/lib/coradoc/element/table.rb +32 -10
- data/lib/coradoc/element/text_element.rb +48 -8
- data/lib/coradoc/element/title.rb +27 -7
- data/lib/coradoc/element/video.rb +32 -5
- data/lib/coradoc/reverse_adoc/README.adoc +14 -8
- data/lib/coradoc/reverse_adoc/cleaner.rb +21 -10
- data/lib/coradoc/reverse_adoc/config.rb +35 -16
- data/lib/coradoc/reverse_adoc/converters/a.rb +17 -12
- data/lib/coradoc/reverse_adoc/converters/aside.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/audio.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/base.rb +48 -44
- data/lib/coradoc/reverse_adoc/converters/blockquote.rb +2 -11
- data/lib/coradoc/reverse_adoc/converters/br.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/bypass.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/code.rb +5 -42
- data/lib/coradoc/reverse_adoc/converters/div.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/dl.rb +55 -0
- data/lib/coradoc/reverse_adoc/converters/em.rb +5 -43
- data/lib/coradoc/reverse_adoc/converters/figure.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/h.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/head.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/hr.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/img.rb +30 -18
- data/lib/coradoc/reverse_adoc/converters/li.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/mark.rb +5 -11
- data/lib/coradoc/reverse_adoc/converters/markup.rb +27 -0
- data/lib/coradoc/reverse_adoc/converters/ol.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/p.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/pre.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/q.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/strong.rb +5 -41
- data/lib/coradoc/reverse_adoc/converters/sub.rb +6 -4
- data/lib/coradoc/reverse_adoc/converters/sup.rb +7 -5
- data/lib/coradoc/reverse_adoc/converters/table.rb +240 -4
- data/lib/coradoc/reverse_adoc/converters/td.rb +1 -7
- data/lib/coradoc/reverse_adoc/converters/text.rb +1 -38
- data/lib/coradoc/reverse_adoc/converters/tr.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/video.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters.rb +24 -1
- data/lib/coradoc/reverse_adoc/html_converter.rb +109 -20
- data/lib/coradoc/reverse_adoc/plugin.rb +131 -0
- data/lib/coradoc/reverse_adoc/plugins/plateau.rb +206 -0
- data/lib/coradoc/reverse_adoc/postprocessor.rb +152 -0
- data/lib/coradoc/reverse_adoc.rb +3 -0
- data/lib/coradoc/util.rb +10 -0
- data/lib/coradoc/version.rb +1 -1
- data/lib/coradoc.rb +1 -0
- data/lib/reverse_adoc.rb +1 -1
- metadata +9 -3
- data/lib/coradoc/element/inline/image.rb +0 -26
@@ -4,11 +4,7 @@ module Coradoc::ReverseAdoc
|
|
4
4
|
def to_coradoc(node, state = {})
|
5
5
|
return treat_empty(node, state) if node.text.strip.empty?
|
6
6
|
|
7
|
-
Coradoc::Element::TextElement.new(
|
8
|
-
end
|
9
|
-
|
10
|
-
def convert(node, state = {})
|
11
|
-
Coradoc::Generator.gen_adoc(to_coradoc(node, state))
|
7
|
+
Coradoc::Element::TextElement.new(node.text)
|
12
8
|
end
|
13
9
|
|
14
10
|
private
|
@@ -25,39 +21,6 @@ module Coradoc::ReverseAdoc
|
|
25
21
|
""
|
26
22
|
end
|
27
23
|
end
|
28
|
-
|
29
|
-
def treat_text(node)
|
30
|
-
text = node.text
|
31
|
-
text = preserve_nbsp(text)
|
32
|
-
text = remove_border_newlines(text)
|
33
|
-
text = remove_inner_newlines(text)
|
34
|
-
text = escape_keychars(text)
|
35
|
-
|
36
|
-
text = preserve_keychars_within_backticks(text)
|
37
|
-
escape_links(text)
|
38
|
-
end
|
39
|
-
|
40
|
-
def preserve_nbsp(text)
|
41
|
-
text.gsub(/\u00A0/, " ")
|
42
|
-
end
|
43
|
-
|
44
|
-
def escape_links(text)
|
45
|
-
text.gsub(/<<([^>]*)>>/, "\\<<\\1>>")
|
46
|
-
end
|
47
|
-
|
48
|
-
def remove_border_newlines(text)
|
49
|
-
text.gsub(/\A\n+/, "").gsub(/\n+\z/, "")
|
50
|
-
end
|
51
|
-
|
52
|
-
def remove_inner_newlines(text)
|
53
|
-
text.tr("\n\t", " ").squeeze(" ")
|
54
|
-
end
|
55
|
-
|
56
|
-
def preserve_keychars_within_backticks(text)
|
57
|
-
text.gsub(/`.*?`/) do |match|
|
58
|
-
match.gsub('\_', "_").gsub('\*', "*")
|
59
|
-
end
|
60
|
-
end
|
61
24
|
end
|
62
25
|
|
63
26
|
register :text, Text.new
|
@@ -7,10 +7,6 @@ module Coradoc::ReverseAdoc
|
|
7
7
|
Coradoc::Element::Table::Row.new(content, header)
|
8
8
|
end
|
9
9
|
|
10
|
-
def convert(node, state = {})
|
11
|
-
Coradoc::Generator.gen_adoc(to_coradoc(node, state))
|
12
|
-
end
|
13
|
-
|
14
10
|
def table_header_row?(node)
|
15
11
|
# node.element_children.all? {|child| child.name.to_sym == :th}
|
16
12
|
node.previous_element.nil?
|
@@ -10,7 +10,30 @@ module Coradoc::ReverseAdoc
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def self.lookup(tag_name)
|
13
|
-
@@converters[tag_name.to_sym]
|
13
|
+
converter = @@converters[tag_name.to_sym] || default_converter(tag_name)
|
14
|
+
converter = converter.new if converter.respond_to? :new
|
15
|
+
converter
|
16
|
+
end
|
17
|
+
|
18
|
+
# Converts +node+ with the converter registered for its tag name and
# returns whatever that converter's #convert returns. A Nokogiri node set
# or an Array is converted element by element and the results are joined
# into one String.
#
# Note: process won't run plugin hooks
def self.process(node, state)
  target = node.is_a?(Nokogiri::XML::NodeSet) ? node.to_a : node

  case target
  when Array
    target.map { |child| process(child, state) }.join
  else
    lookup(target.name).convert(target, state)
  end
end
|
25
|
+
|
26
|
+
# Converts +node+ into a Coradoc node through the converter registered for
# its tag name, giving every plugin in state[:plugin_instances] the chance
# to run its per-node hooks around the conversion. A Nokogiri node set or
# an Array is converted element by element and returned as an Array.
def self.process_coradoc(node, state)
  node = node.to_a if node.is_a? Nokogiri::XML::NodeSet
  return node.map { |child| process_coradoc(child, state) } if node.is_a? Array

  plugins = state[:plugin_instances] || []

  # Innermost step: the plain converter. Each plugin wraps the step built
  # so far, so the last plugin in the list ends up outermost.
  conversion = proc { lookup(node.name).to_coradoc(node, state) }
  chain = plugins.reduce(conversion) do |inner, plugin|
    proc { plugin.html_tree_run_hooks(node, state, &inner) }
  end

  chain.call
end
|
15
38
|
|
16
39
|
def self.default_converter(tag_name)
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require_relative "converters/markup"
|
3
4
|
require_relative "converters/a"
|
4
5
|
require_relative "converters/aside"
|
5
6
|
require_relative "converters/audio"
|
@@ -8,6 +9,7 @@ require_relative "converters/br"
|
|
8
9
|
require_relative "converters/bypass"
|
9
10
|
require_relative "converters/code"
|
10
11
|
require_relative "converters/div"
|
12
|
+
require_relative "converters/dl"
|
11
13
|
require_relative "converters/drop"
|
12
14
|
require_relative "converters/em"
|
13
15
|
require_relative "converters/figure"
|
@@ -34,28 +36,115 @@ require_relative "converters/tr"
|
|
34
36
|
require_relative "converters/video"
|
35
37
|
require_relative "converters/math"
|
36
38
|
|
37
|
-
module Coradoc
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
39
|
+
module Coradoc
|
40
|
+
module ReverseAdoc
|
41
|
+
class HtmlConverter
|
42
|
+
# Converts HTML input into a Coradoc tree.
#
# input   - a String of HTML, a Nokogiri::XML::Document or a
#           Nokogiri::XML::Node. Any other input yields no root node.
# options - configuration overrides applied for the duration of the call.
#           The optional :plugin_instances entry supplies already-built
#           plugin instances; without it, one instance of every configured
#           plugin class is created.
#
# Returns the post-processed Coradoc tree, or "" when no root node could
# be obtained from +input+.
def self.to_coradoc(input, options = {})
  # Read the plugin list without touching the caller's hash: the original
  # options object stays intact (and may even be frozen).
  plugin_instances = options[:plugin_instances]
  config_options = options.reject { |key, _| key == :plugin_instances }

  ReverseAdoc.config.with(config_options) do
    plugin_instances ||= Coradoc::ReverseAdoc.config.plugins.map(&:new)

    root = track_time "Loading input HTML document" do
      case input
      when String
        Nokogiri::HTML(input).root
      when Nokogiri::XML::Document
        input.root
      when Nokogiri::XML::Node
        input
      end
    end

    return "" unless root

    # Every plugin receives the tree, may rewrite it, and hands the
    # (possibly replaced) tree on to the next plugin.
    plugin_instances.each do |plugin|
      plugin.html_tree = root
      if plugin.respond_to?(:preprocess_html_tree)
        track_time "Preprocessing document with #{plugin.name} plugin" do
          plugin.preprocess_html_tree
        end
      end
      root = plugin.html_tree
    end

    coradoc = track_time "Converting input document tree to Coradoc tree" do
      Converters.process_coradoc(root, plugin_instances: plugin_instances)
    end

    coradoc = track_time "Post-process Coradoc tree" do
      Postprocessor.process(coradoc)
    end

    plugin_instances.each do |plugin|
      if plugin.respond_to?(:postprocess_coradoc_tree)
        plugin.coradoc_tree = coradoc
        track_time "Postprocessing Coradoc tree with #{plugin.name} plugin" do
          plugin.postprocess_coradoc_tree
        end
        coradoc = plugin.coradoc_tree
      end
    end

    coradoc
  end
end
|
54
|
-
end
|
55
91
|
|
56
|
-
|
57
|
-
|
58
|
-
|
92
|
+
# Converts HTML input to AsciiDoc.
#
# One instance of every configured plugin class is created and shared by
# the tree conversion and the AsciiDoc generation. When the tree conversion
# yields a Hash, each value is converted on its own and a Hash with the
# same keys is returned; otherwise the single conversion result is returned.
def self.convert(input, options = {})
  ReverseAdoc.config.with(options) do
    plugins = Coradoc::ReverseAdoc.config.plugins.map(&:new)
    tree = to_coradoc(input, options.merge(plugin_instances: plugins))

    if tree.is_a?(Hash)
      tree.to_h do |file, subtree|
        track_time "Converting file #{file || 'main'}" do
          [file, convert_single_coradoc_to_adoc(file, subtree, plugins)]
        end
      end
    else
      convert_single_coradoc_to_adoc(nil, tree, plugins)
    end
  end
end
|
111
|
+
|
112
|
+
# Generates AsciiDoc from one Coradoc tree, tidies it with the configured
# cleaner and then lets every plugin that defines
# postprocess_asciidoc_string rewrite the text in turn.
#
# Returns the text left by the last plugin (or the tidied text when no
# plugin post-processes it).
def self.convert_single_coradoc_to_adoc(_file, coradoc, plugin_instances)
  generated = track_time("Converting Coradoc tree into Asciidoc") do
    Coradoc::Generator.gen_adoc(coradoc)
  end
  tidied = track_time("Cleaning up the result") do
    ReverseAdoc.cleaner.tidy(generated)
  end

  plugin_instances.reduce(tidied) do |text, plugin|
    next text unless plugin.respond_to?(:postprocess_asciidoc_string)

    # The plugin reads and writes the text through its accessor.
    plugin.asciidoc_string = text
    track_time "Postprocessing AsciiDoc string with #{plugin.name} plugin" do
      plugin.postprocess_asciidoc_string
    end
    plugin.asciidoc_string
  end
end
|
130
|
+
|
131
|
+
# Nesting depth of the track_time calls currently in progress; used to
# indent the log lines so that nested tasks read as a tree.
@track_time_indentation = 0

# Runs the given block and returns its value. When
# ReverseAdoc.config.track_time is enabled, a "starting" line and a "took"
# line (elapsed seconds, rounded to 3 places) are written with warn,
# indented by the current nesting depth.
#
# The nesting depth is restored even when the block raises or exits
# non-locally, so a failed task does not shift the indentation of later
# log lines. No "took" line is written for a task that raised.
def self.track_time(task)
  return yield unless ReverseAdoc.config.track_time

  warn "#{' ' * @track_time_indentation}* #{task} is starting..."
  @track_time_indentation += 1
  # Monotonic clock: unaffected by wall-clock adjustments during the run.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  begin
    ret = yield
  ensure
    @track_time_indentation -= 1
  end
  time_elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  warn "#{' ' * @track_time_indentation}* #{task} took #{time_elapsed.round(3)} seconds"
  ret
end
|
59
148
|
end
|
60
149
|
end
|
61
150
|
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
module Coradoc::ReverseAdoc
|
2
|
+
# Base class for reverse_adoc plugins. A plugin can rewrite the HTML tree
# before conversion, hook into the conversion of individual nodes, and
# post-process the resulting Coradoc tree and AsciiDoc string.
class Plugin
  #### Plugin system general

  # Allow building plugins with a shorthand syntax:
  # plugin = Coradoc::ReverseAdoc::Plugin.new do
  #   def name = "Test"
  # end
  #
  # Called on Plugin itself this returns a new anonymous *subclass* whose
  # body is the given block; called on a subclass it builds an instance
  # as usual.
  def self.new(&block)
    if self == Plugin
      Class.new(Plugin, &block)
    else
      super
    end
  end

  def initialize
    # Per-node hooks, keyed by the HTML node they were registered for.
    @html_tree_hooks_pre = {}
    @html_tree_hooks_post = {}
  end

  # define name to name a Plugin
  #
  # Defaults to the class name.
  def name
    self.class.name
  end

  #### HTML Tree functionalities

  # The HTML tree this plugin works on; the CSS helpers below query it.
  attr_accessor :html_tree

  # Renames every element matching +css+ to +new_name+.
  def html_tree_change_tag_name_by_css(css, new_name)
    html_tree.css(css).each do |e|
      e.name = new_name
    end
  end

  # Sets each key of +properties+ (converted to a String) as an attribute
  # on every element matching +css+.
  def html_tree_change_properties_by_css(css, properties)
    html_tree.css(css).each do |e|
      properties.each do |k, v|
        e[k.to_s] = v
      end
    end
  end

  # Removes every element matching +css+ from the tree.
  def html_tree_remove_by_css(css)
    html_tree.css(css).each(&:remove)
  end

  # Replaces every element matching +css+ with its own children.
  def html_tree_replace_with_children_by_css(css)
    html_tree.css(css).each do |e|
      e.replace(e.children)
    end
  end

  # Converts +tree+ to Coradoc nodes through the regular converters.
  def html_tree_process_to_coradoc(tree, state = {})
    Coradoc::ReverseAdoc::Converters.process_coradoc(tree, state)
  end

  # Converts +tree+ straight to an AsciiDoc string through the regular
  # converters.
  def html_tree_process_to_adoc(tree, state = {})
    Coradoc::ReverseAdoc::Converters.process(tree, state)
  end

  # Debugging aid: writes the current HTML tree to a temporary file and
  # opens it with chromium-browser.
  def html_tree_preview
    # Only needed by this helper, so it is loaded on demand.
    require "tempfile"

    Tempfile.open(["coradoc", ".html"]) do |i|
      i << html_tree.to_html
      # Push buffered output to disk before the browser reads the file.
      i.flush
      system "chromium-browser", "--no-sandbox", i.path
    end
  end

  # define preprocess_html_tree to process HTML trees

  # Creates a hook to be called instead of converting an element
  # to a Coradoc node. When the hook returns nil or false, the regular
  # conversion still runs.
  #
  # proc |html_node, state|
  #   coradoc_node
  # end
  def html_tree_add_hook_pre(element, &block)
    @html_tree_hooks_pre[element] = block
  end

  # Registers the same pre hook for every element matching +css+.
  def html_tree_add_hook_pre_by_css(css, &block)
    html_tree.css(css).each do |e|
      html_tree_add_hook_pre(e, &block)
    end
  end

  # Creates a hook to be called after converting an element
  # to a Coradoc node.
  #
  # proc |html_node, coradoc_node, state|
  #   coradoc_node
  # end
  def html_tree_add_hook_post(element, &block)
    @html_tree_hooks_post[element] = block
  end

  # Registers the same post hook for every element matching +css+.
  def html_tree_add_hook_post_by_css(css, &block)
    html_tree.css(css).each do |e|
      html_tree_add_hook_post(e, &block)
    end
  end

  # Runs the hooks registered for +node+ around the given block, which
  # performs the regular conversion: the pre hook result (if truthy)
  # replaces the conversion, and the post hook receives whatever was
  # produced. Returns the resulting value.
  def html_tree_run_hooks(node, state, &_block)
    hook_pre = @html_tree_hooks_pre[node]
    hook_post = @html_tree_hooks_post[node]

    coradoc = hook_pre.(node, state) if hook_pre
    coradoc ||= yield node, state
    coradoc = hook_post.(node, coradoc, state) if hook_post

    coradoc
  end

  #### Coradoc tree functionalities

  attr_accessor :coradoc_tree

  # define postprocess_coradoc_tree to change coradoc tree

  #### AsciiDoc string functionalities

  attr_accessor :asciidoc_string

  # define postprocess_asciidoc_string to change the coradoc string
end
|
131
|
+
end
|
@@ -0,0 +1,206 @@
|
|
1
|
+
module Coradoc::ReverseAdoc
|
2
|
+
class Plugin
|
3
|
+
# This plugin enhances documents from the PLATEAU project
|
4
|
+
# to extract more data.
|
5
|
+
#
|
6
|
+
# Usage:
|
7
|
+
# reverse_adoc -rcoradoc/reverse_adoc/plugins/plateau
|
8
|
+
# --external-images -u raise --output _out/index.adoc index.html
|
9
|
+
class Plateau < Plugin
|
10
|
+
# Fixed display name of this plugin (overrides the class-name default).
def name
  'PLATEAU'
end
|
13
|
+
|
14
|
+
# Rewrites the PLATEAU HTML tree before conversion: drops the side menu,
# turns presentational classes into semantic tags, attaches captions to
# tables and images, and registers per-node hooks for headers and for the
# non-semantic list/indentation classes.
def preprocess_html_tree
  # Let's simplify the tree by removing what's extraneous
  # html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
  # html_tree_replace_with_children_by_css("div.container_box")
  # html_tree_replace_with_children_by_css("div.col.col-12")
  # html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
  # html_tree_replace_with_children_by_css("div.row")
  #
  # We can remove that, but it messes up the images and paragraphs.

  # Remove side menu, so we can generate TOC ourselves
  html_tree_remove_by_css(".sideMenu")

  # Correct non-semantic classes into semantic HTML tags
  html_tree_change_tag_name_by_css(".titledata", "h1")
  html_tree_change_tag_name_by_css(".subtitledata", "h2")
  html_tree_change_tag_name_by_css(".pitemdata", "h3")
  html_tree_change_tag_name_by_css(".sitemdata", "h4")
  html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', "th")
  html_tree_change_tag_name_by_css('td[bgcolor="#d0cece"]', "th")
  html_tree_change_tag_name_by_css('.framedata, .frame_container_box', 'aside')
  html_tree_change_tag_name_by_css('.frame2data', 'pre')
  # Assumption that all code snippets in those documents are XML...
  html_tree_change_properties_by_css(".frame2data", class: "brush:xml;")

  # Remove some CSS ids that are not important to us
  html_tree_change_properties_by_css("#__nuxt", id: nil)
  html_tree_change_properties_by_css("#__layout", id: nil)
  html_tree_change_properties_by_css("#app", id: nil)

  # Handle lists of document 02
  html_tree_replace_with_children_by_css(".list_num-wrap")

  # Convert table/img caption to become a caption
  html_tree.css(".imagedata").each do |e|
    # Caption of the table that starts the parent's next sibling
    table = e.parent.next&.children&.first
    if table&.name == "table"
      e.name = "caption"
      table.prepend_child(e)
      next
    end

    # Caption of the image that starts the parent's previous sibling
    img = e.parent.previous&.children&.first
    if img&.name == "img" && img["src"]
      title = e.text.strip
      img["title"] = title
      e.remove
      next
    end
  end

  # Add hooks for H1, H2, H3, H4
  html_tree_add_hook_post_by_css("h1, h2, h3", &method(:handle_headers))
  html_tree_add_hook_post_by_css("h4", &method(:handle_headers_h4))

  # Table cells aligned to center
  html_tree_change_properties_by_css(".tableTopCenter", align: "center")

  # Handle non-semantic lists and indentation.
  # The //-PTnD ... //-ENDPTnD comment markers emitted below are resolved
  # by postprocess_asciidoc_string.
  html_tree_add_hook_pre_by_css ".text2data" do |node,|
    text = html_tree_process_to_adoc(node).strip
    next "" if text.empty? || text == "\u3000"

    if text.start_with?(/\d+\./)
      # Strip the leading "<number>." (literal dot) and emit a nested
      # ordered list item instead.
      text = text.sub(/\A\d+\.\s*/, "")
      ".. #{text}\n"
    else
      text = text.gsub(/^/, "** ")
      "\n\n//-PT2D\n#{text}\n//-ENDPT2D\n\n"
    end
  end

  (3..4).each do |i|
    html_tree_add_hook_pre_by_css ".text#{i}data" do |node,|
      text = html_tree_process_to_adoc(node).strip
      next "" if text.empty? || text == "\u3000"

      text = text.strip.gsub(/^/, "#{'*' * i} ")
      "\n\n//-PT#{i}D\n#{text}\n//-ENDPT#{i}D\n\n"
    end
  end

  (2..3).each do |i|
    html_tree_add_hook_pre_by_css ".text#{i}data_point ul" do |node,|
      text = html_tree_process_to_adoc(node.children.first.children).strip

      "#{'*' * i} #{text}\n"
    end
  end

  (1..20).each do |i|
    html_tree_add_hook_pre_by_css ".numtextdata_num .list_num#{i}" do |node,|
      text = html_tree_process_to_adoc(node).strip

      "[start=#{i}]\n. #{text}\n"
    end
  end

  # html_tree_preview
end
|
114
|
+
|
115
|
+
# One component of a clause number: one to three capital letters or digits.
IM = /[A-Z0-9]{1,3}/

# Titles of the special (toc0*) sections mapped to the section style
# they receive.
SPECIAL_SECTION_STYLES = {
  "はじめに" => "abstract", # Introduction; the older version document has ".preface"
  "改定の概要" => "abstract", # Revision overview; the older version document has ".preface"
  "参考文献" => "bibliography", # Bibliography
  "改訂履歴" => "appendix", # Document history
  "0 概要" => "abstract", # Overview; I'm not sure this is correct
  "索引" => "index", # Index; I'm not sure this is correct
}.freeze

# Post hook for h1-h3 headers.
#
# * Headers whose id starts with "toc0"/"toc_0" are special sections: known
#   titles get a style and level 1, unknown ones produce a warning.
# * h1 headers starting with "Annex" become appendices and lose the
#   "Annex X" prefix.
# * A leading clause number ("1.2.3 ") is removed from the first content
#   element.
#
# Returns the (modified) +coradoc+ node. A header without any content
# element is returned without title rewriting instead of raising.
def handle_headers(node, coradoc, state)
  content = coradoc.content.map(&:content).join

  if %w[toc0 toc_0].any? { |prefix| coradoc.id&.start_with?(prefix) }
    # Special content
    style = SPECIAL_SECTION_STYLES[content.strip]
    if style
      coradoc.style = style
      coradoc.level_int = 1
    else
      warn "Unknown section #{content.inspect}"
    end
  end

  # The title rewrites below edit the first content element in place.
  title = coradoc.content.first&.content

  if node.name == "h1" && content.start_with?("Annex")
    coradoc.style = "appendix"
    title&.sub!(/\AAnnex [A-Z]/, "")
  end

  # Remove numbers
  title&.sub!(/\A(#{IM}\.)*#{IM}[[:space:]]/, "")

  coradoc
end
|
158
|
+
|
159
|
+
# Post hook for h4 headers, which carry several numbering styles.
#
#   "(1) Title"       -> level 4, numbering removed
#   "1) Title"        -> level 5, numbering removed
#   "1.2.3.4 Title"   -> level 4, numbering removed
#
# Returns the modified +coradoc+ node for the three numbered forms, nil
# for an empty title, and ["// FIXME\n", coradoc] for anything else.
def handle_headers_h4(node, coradoc, state)
  heading = coradoc.content.first
  title = Coradoc.strip_unicode(heading.content)

  level =
    case title
    when /\A\(\d+\)(.*)/ then 4
    when /\A\d+\)(.*)/ then 5
    when /\A#{IM}\.#{IM}\.#{IM}\.#{IM}(.*)/ then 4
    end

  if level
    coradoc.level_int = level
    # Capture 1 of whichever pattern matched is the text after the number.
    heading.content = Regexp.last_match(1).strip
    # Every numbered form hands back the node itself, never the title text.
    return coradoc
  end

  if title.empty?
    # Strip instances of faulty empty paragraphs
    nil
  else
    ["// FIXME\n", coradoc]
  end
end
|
182
|
+
|
183
|
+
# Resolves the //-PTnD ... //-ENDPTnD marker comments in the generated
# AsciiDoc (custom indentation handling) and stores the result back in
# asciidoc_string. The rules are applied in order; each one sees the
# output of the previous one.
def postprocess_asciidoc_string
  rules = [
    # If there's a step up, add [none]
    [%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n"],
    [%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n"],
    [%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n"],
    # Collapse adjacent marker blocks
    [%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n"],
    # In the beginning, add [none]
    [%r{\s+//-PT[234]D\s+}, "\n\n[none]\n"],
    # If following with another list, ensure we readd styling
    [%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*"],
    # Otherwise, clean up
    [%r{\s+//-ENDPT[234]D\s+}, "\n\n"],
  ]

  self.asciidoc_string = rules.reduce(asciidoc_string) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
206
|
+
Coradoc::ReverseAdoc.config.plugins << Coradoc::ReverseAdoc::Plugin::Plateau
|