coradoc 0.2.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.docker/Dockerfile +1 -1
- data/.docker/docker-compose.yml +2 -2
- data/.editorconfig +15 -0
- data/CHANGELOG.md +4 -0
- data/README.md +4 -0
- data/Rakefile +10 -0
- data/coradoc.gemspec +11 -2
- data/exe/reverse_adoc +91 -0
- data/exe/w2a +72 -0
- data/lib/coradoc/document.rb +6 -6
- data/lib/coradoc/element/admonition.rb +8 -6
- data/lib/coradoc/element/attribute.rb +2 -2
- data/lib/coradoc/element/attribute_list.rb +94 -15
- data/lib/coradoc/element/audio.rb +14 -3
- data/lib/coradoc/element/author.rb +18 -14
- data/lib/coradoc/element/base.rb +69 -8
- data/lib/coradoc/element/block/core.rb +10 -6
- data/lib/coradoc/element/block/literal.rb +1 -1
- data/lib/coradoc/element/block/quote.rb +1 -1
- data/lib/coradoc/element/block/sourcecode.rb +2 -2
- data/lib/coradoc/element/break.rb +1 -1
- data/lib/coradoc/element/document_attributes.rb +6 -6
- data/lib/coradoc/element/header.rb +4 -2
- data/lib/coradoc/element/image/block_image.rb +13 -2
- data/lib/coradoc/element/image/core.rb +35 -5
- data/lib/coradoc/element/image/inline_image.rb +2 -2
- data/lib/coradoc/element/image.rb +0 -1
- data/lib/coradoc/element/inline/anchor.rb +4 -2
- data/lib/coradoc/element/inline/bold.rb +10 -4
- data/lib/coradoc/element/inline/cross_reference.rb +4 -2
- data/lib/coradoc/element/inline/hard_line_break.rb +1 -1
- data/lib/coradoc/element/inline/highlight.rb +12 -6
- data/lib/coradoc/element/inline/italic.rb +10 -4
- data/lib/coradoc/element/inline/link.rb +26 -10
- data/lib/coradoc/element/inline/monospace.rb +10 -4
- data/lib/coradoc/element/inline/quotation.rb +4 -1
- data/lib/coradoc/element/inline/subscript.rb +5 -2
- data/lib/coradoc/element/inline/superscript.rb +5 -2
- data/lib/coradoc/element/inline.rb +0 -1
- data/lib/coradoc/element/list/core.rb +10 -8
- data/lib/coradoc/element/list/definition.rb +19 -0
- data/lib/coradoc/element/list/ordered.rb +1 -1
- data/lib/coradoc/element/list/unordered.rb +1 -1
- data/lib/coradoc/element/list.rb +1 -1
- data/lib/coradoc/element/list_item.rb +9 -4
- data/lib/coradoc/element/list_item_definition.rb +32 -0
- data/lib/coradoc/element/paragraph.rb +5 -3
- data/lib/coradoc/element/revision.rb +20 -16
- data/lib/coradoc/element/section.rb +21 -4
- data/lib/coradoc/element/table.rb +36 -19
- data/lib/coradoc/element/text_element.rb +63 -17
- data/lib/coradoc/element/title.rb +27 -7
- data/lib/coradoc/element/video.rb +33 -6
- data/lib/coradoc/generator.rb +2 -2
- data/lib/coradoc/legacy_parser.rb +41 -41
- data/lib/coradoc/oscal.rb +2 -4
- data/lib/coradoc/parser/asciidoc/content.rb +15 -15
- data/lib/coradoc/parser/asciidoc/document_attributes.rb +1 -1
- data/lib/coradoc/parser/asciidoc/header.rb +6 -6
- data/lib/coradoc/parser/asciidoc/section.rb +1 -1
- data/lib/coradoc/reverse_adoc/LICENSE.txt +25 -0
- data/lib/coradoc/reverse_adoc/README.adoc +308 -0
- data/lib/coradoc/reverse_adoc/cleaner.rb +125 -0
- data/lib/coradoc/reverse_adoc/config.rb +73 -0
- data/lib/coradoc/reverse_adoc/converters/a.rb +47 -0
- data/lib/coradoc/reverse_adoc/converters/aside.rb +12 -0
- data/lib/coradoc/reverse_adoc/converters/audio.rb +25 -0
- data/lib/coradoc/reverse_adoc/converters/base.rb +104 -0
- data/lib/coradoc/reverse_adoc/converters/blockquote.rb +18 -0
- data/lib/coradoc/reverse_adoc/converters/br.rb +11 -0
- data/lib/coradoc/reverse_adoc/converters/bypass.rb +77 -0
- data/lib/coradoc/reverse_adoc/converters/code.rb +19 -0
- data/lib/coradoc/reverse_adoc/converters/div.rb +14 -0
- data/lib/coradoc/reverse_adoc/converters/dl.rb +55 -0
- data/lib/coradoc/reverse_adoc/converters/drop.rb +22 -0
- data/lib/coradoc/reverse_adoc/converters/em.rb +17 -0
- data/lib/coradoc/reverse_adoc/converters/figure.rb +21 -0
- data/lib/coradoc/reverse_adoc/converters/h.rb +38 -0
- data/lib/coradoc/reverse_adoc/converters/head.rb +19 -0
- data/lib/coradoc/reverse_adoc/converters/hr.rb +11 -0
- data/lib/coradoc/reverse_adoc/converters/ignore.rb +16 -0
- data/lib/coradoc/reverse_adoc/converters/img.rb +98 -0
- data/lib/coradoc/reverse_adoc/converters/li.rb +13 -0
- data/lib/coradoc/reverse_adoc/converters/mark.rb +15 -0
- data/lib/coradoc/reverse_adoc/converters/markup.rb +27 -0
- data/lib/coradoc/reverse_adoc/converters/math.rb +31 -0
- data/lib/coradoc/reverse_adoc/converters/ol.rb +60 -0
- data/lib/coradoc/reverse_adoc/converters/p.rb +19 -0
- data/lib/coradoc/reverse_adoc/converters/pass_through.rb +13 -0
- data/lib/coradoc/reverse_adoc/converters/pre.rb +51 -0
- data/lib/coradoc/reverse_adoc/converters/q.rb +12 -0
- data/lib/coradoc/reverse_adoc/converters/strong.rb +16 -0
- data/lib/coradoc/reverse_adoc/converters/sub.rb +18 -0
- data/lib/coradoc/reverse_adoc/converters/sup.rb +18 -0
- data/lib/coradoc/reverse_adoc/converters/table.rb +280 -0
- data/lib/coradoc/reverse_adoc/converters/td.rb +77 -0
- data/lib/coradoc/reverse_adoc/converters/text.rb +28 -0
- data/lib/coradoc/reverse_adoc/converters/th.rb +14 -0
- data/lib/coradoc/reverse_adoc/converters/tr.rb +18 -0
- data/lib/coradoc/reverse_adoc/converters/video.rb +25 -0
- data/lib/coradoc/reverse_adoc/converters.rb +53 -0
- data/lib/coradoc/reverse_adoc/errors.rb +10 -0
- data/lib/coradoc/reverse_adoc/html_converter.rb +150 -0
- data/lib/coradoc/reverse_adoc/plugin.rb +131 -0
- data/lib/coradoc/reverse_adoc/plugins/plateau.rb +174 -0
- data/lib/coradoc/reverse_adoc/postprocessor.rb +148 -0
- data/lib/coradoc/reverse_adoc.rb +30 -0
- data/lib/coradoc/transformer.rb +24 -14
- data/lib/coradoc/version.rb +1 -1
- data/lib/reverse_adoc.rb +20 -0
- metadata +184 -5
- data/lib/coradoc/element/inline/image.rb +0 -25
@@ -0,0 +1,25 @@
|
|
1
|
+
module Coradoc::ReverseAdoc
|
2
|
+
module Converters
|
3
|
+
class Video < Base
|
4
|
+
# Builds a Coradoc video element from an HTML node.
#
# Reads the "src" and "id" attributes from the node, asks +extract_title+
# for the title, and attaches an "options" named attribute only when
# +options(node)+ returns at least one entry.
#
# @param node [#[]] node whose attributes are read with +node["name"]+
# @param _state [Hash] conversion state; not used by this converter
# @return [Coradoc::Element::Video]
def to_coradoc(node, _state = {})
  title = extract_title(node)
  attrs = Coradoc::Element::AttributeList.new

  flags = options(node)
  attrs.add_named("options", flags) if flags.any?

  Coradoc::Element::Video.new(
    title,
    id: node["id"],
    src: node["src"],
    attributes: attrs
  )
end
|
14
|
+
|
15
|
+
# Collects the values of the "autoplay", "loop" and "controls" attributes,
# in that order, skipping the ones the node does not have.
#
# NOTE(review): the result holds the attribute *values* as returned by
# +node[...]+, not the attribute names - confirm this is what the "options"
# attribute is expected to contain.
#
# @param node [#[]] node whose attributes are read with +node["name"]+
# @return [Array] the non-nil attribute values
def options(node)
  %w[autoplay loop controls].map { |attribute| node[attribute] }.compact
end
|
21
|
+
end
|
22
|
+
|
23
|
+
register :video, Video.new
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Coradoc::ReverseAdoc
|
2
|
+
module Converters
|
3
|
+
# Registers +converter+ as the handler for +tag_name+, replacing any
# converter previously stored under the same name.
#
# @param tag_name [#to_sym] tag name; stored as a Symbol key
# @param converter [Object] converter instance to store
# @return [Object] the converter that was stored
def self.register(tag_name, converter)
  @@converters ||= {} # create the registry on first use
  key = tag_name.to_sym
  @@converters[key] = converter
end
|
7
|
+
|
8
|
+
# Removes the converter stored for +tag_name+ from the registry.
#
# @param tag_name [#to_sym] tag name; looked up as a Symbol key
# @return [Object, nil] the removed converter, or nil if none was stored
def self.unregister(tag_name)
  key = tag_name.to_sym
  @@converters.delete(key)
end
|
11
|
+
|
12
|
+
# Finds the converter for +tag_name+, falling back to
# +default_converter+ when nothing (or a falsy value) is registered.
#
# @param tag_name [#to_sym] tag name; looked up as a Symbol key
# @return [Object] the registered or default converter
def self.lookup(tag_name)
  registered = @@converters[tag_name.to_sym]
  registered || default_converter(tag_name)
end
|
15
|
+
|
16
|
+
# Converts a node, or a collection of nodes, through the +convert+ method
# of the converter found by +lookup+.
#
# A Nokogiri::XML::NodeSet is turned into an Array first; an Array is
# converted element by element and the results are joined into one String.
#
# Note: process won't run plugin hooks
#
# @param node [Object] a single node, an Array of nodes or a NodeSet
# @param state [Hash] conversion state handed to every converter
# @return [Object] the converter's result (a joined String for collections)
def self.process(node, state)
  node = node.to_a if node.is_a? Nokogiri::XML::NodeSet

  if node.is_a? Array
    pieces = node.map { |child| process(child, state) }
    return pieces.join
  end

  converter = lookup(node.name)
  converter.convert(node, state)
end
|
23
|
+
|
24
|
+
# Converts a node, or a collection of nodes, through the +to_coradoc+
# method of the converter found by +lookup+, routing the conversion
# through every plugin listed in +state[:plugin_instances]+.
#
# Each plugin's +html_tree_run_hooks+ wraps the conversion built so far,
# so the last plugin in the list is the outermost wrapper and the plain
# converter call sits at the centre of the chain.
#
# @param node [Object] a single node, an Array of nodes or a NodeSet
# @param state [Hash] conversion state; +:plugin_instances+ is optional
# @return [Object] the conversion result (an Array for collections)
def self.process_coradoc(node, state)
  node = node.to_a if node.is_a? Nokogiri::XML::NodeSet
  return node.map { |child| process_coradoc(child, state) } if node.is_a? Array

  plugins = state[:plugin_instances] || {}
  innermost = proc { lookup(node.name).to_coradoc(node, state) }

  chain = plugins.inject(innermost) do |wrapped, plugin|
    proc { plugin.html_tree_run_hooks(node, state, &wrapped) }
  end

  chain.call(node, state)
end
|
36
|
+
|
37
|
+
# Picks the converter used for tags that have no registered converter,
# according to +Coradoc::ReverseAdoc.config.unknown_tags+.
#
# @param tag_name [Object] name of the unknown tag (used in the error text)
# @return [Object] a new PassThrough, Drop or Bypass converter
# @raise [UnknownTagError] when the setting is +:raise+
# @raise [InvalidConfigurationError] when the setting is none of
#   +:pass_through+, +:drop+, +:bypass+ or +:raise+
def self.default_converter(tag_name)
  policy = Coradoc::ReverseAdoc.config.unknown_tags

  case policy.to_sym
  when :pass_through then Coradoc::ReverseAdoc::Converters::PassThrough.new
  when :drop         then Coradoc::ReverseAdoc::Converters::Drop.new
  when :bypass       then Coradoc::ReverseAdoc::Converters::Bypass.new
  when :raise        then raise UnknownTagError, "unknown tag: #{tag_name}"
  else
    raise InvalidConfigurationError,
          "unknown value #{policy.inspect} for Coradoc::ReverseAdoc.config.unknown_tags"
  end
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "converters/markup"
|
4
|
+
require_relative "converters/a"
|
5
|
+
require_relative "converters/aside"
|
6
|
+
require_relative "converters/audio"
|
7
|
+
require_relative "converters/blockquote"
|
8
|
+
require_relative "converters/br"
|
9
|
+
require_relative "converters/bypass"
|
10
|
+
require_relative "converters/code"
|
11
|
+
require_relative "converters/div"
|
12
|
+
require_relative "converters/dl"
|
13
|
+
require_relative "converters/drop"
|
14
|
+
require_relative "converters/em"
|
15
|
+
require_relative "converters/figure"
|
16
|
+
require_relative "converters/h"
|
17
|
+
require_relative "converters/head"
|
18
|
+
require_relative "converters/hr"
|
19
|
+
require_relative "converters/ignore"
|
20
|
+
require_relative "converters/img"
|
21
|
+
require_relative "converters/mark"
|
22
|
+
require_relative "converters/li"
|
23
|
+
require_relative "converters/ol"
|
24
|
+
require_relative "converters/p"
|
25
|
+
require_relative "converters/pass_through"
|
26
|
+
require_relative "converters/pre"
|
27
|
+
require_relative "converters/q"
|
28
|
+
require_relative "converters/strong"
|
29
|
+
require_relative "converters/sup"
|
30
|
+
require_relative "converters/sub"
|
31
|
+
require_relative "converters/table"
|
32
|
+
require_relative "converters/td"
|
33
|
+
require_relative "converters/th"
|
34
|
+
require_relative "converters/text"
|
35
|
+
require_relative "converters/tr"
|
36
|
+
require_relative "converters/video"
|
37
|
+
require_relative "converters/math"
|
38
|
+
|
39
|
+
module Coradoc
|
40
|
+
module ReverseAdoc
|
41
|
+
class HtmlConverter
|
42
|
+
# Converts HTML input into a Coradoc tree.
#
# Steps, each wrapped in +track_time+:
# 1. load the input into a root node,
# 2. let every plugin preprocess the HTML tree,
# 3. convert the tree with +Converters.process_coradoc+,
# 4. run +Postprocessor.process+,
# 5. let every plugin postprocess the Coradoc tree.
#
# @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node]
#   HTML source or an already parsed document/node; any other input
#   yields no root
# @param options [Hash] configuration overrides applied for the duration
#   of the call via +ReverseAdoc.config.with+. The optional
#   +:plugin_instances+ entry supplies ready-made plugin instances and is
#   not forwarded to the configuration. The caller's hash is left untouched.
# @return [Object] the resulting Coradoc tree, or "" when no root node
#   could be obtained from +input+
def self.to_coradoc(input, options = {})
  # Work on a copy: deleting from the argument itself would silently strip
  # :plugin_instances from the caller's hash (and fail on a frozen one).
  options = options.dup
  plugin_instances = options.delete(:plugin_instances)

  ReverseAdoc.config.with(options) do
    # Instantiated inside the block so that plugins configured through
    # +options+ are taken into account.
    plugin_instances ||= Coradoc::ReverseAdoc.config.plugins.map(&:new)

    root = track_time "Loading input HTML document" do
      case input
      when String
        Nokogiri::HTML(input).root
      when Nokogiri::XML::Document
        input.root
      when Nokogiri::XML::Node
        input
      end
    end

    return "" unless root

    plugin_instances.each do |plugin|
      plugin.html_tree = root
      if plugin.respond_to?(:preprocess_html_tree)
        track_time "Preprocessing document with #{plugin.name} plugin" do
          plugin.preprocess_html_tree
        end
      end
      # A plugin may have replaced the tree altogether; pick up its version.
      root = plugin.html_tree
    end

    coradoc = track_time "Converting input document tree to Coradoc tree" do
      Converters.process_coradoc(root, plugin_instances: plugin_instances)
    end

    coradoc = track_time "Post-process Coradoc tree" do
      Postprocessor.process(coradoc)
    end

    plugin_instances.each do |plugin|
      next unless plugin.respond_to?(:postprocess_coradoc_tree)

      plugin.coradoc_tree = coradoc
      track_time "Postprocessing Coradoc tree with #{plugin.name} plugin" do
        plugin.postprocess_coradoc_tree
      end
      coradoc = plugin.coradoc_tree
    end

    coradoc
  end
end
|
91
|
+
|
92
|
+
# Converts HTML input to AsciiDoc.
#
# Plugins listed in the configuration are instantiated once and shared
# between the tree conversion (+to_coradoc+) and the string generation
# (+convert_single_coradoc_to_adoc+).
#
# @param input [Object] whatever +to_coradoc+ accepts
# @param options [Hash] configuration overrides applied for the duration
#   of the call via +ReverseAdoc.config.with+
# @return [Object] the generated AsciiDoc; when +to_coradoc+ returns a
#   Hash, a Hash with the same keys whose values are the generated
#   AsciiDoc for each entry
def self.convert(input, options = {})
  ReverseAdoc.config.with(options) do
    plugins = Coradoc::ReverseAdoc.config.plugins.map(&:new)
    tree = to_coradoc(input, options.merge(plugin_instances: plugins))

    # Anything but a Hash is a single document.
    next convert_single_coradoc_to_adoc(nil, tree, plugins) unless tree.is_a?(Hash)

    tree.to_h do |file, subtree|
      track_time "Converting file #{file || 'main'}" do
        [file, convert_single_coradoc_to_adoc(file, subtree, plugins)]
      end
    end
  end
end
|
111
|
+
|
112
|
+
# Turns one Coradoc tree into its final AsciiDoc string.
#
# The tree is rendered with +Coradoc::Generator.gen_adoc+, tidied with
# +ReverseAdoc.cleaner.tidy+, and then handed in turn to every plugin that
# implements +postprocess_asciidoc_string+; each such plugin receives the
# string through +asciidoc_string=+ and its +asciidoc_string+ is read back.
#
# @param _file [Object] file key of the tree; not used here
# @param coradoc [Object] the Coradoc tree to render
# @param plugin_instances [Enumerable] plugin instances, applied in order
# @return [Object] the resulting AsciiDoc
def self.convert_single_coradoc_to_adoc(_file, coradoc, plugin_instances)
  generated = track_time("Converting Coradoc tree into Asciidoc") do
    Coradoc::Generator.gen_adoc(coradoc)
  end
  tidied = track_time("Cleaning up the result") do
    ReverseAdoc.cleaner.tidy(generated)
  end

  plugin_instances.inject(tidied) do |adoc, plugin|
    next adoc unless plugin.respond_to?(:postprocess_asciidoc_string)

    plugin.asciidoc_string = adoc
    track_time "Postprocessing AsciiDoc string with #{plugin.name} plugin" do
      plugin.postprocess_asciidoc_string
    end
    plugin.asciidoc_string
  end
end
|
130
|
+
|
131
|
+
# Current nesting depth of +track_time+ calls; used to indent the report.
@track_time_indentation = 0

# Runs the given block and, when +ReverseAdoc.config.track_time+ is set,
# reports on stderr when +task+ starts and how long it took. Nested calls
# are indented by one space per level.
#
# The nesting depth is restored even when the block raises, so a failed
# task does not skew the indentation of later reports. No "took" line is
# printed for a task that raised.
#
# @param task [String] human-readable task description
# @yield the work to run (and time)
# @return [Object] the block's return value
def self.track_time(task)
  return yield unless ReverseAdoc.config.track_time

  warn " " * @track_time_indentation +
       "* #{task} is starting..."
  @track_time_indentation += 1
  # Monotonic clock: unaffected by wall-clock adjustments during the run.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  begin
    ret = yield
  ensure
    @track_time_indentation -= 1
  end
  time_elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  warn " " * @track_time_indentation +
       "* #{task} took #{time_elapsed.round(3)} seconds"
  ret
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
module Coradoc::ReverseAdoc
|
2
|
+
# Base class for ReverseAdoc plugins.
#
# A plugin holds three pieces of state that the converter assigns and
# reads back: +html_tree+, +coradoc_tree+ and +asciidoc_string+. A subclass
# opts into a processing stage by defining the matching method
# (+preprocess_html_tree+, +postprocess_coradoc_tree+,
# +postprocess_asciidoc_string+); none of them is defined here.
class Plugin
  #### Plugin system general

  # Allow building plugins with a shorthand syntax:
  #   plugin = Coradoc::ReverseAdoc::Plugin.new do
  #     def name = "Test"
  #   end
  #
  # Called on Plugin itself this returns a new subclass of Plugin whose
  # body is the given block; called on a subclass it creates an instance.
  def self.new(&block)
    return super unless self == Plugin

    Class.new(Plugin, &block)
  end

  def initialize
    # Hook procs, keyed by the HTML node they apply to.
    @html_tree_hooks_pre = {}
    @html_tree_hooks_post = {}
  end

  # define name to name a Plugin
  #
  # @return [String, nil] the class name unless overridden
  def name
    self.class.name
  end

  #### HTML Tree functionalities

  attr_accessor :html_tree

  # Renames every element matching +css+ to +new_name+.
  def html_tree_change_tag_name_by_css(css, new_name)
    html_tree.css(css).each do |element|
      element.name = new_name
    end
  end

  # Assigns each key/value of +properties+ (keys converted with +to_s+)
  # on every element matching +css+, using +element[key] = value+.
  def html_tree_change_properties_by_css(css, properties)
    html_tree.css(css).each do |element|
      properties.each do |key, value|
        element[key.to_s] = value
      end
    end
  end

  # Removes every element matching +css+ from the tree.
  def html_tree_remove_by_css(css)
    html_tree.css(css).each(&:remove)
  end

  # Replaces every element matching +css+ with its own children.
  def html_tree_replace_with_children_by_css(css)
    html_tree.css(css).each do |element|
      element.replace(element.children)
    end
  end

  # Converts +tree+ through +Converters.process_coradoc+.
  def html_tree_process_to_coradoc(tree, state = {})
    Coradoc::ReverseAdoc::Converters.process_coradoc(tree, state)
  end

  # Converts +tree+ through +Converters.process+.
  def html_tree_process_to_adoc(tree, state = {})
    Coradoc::ReverseAdoc::Converters.process(tree, state)
  end

  # Debugging aid: writes the current HTML tree to a temporary .html file
  # and opens it with chromium-browser.
  #
  # @return [Boolean, nil] the result of +system+
  def html_tree_preview
    # Loaded on demand: only this debugging helper needs Tempfile.
    require "tempfile"

    Tempfile.open(%w"coradoc .html") do |file|
      file << html_tree.to_html
      # The write above is buffered; flush so the browser reads the whole
      # document instead of an empty or truncated file.
      file.flush
      system "chromium-browser", "--no-sandbox", file.path
    end
  end

  # define preprocess_html_tree to process HTML trees

  # Creates a hook to be called instead of converting an element
  # to a Coradoc node. If the hook returns nil or false, the regular
  # conversion runs after all.
  #
  #   proc |html_node, state|
  #     coradoc_node
  #   end
  def html_tree_add_hook_pre(element, &block)
    @html_tree_hooks_pre[element] = block
  end

  # Registers the same pre hook for every element matching +css+.
  def html_tree_add_hook_pre_by_css(css, &block)
    html_tree.css(css).each do |element|
      html_tree_add_hook_pre(element, &block)
    end
  end

  # Creates a hook to be called after converting an element
  # to a Coradoc node.
  #
  #   proc |html_node, coradoc_node, state|
  #     coradoc_node
  #   end
  def html_tree_add_hook_post(element, &block)
    @html_tree_hooks_post[element] = block
  end

  # Registers the same post hook for every element matching +css+.
  def html_tree_add_hook_post_by_css(css, &block)
    html_tree.css(css).each do |element|
      html_tree_add_hook_post(element, &block)
    end
  end

  # Converts +node+, applying the hooks registered for it.
  #
  # The pre hook (if any) runs first; when it is absent or returns a falsy
  # value, the given block is yielded +node+ and +state+ to perform the
  # regular conversion. The post hook (if any) then receives the result
  # and its return value becomes the final result.
  #
  # @return [Object] the converted (and possibly hook-adjusted) node
  def html_tree_run_hooks(node, state, &_block)
    pre_hook = @html_tree_hooks_pre[node]
    post_hook = @html_tree_hooks_post[node]

    converted = pre_hook&.call(node, state) || yield(node, state)
    return converted unless post_hook

    post_hook.call(node, converted, state)
  end

  #### Coradoc tree functionalities

  attr_accessor :coradoc_tree

  # define postprocess_coradoc_tree to change coradoc tree

  #### AsciiDoc string functionalities

  attr_accessor :asciidoc_string

  # define postprocess_asciidoc_string to change the AsciiDoc string
end
|
131
|
+
end
|
@@ -0,0 +1,174 @@
|
|
1
|
+
module Coradoc::ReverseAdoc
|
2
|
+
class Plugin
|
3
|
+
# This plugin enhances documents from the PLATEAU project
|
4
|
+
# to extract more data.
|
5
|
+
#
|
6
|
+
# Usage:
|
7
|
+
# reverse_adoc -rcoradoc/reverse_adoc/plugins/plateau
|
8
|
+
# --external-images -u raise --output _out/index.adoc index.html
|
9
|
+
class Plateau < Plugin
|
10
|
+
# Name used for this plugin in place of the class name.
#
# @return [String]
def name
  "PLATEAU"
end
|
13
|
+
|
14
|
+
# Reshapes the PLATEAU HTML tree before it is converted: removes the side
# menu, renames non-semantic elements to headings/table headers, clears a
# few ids, turns ".imagedata" elements into table captions or image titles,
# and registers the conversion hooks for headings and pseudo-lists.
#
# The statements run in this exact order on purpose: later selectors (for
# example "h1, h2, h3") rely on the renames done earlier.
def preprocess_html_tree
  # A more aggressive simplification of the tree is possible, but it messes
  # up the images and paragraphs, so it stays disabled:
  #   html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
  #   html_tree_replace_with_children_by_css("div.container_box")
  #   html_tree_replace_with_children_by_css("div.col.col-12")
  #   html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
  #   html_tree_replace_with_children_by_css("div.row")

  # The side menu goes away, so that we can generate the TOC ourselves.
  html_tree_remove_by_css(".sideMenu")

  # Non-semantic classes become semantic HTML tags.
  [
    [".titledata", "h1"],
    [".subtitledata", "h2"],
    [".pitemdata", "h3"],
    [".sitemdata", "h4"],
    ['td[bgcolor="#D0CECE"]', "th"],
  ].each do |css, tag|
    html_tree_change_tag_name_by_css(css, tag)
  end

  # CSS ids that are of no importance to us.
  ["#__nuxt", "#__layout", "#app"].each do |css|
    html_tree_change_properties_by_css(css, id: nil)
  end

  # A table/img caption becomes a real caption.
  html_tree.css(".imagedata").each do |caption|
    following = caption.parent.next&.children&.first
    if following&.name == "table"
      caption.name = "caption"
      following.prepend_child(caption)
      next
    end

    preceding = caption.parent.previous&.children&.first
    next unless preceding&.name == "img" && preceding["src"]

    preceding["title"] = caption.text.strip
    caption.remove
  end

  # Hooks for H1, H2, H3, H4.
  html_tree_add_hook_post_by_css("h1, h2, h3", &method(:handle_headers))
  html_tree_add_hook_post_by_css("h4", &method(:handle_headers_h4))

  # Table cells aligned to center.
  html_tree_change_properties_by_css(".tableTopCenter", align: "center")

  # Non-semantic lists and indentation. The //-PTnD ... //-ENDPTnD comment
  # markers emitted below are consumed by postprocess_asciidoc_string.
  html_tree_add_hook_pre_by_css ".text2data" do |node, _state|
    text = html_tree_process_to_adoc(node).strip
    # "\u3000" is tested explicitly because strip leaves it in place.
    next "" if text.empty? || text == "\u3000"

    if text.start_with?(/\d+\./)
      item = text.sub(/\A\d+.\s*/, "")
      ".. #{item}\n"
    else
      bulleted = text.gsub(/^/, "** ")
      "\n\n//-PT2D\n#{bulleted}\n//-ENDPT2D\n\n"
    end
  end

  [[3, "***"], [4, "****"]].each do |depth, bullet|
    html_tree_add_hook_pre_by_css ".text#{depth}data" do |node, _state|
      text = html_tree_process_to_adoc(node).strip
      next "" if text.empty? || text == "\u3000"

      bulleted = text.strip.gsub(/^/, "#{bullet} ")
      "\n\n//-PT#{depth}D\n#{bulleted}\n//-ENDPT#{depth}D\n\n"
    end
  end

  [[2, "**"], [3, "***"]].each do |depth, bullet|
    html_tree_add_hook_pre_by_css ".text#{depth}data_point ul" do |node, _state|
      text = html_tree_process_to_adoc(node.children.first.children).strip

      "#{bullet} #{text}\n"
    end
  end

  # html_tree_preview
end
|
108
|
+
|
109
|
+
# Post-conversion hook for headings: assigns a style to the special
# sections and strips the leading section number from the heading text.
#
# Headings whose id starts with "toc0_" get a style chosen from their
# text and are forced to level 1; a warning is printed for texts that are
# not recognised. For every heading, leading digits, whitespace and dots
# are then removed in place from the first content item.
#
# Headings without an id and headings without content are passed through
# instead of raising.
#
# @param node [Object] the source HTML node (unused)
# @param coradoc [Object] the converted heading; must respond to +id+,
#   +content+, +style=+ and +level_int=+
# @param state [Hash] conversion state (unused)
# @return [Object] the same +coradoc+ object
def handle_headers(node, coradoc, state)
  # id may be nil when the source element carried no id attribute.
  if coradoc.id&.start_with?("toc0_")
    heading_text = coradoc.content.map(&:content).join
    # Special content
    case heading_text.strip
    when "はじめに", "改定の概要" # Introduction, Revision overview
      coradoc.style = "abstract" # The older version document has ".preface"
    when "参考文献" # Bibliography
      coradoc.style = "bibliography"
    when "改訂履歴" # Document history
      coradoc.style = "appendix"
    else
      warn "Unknown section #{heading_text.inspect}"
    end

    # Ensure they are generated as level 1
    coradoc.level_int = 1
  end

  # Remove numbers (nothing to do for a heading without content)
  coradoc.content.first&.content&.sub!(/\A[\d\s.]+/, "")

  coradoc
end
|
135
|
+
|
136
|
+
# Post-conversion hook for H4 headings, which carry their real depth in a
# textual prefix: "(1) Title" becomes a level 4 heading and "1) Title" a
# level 5 heading, with the prefix removed and the rest stripped.
#
# @param node [Object] the source HTML node (unused)
# @param coradoc [Object] the converted heading; must respond to
#   +content+ and +level_int=+
# @param state [Hash] conversion state (unused)
# @return [Object] +coradoc+ when a prefix was recognised, otherwise a
#   two-element Array of a "// FIXME" comment line and +coradoc+
def handle_headers_h4(node, coradoc, state)
  level =
    case coradoc.content.first.content
    when /\A\(\d+\)(.*)/ then 4
    when /\A\d+\)(.*)/ then 5
    end

  return ["// FIXME\n", coradoc] unless level

  coradoc.level_int = level
  # The successful match above left the heading text in capture group 1.
  coradoc.content.first.content = Regexp.last_match(1).strip
  coradoc
end
|
150
|
+
|
151
|
+
# Resolves the //-PTnD ... //-ENDPTnD comment markers found in the
# generated AsciiDoc into list styling, then stores the result back in
# +asciidoc_string+.
#
# The substitutions are applied strictly in the order listed: the specific
# marker pairs must be consumed before the generic patterns further down.
#
# @return [String] the rewritten AsciiDoc
def postprocess_asciidoc_string
  ### Custom indentation handling
  rewrites = [
    # If there's a step up, add [none]
    [%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n"],
    [%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n"],
    [%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n"],
    # Collapse adjacent blocks of text[2,3,4]data
    [%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n"],
    # In the beginning, add [none]
    [%r{\s+//-PT[234]D\s+}, "\n\n[none]\n"],
    # If following with another list, ensure we readd styling
    [%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*"],
    # Otherwise, clean up
    [%r{\s+//-ENDPT[234]D\s+}, "\n\n"],
  ]

  self.asciidoc_string = rewrites.inject(self.asciidoc_string) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
Coradoc::ReverseAdoc.config.plugins << Coradoc::ReverseAdoc::Plugin::Plateau
|
@@ -0,0 +1,148 @@
|
|
1
|
+
module Coradoc::ReverseAdoc
|
2
|
+
# Postprocessor's aim is to convert a Coradoc tree from
|
3
|
+
# a mess that has been created from HTML into a tree that
|
4
|
+
# is compatible with what we would get out of Coradoc, if
|
5
|
+
# it parsed it directly.
|
6
|
+
class Postprocessor
|
7
|
+
# Convenience entry point: builds a postprocessor for +coradoc+ and runs it.
#
# @param coradoc [Object] the Coradoc tree to post-process
# @return [Object] whatever the instance-level +process+ returns
def self.process(coradoc)
  postprocessor = new(coradoc)
  postprocessor.process
end
|
10
|
+
|
11
|
+
# @param coradoc [Object] the Coradoc tree this postprocessor works on;
#   kept in +@tree+ and replaced by each processing step
def initialize(coradoc)
  @tree = coradoc
end
|
14
|
+
|
15
|
+
# Replaces sections that carry no content of their own with their
# contents: a section is collapsed when it reports +safe_to_collapse?+ and
# every one of its contents is exactly a Section or a Title.
#
# Sections without any contents are kept, because some documents use an
# empty <div> as a <br>.
def collapse_meaningless_sections
  @tree = Coradoc::Element::Base.visit(@tree) do |elem, _dir|
    next elem unless elem.is_a?(Coradoc::Element::Section) && elem.safe_to_collapse?

    kinds = Array(elem.contents).map(&:class)
    next elem if kinds.empty?

    collapsible = [Coradoc::Element::Section, Coradoc::Element::Title]
    next elem unless kinds.all? { |kind| collapsible.include?(kind) }

    elem.contents
  end
end
|
31
|
+
|
32
|
+
# With the tree cleaned up, builds real sections out of flat arrays: every
# Array that contains at least one Title is regrouped (on the :post pass)
# so that each Title opens a Section, the elements following it become that
# section's contents, and deeper titles become nested sections.
def generate_meaningful_sections
  @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
    # Only arrays holding a title are candidates for regrouping.
    candidate = dir == :post &&
                elem.is_a?(Array) &&
                elem.grep(Coradoc::Element::Title).any?
    next elem unless candidate

    regrouped = []
    current_contents = regrouped
    # Slot n holds the array that receives sections of level n + 1.
    # Initially every level lands in the regrouped array itself.
    targets_by_level = Array.new(8, regrouped)

    elem.each do |item|
      unless item.is_a? Coradoc::Element::Title
        # Plain elements are content of whichever section is open.
        current_contents << item
        next
      end

      current_contents = []
      subsections = []
      level = item.level_int
      section = Coradoc::Element::Section.new(
        item, contents: current_contents, sections: subsections
      )

      # Some documents are not consistent and e.g. follow H2 with H4, so
      # every deeper level is pointed at this section, not just the next.
      (level...8).each { |slot| targets_by_level[slot] = subsections }
      targets_by_level[level - 1] << section
    end

    regrouped
  end
end
|
74
|
+
|
75
|
+
# Splits the document into one tree per section file, when
# +Coradoc::ReverseAdoc.config.split_sections+ gives a maximum level.
#
# Every section whose title level is at most that maximum is moved into
# its own entry of a Hash, keyed by a generated path such as
# "sections/section-01/abstract-02.adoc", and replaced in its parent by an
# include line. The remaining main tree is stored under the +nil+ key and
# the Hash becomes the new +@tree+.
#
# NOTE(review): the :post pass reads the ancestors of a section from
# levels 1..level; a document that skips a level (or starts below level 1)
# leaves a nil in that range - confirm such input cannot reach this point.
#
# @return [Hash, nil] the per-file trees, or nil when splitting is disabled
def split_sections
  max_level = Coradoc::ReverseAdoc.config.split_sections
  return unless max_level

  files = {}
  open_sections = []  # most recent section seen at each level
  predecessors = {}   # section => section seen before it at its level

  # 1-based position of a section among the chain of its predecessors
  # that share its title style.
  ordinal_of = lambda do |section|
    count = 0
    cursor = section
    while cursor
      count += 1 if section.title.style == cursor.title.style
      cursor = predecessors[cursor]
    end
    count
  end

  # File name prefix: the title style, or "section" when there is none.
  prefix_of = lambda do |section|
    (section.title.style || "section") + "-"
  end

  @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
    title = elem.is_a?(Coradoc::Element::Section) ? elem.title : nil
    next elem unless title && title.level_int <= max_level

    level = title.level_int

    if dir == :pre
      # In the PRE pass, we build a tree of sections, so that
      # we can compute numbers
      predecessors[elem] = open_sections[level]
      open_sections[level] = elem
      open_sections[(level + 1)..nil] = nil
      next elem
    end

    # In the POST pass, we replace the sections with their
    # include tag.
    segments = open_sections[1..level].map do |parent|
      "%s%02d" % [prefix_of.(parent), ordinal_of.(parent)]
    end
    section_file = "sections/" + segments.join("/") + ".adoc"

    files[section_file] = elem
    up = "../" * (level - 1)
    "\ninclude::#{up}#{section_file}[]\n"
  end

  files[nil] = @tree
  @tree = files
end
|
134
|
+
|
135
|
+
# Runs the whole post-processing pipeline and returns the resulting tree.
#
# Collapsing runs twice: building the meaningful sections changes the
# structure, which may leave new meaningful sections as the only children
# of some meaningless ones, so a second pass simplifies the document
# further. Splitting into files comes last.
#
# @return [Object] the processed tree
def process
  pipeline = %i[
    collapse_meaningless_sections
    generate_meaningful_sections
    collapse_meaningless_sections
    split_sections
  ]
  pipeline.each { |step| send(step) }

  @tree
end
|
147
|
+
end
|
148
|
+
end
|