coradoc 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/exe/reverse_adoc +24 -3
- data/lib/coradoc/document.rb +1 -0
- data/lib/coradoc/element/admonition.rb +2 -2
- data/lib/coradoc/element/attribute.rb +2 -2
- data/lib/coradoc/element/attribute_list.rb +94 -15
- data/lib/coradoc/element/audio.rb +13 -2
- data/lib/coradoc/element/author.rb +4 -2
- data/lib/coradoc/element/base.rb +70 -7
- data/lib/coradoc/element/block/core.rb +8 -4
- data/lib/coradoc/element/block/quote.rb +1 -1
- data/lib/coradoc/element/break.rb +1 -1
- data/lib/coradoc/element/document_attributes.rb +6 -6
- data/lib/coradoc/element/header.rb +4 -2
- data/lib/coradoc/element/image/block_image.rb +13 -2
- data/lib/coradoc/element/image/core.rb +34 -5
- data/lib/coradoc/element/image/inline_image.rb +2 -2
- data/lib/coradoc/element/inline/anchor.rb +4 -2
- data/lib/coradoc/element/inline/bold.rb +9 -4
- data/lib/coradoc/element/inline/cross_reference.rb +4 -2
- data/lib/coradoc/element/inline/hard_line_break.rb +1 -1
- data/lib/coradoc/element/inline/highlight.rb +11 -6
- data/lib/coradoc/element/inline/italic.rb +9 -4
- data/lib/coradoc/element/inline/link.rb +22 -6
- data/lib/coradoc/element/inline/monospace.rb +9 -4
- data/lib/coradoc/element/inline/quotation.rb +3 -1
- data/lib/coradoc/element/inline/subscript.rb +4 -2
- data/lib/coradoc/element/inline/superscript.rb +4 -2
- data/lib/coradoc/element/list/core.rb +9 -6
- data/lib/coradoc/element/list/definition.rb +19 -0
- data/lib/coradoc/element/list/ordered.rb +1 -1
- data/lib/coradoc/element/list/unordered.rb +1 -1
- data/lib/coradoc/element/list.rb +1 -0
- data/lib/coradoc/element/list_item.rb +8 -3
- data/lib/coradoc/element/list_item_definition.rb +32 -0
- data/lib/coradoc/element/paragraph.rb +4 -2
- data/lib/coradoc/element/revision.rb +4 -2
- data/lib/coradoc/element/section.rb +21 -4
- data/lib/coradoc/element/table.rb +27 -10
- data/lib/coradoc/element/text_element.rb +48 -8
- data/lib/coradoc/element/title.rb +26 -6
- data/lib/coradoc/element/video.rb +32 -5
- data/lib/coradoc/reverse_adoc/README.adoc +14 -8
- data/lib/coradoc/reverse_adoc/cleaner.rb +20 -8
- data/lib/coradoc/reverse_adoc/config.rb +35 -16
- data/lib/coradoc/reverse_adoc/converters/a.rb +17 -12
- data/lib/coradoc/reverse_adoc/converters/aside.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/audio.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/base.rb +48 -44
- data/lib/coradoc/reverse_adoc/converters/blockquote.rb +2 -11
- data/lib/coradoc/reverse_adoc/converters/br.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/bypass.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/code.rb +5 -42
- data/lib/coradoc/reverse_adoc/converters/div.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/dl.rb +55 -0
- data/lib/coradoc/reverse_adoc/converters/em.rb +5 -43
- data/lib/coradoc/reverse_adoc/converters/figure.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/h.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/head.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/hr.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/img.rb +21 -16
- data/lib/coradoc/reverse_adoc/converters/li.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/mark.rb +5 -11
- data/lib/coradoc/reverse_adoc/converters/markup.rb +27 -0
- data/lib/coradoc/reverse_adoc/converters/ol.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/p.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/pre.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/q.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/strong.rb +5 -41
- data/lib/coradoc/reverse_adoc/converters/sub.rb +6 -4
- data/lib/coradoc/reverse_adoc/converters/sup.rb +7 -5
- data/lib/coradoc/reverse_adoc/converters/table.rb +215 -4
- data/lib/coradoc/reverse_adoc/converters/td.rb +1 -7
- data/lib/coradoc/reverse_adoc/converters/text.rb +1 -38
- data/lib/coradoc/reverse_adoc/converters/tr.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/video.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters.rb +21 -0
- data/lib/coradoc/reverse_adoc/html_converter.rb +109 -20
- data/lib/coradoc/reverse_adoc/plugin.rb +131 -0
- data/lib/coradoc/reverse_adoc/plugins/plateau.rb +174 -0
- data/lib/coradoc/reverse_adoc/postprocessor.rb +148 -0
- data/lib/coradoc/reverse_adoc.rb +3 -0
- data/lib/coradoc/version.rb +1 -1
- data/lib/reverse_adoc.rb +1 -1
- metadata +8 -3
- data/lib/coradoc/element/inline/image.rb +0 -26
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "converters/markup"
|
|
3
4
|
require_relative "converters/a"
|
|
4
5
|
require_relative "converters/aside"
|
|
5
6
|
require_relative "converters/audio"
|
|
@@ -8,6 +9,7 @@ require_relative "converters/br"
|
|
|
8
9
|
require_relative "converters/bypass"
|
|
9
10
|
require_relative "converters/code"
|
|
10
11
|
require_relative "converters/div"
|
|
12
|
+
require_relative "converters/dl"
|
|
11
13
|
require_relative "converters/drop"
|
|
12
14
|
require_relative "converters/em"
|
|
13
15
|
require_relative "converters/figure"
|
|
@@ -34,28 +36,115 @@ require_relative "converters/tr"
|
|
|
34
36
|
require_relative "converters/video"
|
|
35
37
|
require_relative "converters/math"
|
|
36
38
|
|
|
37
|
-
module Coradoc
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
39
|
+
module Coradoc
|
|
40
|
+
module ReverseAdoc
|
|
41
|
+
# Drives the HTML -> Coradoc tree -> AsciiDoc pipeline and invokes the
# optional plugin callbacks around each stage.
class HtmlConverter
  # Converts HTML input into a post-processed Coradoc tree.
  #
  # @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node]
  #   HTML text or an already parsed document/node. Any other type yields
  #   no root node, in which case "" is returned.
  # @param options [Hash] settings handed to ReverseAdoc.config.with for the
  #   duration of the call. The optional :plugin_instances entry is taken
  #   out first and used as the list of plugin objects; when it is absent,
  #   one instance of every configured plugin class is created.
  # @return the tree returned by Postprocessor.process, after every plugin
  #   that defines postprocess_coradoc_tree had a chance to replace it, or
  #   "" when the input has no root node.
  def self.to_coradoc(input, options = {})
    # Work on a copy so the caller's hash keeps its :plugin_instances entry
    # (and so a frozen hash can be passed in).
    plugin_instances = options[:plugin_instances]
    options = options.reject { |key, _value| key == :plugin_instances }

    ReverseAdoc.config.with(options) do
      plugin_instances ||= Coradoc::ReverseAdoc.config.plugins.map(&:new)

      root = track_time "Loading input HTML document" do
        case input
        when String
          Nokogiri::HTML(input).root
        when Nokogiri::XML::Document
          input.root
        when Nokogiri::XML::Node
          input
        end
      end

      return "" unless root

      # Every plugin receives the tree; a plugin may swap the root, so the
      # tree is read back after each one.
      plugin_instances.each do |plugin|
        plugin.html_tree = root
        if plugin.respond_to?(:preprocess_html_tree)
          track_time "Preprocessing document with #{plugin.name} plugin" do
            plugin.preprocess_html_tree
          end
        end
        root = plugin.html_tree
      end

      coradoc = track_time "Converting input document tree to Coradoc tree" do
        Converters.process_coradoc(root, plugin_instances: plugin_instances)
      end

      coradoc = track_time "Post-process Coradoc tree" do
        Postprocessor.process(coradoc)
      end

      plugin_instances.each do |plugin|
        if plugin.respond_to?(:postprocess_coradoc_tree)
          plugin.coradoc_tree = coradoc
          track_time "Postprocessing Coradoc tree with #{plugin.name} plugin" do
            plugin.postprocess_coradoc_tree
          end
          coradoc = plugin.coradoc_tree
        end
      end

      coradoc
    end
  end

  # Converts HTML input into AsciiDoc.
  #
  # @param input see {to_coradoc}
  # @param options [Hash] settings applied via ReverseAdoc.config.with
  # @return the AsciiDoc string, or - when to_coradoc returns a Hash - a
  #   Hash with the same keys whose values are the converted strings.
  def self.convert(input, options = {})
    ReverseAdoc.config.with(options) do
      # The same plugin objects are used for every stage of the conversion.
      plugin_instances = Coradoc::ReverseAdoc.config.plugins.map(&:new)

      options = options.merge(plugin_instances: plugin_instances)

      coradoc = to_coradoc(input, options)

      if coradoc.is_a?(Hash)
        coradoc.to_h do |file, tree|
          track_time "Converting file #{file || 'main'}" do
            [file, convert_single_coradoc_to_adoc(file, tree, plugin_instances)]
          end
        end
      else
        convert_single_coradoc_to_adoc(nil, coradoc, plugin_instances)
      end
    end
  end

  # Generates AsciiDoc for one Coradoc tree, tidies it and lets every
  # plugin that defines postprocess_asciidoc_string rewrite the result.
  #
  # @param _file unused; identifies the tree when several are converted
  # @param coradoc the tree passed to Coradoc::Generator.gen_adoc
  # @param plugin_instances [Array] plugin objects to run
  # @return the resulting AsciiDoc string
  def self.convert_single_coradoc_to_adoc(_file, coradoc, plugin_instances)
    result = track_time "Converting Coradoc tree into Asciidoc" do
      Coradoc::Generator.gen_adoc(coradoc)
    end
    result = track_time "Cleaning up the result" do
      ReverseAdoc.cleaner.tidy(result)
    end
    plugin_instances.each do |plugin|
      if plugin.respond_to?(:postprocess_asciidoc_string)
        plugin.asciidoc_string = result
        track_time "Postprocessing AsciiDoc string with #{plugin.name} plugin" do
          plugin.postprocess_asciidoc_string
        end
        result = plugin.asciidoc_string
      end
    end
    result
  end

  # Nesting depth of the track_time calls currently in progress; used to
  # indent the progress messages.
  @track_time_indentation = 0

  # Runs the given block and returns its value. When the track_time setting
  # is enabled, start/finish messages with the elapsed time are written via
  # warn, indented by the current nesting depth.
  #
  # @param task [String] description used in the messages
  # @return whatever the block returns
  def self.track_time(task)
    return yield unless ReverseAdoc.config.track_time

    warn " " * @track_time_indentation +
      "* #{task} is starting..."
    @track_time_indentation += 1
    # Monotonic clock: a duration must not be affected by wall-clock changes.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    begin
      ret = yield
    ensure
      # Restore the depth even when the block raises, so later messages are
      # not indented as if the failed task were still running.
      @track_time_indentation -= 1
    end
    time_elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    warn " " * @track_time_indentation +
      "* #{task} took #{time_elapsed.round(3)} seconds"
    ret
  end
end
|
|
60
149
|
end
|
|
61
150
|
end
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
module Coradoc::ReverseAdoc
|
|
2
|
+
# Base class for ReverseAdoc plugins. A plugin is handed the HTML tree, the
# Coradoc tree and the final AsciiDoc string through the accessors below and
# may define preprocess_html_tree, postprocess_coradoc_tree and
# postprocess_asciidoc_string to change them.
class Plugin
  #### Plugin system general

  # Allow building plugins with a shorthand syntax:
  # plugin = Coradoc::ReverseAdoc::Plugin.new do
  #   def name = "Test"
  # end
  #
  # Called on Plugin itself, this returns a new anonymous subclass whose
  # body is the given block (not an instance). Called on a subclass, it
  # creates an instance as usual.
  def self.new(&block)
    if self == Plugin
      Class.new(Plugin, &block)
    else
      super
    end
  end

  def initialize
    # Hooks are keyed by the HTML node they apply to.
    @html_tree_hooks_pre = {}
    @html_tree_hooks_post = {}
  end

  # define name to name a Plugin
  #
  # Defaults to the class name (nil for an anonymous class).
  def name
    self.class.name
  end

  #### HTML Tree functionalities

  attr_accessor :html_tree

  # Renames every element matching the CSS selector to new_name.
  def html_tree_change_tag_name_by_css(css, new_name)
    html_tree.css(css).each do |e|
      e.name = new_name
    end
  end

  # Sets the given attributes (name => value) on every element matching
  # the CSS selector.
  #
  # NOTE(review): some callers pass nil as a value, presumably to drop the
  # attribute; confirm how the node's []= treats nil.
  def html_tree_change_properties_by_css(css, properties)
    html_tree.css(css).each do |e|
      properties.each do |k, v|
        e[k.to_s] = v
      end
    end
  end

  # Removes every element matching the CSS selector from the tree.
  def html_tree_remove_by_css(css)
    html_tree.css(css).each(&:remove)
  end

  # Replaces every element matching the CSS selector with its children.
  def html_tree_replace_with_children_by_css(css)
    html_tree.css(css).each do |e|
      e.replace(e.children)
    end
  end

  # Delegates to Converters.process_coradoc for the given tree.
  def html_tree_process_to_coradoc(tree, state = {})
    Coradoc::ReverseAdoc::Converters.process_coradoc(tree, state)
  end

  # Delegates to Converters.process for the given tree.
  def html_tree_process_to_adoc(tree, state = {})
    Coradoc::ReverseAdoc::Converters.process(tree, state)
  end

  # Debugging aid: writes the current HTML tree to a temporary file and
  # opens it with chromium-browser. Returns the result of the system call.
  def html_tree_preview
    # Loaded lazily: only this debugging helper needs Tempfile.
    require "tempfile"

    Tempfile.open(["coradoc", ".html"]) do |i|
      i << html_tree.to_html
      # Push buffered data to disk before another process reads the file.
      i.flush
      system "chromium-browser", "--no-sandbox", i.path
    end
  end

  # define preprocess_html_tree to process HTML trees

  # Creates a hook to be called instead of converting an element
  # to a Coradoc node.
  #
  # proc |html_node, state|
  #   coradoc_node
  # end
  #
  # A hook that returns nil or false lets the regular conversion run.
  def html_tree_add_hook_pre(element, &block)
    @html_tree_hooks_pre[element] = block
  end

  # Registers the same pre hook for every element matching the selector.
  def html_tree_add_hook_pre_by_css(css, &block)
    html_tree.css(css).each do |e|
      html_tree_add_hook_pre(e, &block)
    end
  end

  # Creates a hook to be called after converting an element
  # to a Coradoc node.
  #
  # proc |html_node, coradoc_node, state|
  #   coradoc_node
  # end
  def html_tree_add_hook_post(element, &block)
    @html_tree_hooks_post[element] = block
  end

  # Registers the same post hook for every element matching the selector.
  def html_tree_add_hook_post_by_css(css, &block)
    html_tree.css(css).each do |e|
      html_tree_add_hook_post(e, &block)
    end
  end

  # Converts one node, honouring the hooks registered for it.
  #
  # The pre hook (if any) runs first; when it yields nothing, the given
  # block performs the regular conversion. The post hook (if any) then
  # receives the node and the conversion result and its return value is
  # used instead.
  def html_tree_run_hooks(node, state, &_block)
    hook_pre = @html_tree_hooks_pre[node]
    hook_post = @html_tree_hooks_post[node]

    coradoc = hook_pre.(node, state) if hook_pre
    coradoc ||= yield node, state

    if hook_post
      coradoc = hook_post.(node, coradoc, state)
    end

    coradoc
  end

  #### Coradoc tree functionalities

  attr_accessor :coradoc_tree

  # define postprocess_coradoc_tree to change coradoc tree

  #### AsciiDoc string functionalities

  attr_accessor :asciidoc_string

  # define postprocess_asciidoc_string to change the AsciiDoc string
end
|
|
131
|
+
end
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
module Coradoc::ReverseAdoc
|
|
2
|
+
class Plugin
|
|
3
|
+
# This plugin enhances documents from the PLATEAU project
# to extract more data.
#
# Usage:
# reverse_adoc -rcoradoc/reverse_adoc/plugins/plateau
#   --external-images -u raise --output _out/index.adoc index.html
class Plateau < Plugin
  def name
    "PLATEAU"
  end

  # Rewrites the HTML tree in place: drops the side menu, maps the
  # project's CSS classes onto semantic tags, attaches captions and
  # registers the conversion hooks used for headers and pseudo-lists.
  def preprocess_html_tree
    # Let's simplify the tree by removing what's extraneous
    # html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
    # html_tree_replace_with_children_by_css("div.container_box")
    # html_tree_replace_with_children_by_css("div.col.col-12")
    # html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
    # html_tree_replace_with_children_by_css("div.row")
    #
    # We can remove that, but it messes up the images and paragraphs.

    # Remove side menu, so we can generate TOC ourselves
    html_tree_remove_by_css(".sideMenu")

    # Correct non-semantic classes into semantic HTML tags
    html_tree_change_tag_name_by_css(".titledata", "h1")
    html_tree_change_tag_name_by_css(".subtitledata", "h2")
    html_tree_change_tag_name_by_css(".pitemdata", "h3")
    html_tree_change_tag_name_by_css(".sitemdata", "h4")
    html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', "th")

    # Remove some CSS ids that are not important to us
    html_tree_change_properties_by_css("#__nuxt", id: nil)
    html_tree_change_properties_by_css("#__layout", id: nil)
    html_tree_change_properties_by_css("#app", id: nil)

    # Convert table/img caption to become a caption
    attach_captions

    # Add hooks for H1, H2, H3, H4
    html_tree_add_hook_post_by_css("h1, h2, h3", &method(:handle_headers))
    html_tree_add_hook_post_by_css("h4", &method(:handle_headers_h4))

    # Table cells aligned to center
    html_tree_change_properties_by_css(".tableTopCenter", align: "center")

    # Handle non-semantic lists and indentation
    register_pseudo_list_hooks

    # html_tree_preview
  end

  # Post hook for h1-h3: styles the special "toc0_" sections, forces them
  # to level 1 and strips the leading numbering from the title text.
  #
  # @return the (modified) coradoc node
  def handle_headers(node, coradoc, state)
    # A header without an id is simply not one of the special sections.
    if coradoc.id&.start_with?("toc0_")
      content = coradoc.content.map(&:content).join
      # Special content
      case content.strip
      when "はじめに", "改定の概要" # Introduction, Revision overview
        coradoc.style = "abstract" # The older version document has ".preface"
      when "参考文献" # Bibliography
        coradoc.style = "bibliography"
      when "改訂履歴" # Document history
        coradoc.style = "appendix"
      else
        warn "Unknown section #{content.inspect}"
      end

      # Ensure they are generated as level 1
      coradoc.level_int = 1
    end

    # Remove numbers
    coradoc.content.first.content.sub!(/\A[\d\s.]+/, "")

    coradoc
  end

  # Post hook for h4: titles starting with "(N)" become level 4, titles
  # starting with "N)" become level 5; the marker is removed in both cases.
  # Anything else is returned unchanged, preceded by a "// FIXME" line.
  def handle_headers_h4(node, coradoc, state)
    heading = coradoc.content.first

    case heading.content
    when /\A\(\d+\)(.*)/
      coradoc.level_int = 4
      heading.content = Regexp.last_match(1).strip
      coradoc
    when /\A\d+\)(.*)/
      coradoc.level_int = 5
      heading.content = Regexp.last_match(1).strip
      coradoc
    else
      ["// FIXME\n", coradoc]
    end
  end

  # Resolves the //-PTnD ... //-ENDPTnD marker comments emitted by the
  # pseudo-list hooks into list styling. The substitutions are order
  # dependent: each one consumes markers the later ones would also match.
  def postprocess_asciidoc_string
    str = asciidoc_string

    ### Custom indentation handling
    # If there's a step up, add [none]
    str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n")
    str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n")
    str = str.gsub(%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n")
    # Collapse blocks of text[2,3]data
    str = str.gsub(%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n")
    # In the beginning, add [none]
    str = str.gsub(%r{\s+//-PT[234]D\s+}, "\n\n[none]\n")
    # If following with another list, ensure we readd styling
    str = str.gsub(%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*")
    # Otherwise, clean up
    str = str.gsub(%r{\s+//-ENDPT[234]D\s+}, "\n\n")

    self.asciidoc_string = str
  end

  private

  # Moves each ".imagedata" element into the table that follows it (as a
  # <caption>), or turns it into the title of the image that precedes it.
  def attach_captions
    html_tree.css(".imagedata").each do |e|
      table = e.parent.next&.children&.first
      if table&.name == "table"
        e.name = "caption"
        table.prepend_child(e)
        next
      end

      img = e.parent.previous&.children&.first
      next unless img&.name == "img" && img["src"]

      img["title"] = e.text.strip
      e.remove
    end
  end

  # Registers the pre hooks that turn the ".textNdata" blocks and the
  # ".textNdata_point ul" lists into AsciiDoc list items.
  def register_pseudo_list_hooks
    html_tree_add_hook_pre_by_css ".text2data" do |node, _state|
      text = html_tree_process_to_adoc(node).strip
      # String#strip leaves U+3000 alone, so it is checked explicitly.
      next "" if text.empty? || text == "\u3000"

      if text.start_with?(/\d+\./)
        # "1. foo" becomes an ordered list item; the dot is matched
        # literally, exactly as in the check above.
        ".. #{text.sub(/\A\d+\.\s*/, '')}\n"
      else
        pseudo_list_block(text, 2)
      end
    end

    { ".text3data" => 3, ".text4data" => 4 }.each do |css, level|
      html_tree_add_hook_pre_by_css css do |node, _state|
        text = html_tree_process_to_adoc(node).strip
        next "" if text.empty? || text == "\u3000"

        pseudo_list_block(text, level)
      end
    end

    { ".text2data_point ul" => "**", ".text3data_point ul" => "***" }.each do |css, marker|
      html_tree_add_hook_pre_by_css css do |node, _state|
        text = html_tree_process_to_adoc(node.children.first.children).strip

        "#{marker} #{text}\n"
      end
    end
  end

  # Prefixes every line of text with a list marker of the given depth and
  # wraps the result in the //-PTnD marker comments that
  # postprocess_asciidoc_string resolves later.
  def pseudo_list_block(text, level)
    marked = text.gsub(/^/, "#{'*' * level} ")
    "\n\n//-PT#{level}D\n#{marked}\n//-ENDPT#{level}D\n\n"
  end
end
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
# Loading this file appends the Plateau plugin class to the configured plugin list.
Coradoc::ReverseAdoc.config.plugins << Coradoc::ReverseAdoc::Plugin::Plateau
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
module Coradoc::ReverseAdoc
|
|
2
|
+
# Reshapes the Coradoc tree obtained from HTML - which mirrors the HTML
# nesting rather than the document structure - into a tree that is
# compatible with what Coradoc would produce when parsing the document
# directly.
class Postprocessor
  # Convenience entry point: post-processes the given tree and returns
  # the result.
  def self.process(coradoc)
    new(coradoc).process
  end

  def initialize(coradoc)
    @tree = coradoc
  end

  # Replaces every collapsible section whose contents consist solely of
  # titles and other sections (i.e. former DIVs that only hold a title or
  # nest another DIV) with those contents.
  def collapse_meaningless_sections
    structural = [Coradoc::Element::Section, Coradoc::Element::Title]

    @tree = Coradoc::Element::Base.visit(@tree) do |node, _direction|
      next node unless node.is_a?(Coradoc::Element::Section) && node.safe_to_collapse?

      children = Array(node.contents)
      # Empty sections are kept, because some documents use <div> as a <br>.
      if !children.empty? && children.all? { |child| structural.include?(child.class) }
        node.contents
      else
        node
      end
    end
  end

  # Builds real sections out of flat arrays: once the tree has been cleaned
  # up, every array that holds at least one title is regrouped (on the
  # post-order pass) so that each title owns what follows it.
  def generate_meaningful_sections
    @tree = Coradoc::Element::Base.visit(@tree) do |node, direction|
      candidate = direction == :post &&
        node.is_a?(Array) &&
        node.any?(Coradoc::Element::Title)
      next node unless candidate

      nest_under_titles(node)
    end
  end

  # When the split_sections setting holds a level, replaces every section
  # up to that level with an include directive and turns the tree into a
  # Hash of file name => section; the remaining main tree is stored under
  # the nil key. Does nothing when the setting is not set.
  def split_sections
    max_level = Coradoc::ReverseAdoc.config.split_sections
    return unless max_level

    files = {}
    current_by_level = []
    predecessor = {}

    # Position of a section among the chain of earlier sections of the same
    # level that share its title style (the section itself included).
    ordinal = lambda do |section|
      count = 0
      cursor = section
      while cursor
        count += 1 if section.title.style == cursor.title.style
        cursor = predecessor[cursor]
      end
      count
    end

    # One path component, e.g. "section-01".
    path_segment = lambda do |section|
      prefix = (section.title.style || "section") + "-"
      format("%s%02d", prefix, ordinal.(section))
    end

    @tree = Coradoc::Element::Base.visit(@tree) do |node, direction|
      title = node.title if node.is_a?(Coradoc::Element::Section)
      next node unless title && title.level_int <= max_level

      level = title.level_int

      if direction == :pre
        # The PRE pass records the chain of sections, so that the numbers
        # can be computed later.
        predecessor[node] = current_by_level[level]
        current_by_level[level] = node
        current_by_level[(level + 1)..nil] = nil
        next node
      end

      # The POST pass swaps the section for its include tag.
      path = current_by_level[1..level].map(&path_segment).join("/")
      file = "sections/#{path}.adoc"

      files[file] = node
      "\ninclude::#{'../' * (level - 1)}#{file}[]\n"
    end

    files[nil] = @tree
    @tree = files
  end

  # Runs all passes and returns the resulting tree.
  def process
    collapse_meaningless_sections
    generate_meaningful_sections
    # Collapse once more: regrouping changes the structure, so a meaningless
    # section may now have new meaningful sections as its only children.
    collapse_meaningless_sections

    split_sections

    @tree
  end

  private

  # Regroups a flat list: each title opens a new section, non-title items
  # become contents of the most recent section (or stay at the top when no
  # title came before them) and sections are nested according to their
  # title level.
  def nest_under_titles(items)
    top_level = []
    current_contents = top_level
    # targets[n - 1] is the array a section of level n is appended to.
    targets = Array.new(8, top_level)

    items.each do |item|
      unless item.is_a?(Coradoc::Element::Title)
        current_contents << item
        next
      end

      current_contents = []
      subsections = []
      level = item.level_int
      section = Coradoc::Element::Section.new(
        item, contents: current_contents, sections: subsections
      )
      # Documents are not always consistent (e.g. H4 right after H2), so
      # every deeper level is pointed at this section's children.
      (level...8).each { |index| targets[index] = subsections }
      targets[level - 1] << section
    end

    top_level
  end
end
|
|
148
|
+
end
|
data/lib/coradoc/reverse_adoc.rb
CHANGED
|
@@ -9,6 +9,9 @@ require_relative "reverse_adoc/config"
|
|
|
9
9
|
require_relative "reverse_adoc/converters"
|
|
10
10
|
require_relative "reverse_adoc/converters/base"
|
|
11
11
|
require_relative "reverse_adoc/html_converter"
|
|
12
|
+
require_relative "reverse_adoc/plugin"
|
|
13
|
+
require_relative "reverse_adoc/postprocessor"
|
|
14
|
+
|
|
12
15
|
|
|
13
16
|
module Coradoc::ReverseAdoc
|
|
14
17
|
def self.convert(input, options = {})
|
data/lib/coradoc/version.rb
CHANGED
data/lib/reverse_adoc.rb
CHANGED
|
@@ -8,7 +8,7 @@ warn <<~END
|
|
|
8
8
|
| You are referencing an old require here:
|
|
9
9
|
| #{caller.join("\n| ")}
|
|
10
10
|
|
|
|
11
|
-
| You should also replace '
|
|
11
|
+
| You should also replace 'reverse_adoc' with 'coradoc' in your gem dependencies.
|
|
12
12
|
| reverse_adoc 2.0.0 will be kept with 'coradoc' as the only dependency.
|
|
13
13
|
|
|
|
14
14
|
| Please also ensure that you replace all references to ReverseAdoc in your code
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: coradoc
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.0
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: exe
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2024-
|
|
12
|
+
date: 2024-06-01 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: marcel
|
|
@@ -278,7 +278,6 @@ files:
|
|
|
278
278
|
- lib/coradoc/element/inline/cross_reference.rb
|
|
279
279
|
- lib/coradoc/element/inline/hard_line_break.rb
|
|
280
280
|
- lib/coradoc/element/inline/highlight.rb
|
|
281
|
-
- lib/coradoc/element/inline/image.rb
|
|
282
281
|
- lib/coradoc/element/inline/italic.rb
|
|
283
282
|
- lib/coradoc/element/inline/link.rb
|
|
284
283
|
- lib/coradoc/element/inline/monospace.rb
|
|
@@ -291,6 +290,7 @@ files:
|
|
|
291
290
|
- lib/coradoc/element/list/ordered.rb
|
|
292
291
|
- lib/coradoc/element/list/unordered.rb
|
|
293
292
|
- lib/coradoc/element/list_item.rb
|
|
293
|
+
- lib/coradoc/element/list_item_definition.rb
|
|
294
294
|
- lib/coradoc/element/paragraph.rb
|
|
295
295
|
- lib/coradoc/element/revision.rb
|
|
296
296
|
- lib/coradoc/element/section.rb
|
|
@@ -323,6 +323,7 @@ files:
|
|
|
323
323
|
- lib/coradoc/reverse_adoc/converters/bypass.rb
|
|
324
324
|
- lib/coradoc/reverse_adoc/converters/code.rb
|
|
325
325
|
- lib/coradoc/reverse_adoc/converters/div.rb
|
|
326
|
+
- lib/coradoc/reverse_adoc/converters/dl.rb
|
|
326
327
|
- lib/coradoc/reverse_adoc/converters/drop.rb
|
|
327
328
|
- lib/coradoc/reverse_adoc/converters/em.rb
|
|
328
329
|
- lib/coradoc/reverse_adoc/converters/figure.rb
|
|
@@ -333,6 +334,7 @@ files:
|
|
|
333
334
|
- lib/coradoc/reverse_adoc/converters/img.rb
|
|
334
335
|
- lib/coradoc/reverse_adoc/converters/li.rb
|
|
335
336
|
- lib/coradoc/reverse_adoc/converters/mark.rb
|
|
337
|
+
- lib/coradoc/reverse_adoc/converters/markup.rb
|
|
336
338
|
- lib/coradoc/reverse_adoc/converters/math.rb
|
|
337
339
|
- lib/coradoc/reverse_adoc/converters/ol.rb
|
|
338
340
|
- lib/coradoc/reverse_adoc/converters/p.rb
|
|
@@ -350,6 +352,9 @@ files:
|
|
|
350
352
|
- lib/coradoc/reverse_adoc/converters/video.rb
|
|
351
353
|
- lib/coradoc/reverse_adoc/errors.rb
|
|
352
354
|
- lib/coradoc/reverse_adoc/html_converter.rb
|
|
355
|
+
- lib/coradoc/reverse_adoc/plugin.rb
|
|
356
|
+
- lib/coradoc/reverse_adoc/plugins/plateau.rb
|
|
357
|
+
- lib/coradoc/reverse_adoc/postprocessor.rb
|
|
353
358
|
- lib/coradoc/transformer.rb
|
|
354
359
|
- lib/coradoc/version.rb
|
|
355
360
|
- lib/reverse_adoc.rb
|