coradoc 1.1.5 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +1 -0
- data/.irbrc +1 -0
- data/.rspec +3 -0
- data/.rubocop.yml +5 -1
- data/.rubocop_todo.yml +179 -0
- data/Gemfile +11 -0
- data/README.adoc +5 -7
- data/coradoc.gemspec +5 -16
- data/exe/reverse_adoc +1 -1
- data/exe/w2a +1 -1
- data/flake.lock +114 -0
- data/flake.nix +135 -0
- data/lib/coradoc/cli.rb +1 -1
- data/lib/coradoc/converter.rb +4 -5
- data/lib/coradoc/element/attribute.rb +10 -1
- data/lib/coradoc/element/attribute_list.rb +4 -3
- data/lib/coradoc/element/audio.rb +1 -1
- data/lib/coradoc/element/author.rb +2 -2
- data/lib/coradoc/element/base.rb +14 -2
- data/lib/coradoc/element/bibliography.rb +1 -1
- data/lib/coradoc/element/bibliography_entry.rb +1 -1
- data/lib/coradoc/element/block/open.rb +1 -1
- data/lib/coradoc/element/block.rb +1 -1
- data/lib/coradoc/element/document_attributes.rb +8 -2
- data/lib/coradoc/element/image/block_image.rb +3 -2
- data/lib/coradoc/element/image/core.rb +5 -4
- data/lib/coradoc/element/inline/attribute_reference.rb +19 -0
- data/lib/coradoc/element/inline/cross_reference.rb +4 -3
- data/lib/coradoc/element/inline/footnote.rb +24 -0
- data/lib/coradoc/element/inline/small.rb +19 -0
- data/lib/coradoc/element/inline/span.rb +37 -0
- data/lib/coradoc/element/inline/underline.rb +19 -0
- data/lib/coradoc/element/inline.rb +5 -1
- data/lib/coradoc/element/list/core.rb +2 -2
- data/lib/coradoc/element/list/ordered.rb +1 -0
- data/lib/coradoc/element/list/unordered.rb +1 -0
- data/lib/coradoc/element/list_item.rb +19 -20
- data/lib/coradoc/element/table.rb +4 -2
- data/lib/coradoc/element/term.rb +1 -0
- data/lib/coradoc/element/text_element.rb +4 -1
- data/lib/coradoc/element/title.rb +1 -1
- data/lib/coradoc/element/video.rb +2 -2
- data/lib/coradoc/input/adoc.rb +20 -18
- data/lib/coradoc/input/docx.rb +25 -23
- data/lib/coradoc/input/html/README.adoc +1 -1
- data/lib/coradoc/input/html/cleaner.rb +121 -117
- data/lib/coradoc/input/html/config.rb +58 -56
- data/lib/coradoc/input/html/converters/a.rb +44 -39
- data/lib/coradoc/input/html/converters/aside.rb +12 -8
- data/lib/coradoc/input/html/converters/audio.rb +24 -20
- data/lib/coradoc/input/html/converters/base.rb +103 -99
- data/lib/coradoc/input/html/converters/blockquote.rb +18 -14
- data/lib/coradoc/input/html/converters/br.rb +11 -7
- data/lib/coradoc/input/html/converters/bypass.rb +77 -73
- data/lib/coradoc/input/html/converters/code.rb +18 -14
- data/lib/coradoc/input/html/converters/div.rb +15 -11
- data/lib/coradoc/input/html/converters/dl.rb +51 -44
- data/lib/coradoc/input/html/converters/drop.rb +21 -17
- data/lib/coradoc/input/html/converters/em.rb +16 -12
- data/lib/coradoc/input/html/converters/figure.rb +19 -15
- data/lib/coradoc/input/html/converters/h.rb +32 -30
- data/lib/coradoc/input/html/converters/head.rb +17 -13
- data/lib/coradoc/input/html/converters/hr.rb +11 -7
- data/lib/coradoc/input/html/converters/ignore.rb +15 -11
- data/lib/coradoc/input/html/converters/img.rb +98 -93
- data/lib/coradoc/input/html/converters/li.rb +13 -9
- data/lib/coradoc/input/html/converters/mark.rb +14 -10
- data/lib/coradoc/input/html/converters/markup.rb +22 -18
- data/lib/coradoc/input/html/converters/math.rb +26 -19
- data/lib/coradoc/input/html/converters/ol.rb +55 -50
- data/lib/coradoc/input/html/converters/p.rb +16 -12
- data/lib/coradoc/input/html/converters/pass_through.rb +12 -8
- data/lib/coradoc/input/html/converters/pre.rb +49 -45
- data/lib/coradoc/input/html/converters/q.rb +12 -8
- data/lib/coradoc/input/html/converters/strong.rb +15 -11
- data/lib/coradoc/input/html/converters/sub.rb +15 -11
- data/lib/coradoc/input/html/converters/sup.rb +15 -11
- data/lib/coradoc/input/html/converters/table.rb +21 -13
- data/lib/coradoc/input/html/converters/td.rb +64 -60
- data/lib/coradoc/input/html/converters/text.rb +24 -20
- data/lib/coradoc/input/html/converters/th.rb +13 -9
- data/lib/coradoc/input/html/converters/tr.rb +17 -13
- data/lib/coradoc/input/html/converters/video.rb +24 -20
- data/lib/coradoc/input/html/converters.rb +45 -43
- data/lib/coradoc/input/html/errors.rb +8 -6
- data/lib/coradoc/input/html/html_converter.rb +93 -90
- data/lib/coradoc/input/html/plugin.rb +104 -104
- data/lib/coradoc/input/html/plugins/plateau.rb +197 -190
- data/lib/coradoc/input/html/postprocessor.rb +188 -182
- data/lib/coradoc/input/html.rb +34 -32
- data/lib/coradoc/oscal.rb +18 -5
- data/lib/coradoc/output/adoc.rb +13 -11
- data/lib/coradoc/output/coradoc_tree_debug.rb +15 -13
- data/lib/coradoc/parser/asciidoc/admonition.rb +6 -6
- data/lib/coradoc/parser/asciidoc/attribute_list.rb +43 -27
- data/lib/coradoc/parser/asciidoc/base.rb +3 -6
- data/lib/coradoc/parser/asciidoc/bibliography.rb +5 -6
- data/lib/coradoc/parser/asciidoc/block.rb +30 -31
- data/lib/coradoc/parser/asciidoc/citation.rb +11 -29
- data/lib/coradoc/parser/asciidoc/content.rb +23 -33
- data/lib/coradoc/parser/asciidoc/document_attributes.rb +2 -3
- data/lib/coradoc/parser/asciidoc/header.rb +1 -2
- data/lib/coradoc/parser/asciidoc/inline.rb +165 -42
- data/lib/coradoc/parser/asciidoc/list.rb +27 -27
- data/lib/coradoc/parser/asciidoc/paragraph.rb +28 -19
- data/lib/coradoc/parser/asciidoc/section.rb +11 -17
- data/lib/coradoc/parser/asciidoc/table.rb +5 -5
- data/lib/coradoc/parser/asciidoc/term.rb +24 -8
- data/lib/coradoc/parser/asciidoc/text.rb +18 -21
- data/lib/coradoc/parser/base.rb +0 -3
- data/lib/coradoc/reverse_adoc.rb +3 -3
- data/lib/coradoc/transformer.rb +167 -137
- data/lib/coradoc/version.rb +1 -1
- data/lib/reverse_adoc.rb +1 -1
- data/utils/inspect_asciidoc.rb +29 -0
- data/utils/parser_analyzer.rb +14 -14
- data/utils/round_trip.rb +31 -15
- metadata +34 -137
- data/.hound.yml +0 -5
- data/lib/coradoc/element/inline/citation.rb +0 -24
- data/todo.md +0 -10
@@ -1,131 +1,131 @@
|
|
1
|
-
module Coradoc
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
@html_tree_hooks_pre = {}
|
20
|
-
@html_tree_hooks_post = {}
|
21
|
-
end
|
1
|
+
module Coradoc
|
2
|
+
module Input
|
3
|
+
module Html
|
4
|
+
class Plugin
|
5
|
+
#### Plugin system general
|
6
|
+
|
7
|
+
# Allow building plugins with a shorthand syntax:
|
8
|
+
# plugin = Coradoc::Input::Html::Plugin.new do
|
9
|
+
# def name = "Test"
|
10
|
+
# end
|
11
|
+
|
12
|
+
def self.new(&)
|
13
|
+
if self == Plugin
|
14
|
+
Class.new(Plugin, &)
|
15
|
+
else
|
16
|
+
super
|
17
|
+
end
|
18
|
+
end
|
22
19
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
20
|
+
def initialize
|
21
|
+
@html_tree_hooks_pre = {}
|
22
|
+
@html_tree_hooks_post = {}
|
23
|
+
end
|
27
24
|
|
28
|
-
|
25
|
+
# define name to name a Plugin
|
26
|
+
def name
|
27
|
+
self.class.name
|
28
|
+
end
|
29
29
|
|
30
|
-
|
30
|
+
#### HTML Tree functionalities
|
31
31
|
|
32
|
-
|
33
|
-
html_tree.css(css).each do |e|
|
34
|
-
e.name = new_name
|
35
|
-
end
|
36
|
-
end
|
32
|
+
attr_accessor :html_tree, :coradoc_tree, :asciidoc_string
|
37
33
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
34
|
+
def html_tree_change_tag_name_by_css(css, new_name)
|
35
|
+
html_tree.css(css).each do |e|
|
36
|
+
e.name = new_name
|
37
|
+
end
|
42
38
|
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def html_tree_remove_by_css(css)
|
47
|
-
html_tree.css(css).each(&:remove)
|
48
|
-
end
|
49
39
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
40
|
+
def html_tree_change_properties_by_css(css, properties)
|
41
|
+
html_tree.css(css).each do |e|
|
42
|
+
properties.each do |k, v|
|
43
|
+
e[k.to_s] = v
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
55
47
|
|
56
|
-
|
57
|
-
|
58
|
-
|
48
|
+
def html_tree_remove_by_css(css)
|
49
|
+
html_tree.css(css).each(&:remove)
|
50
|
+
end
|
59
51
|
|
60
|
-
|
61
|
-
|
62
|
-
|
52
|
+
def html_tree_replace_with_children_by_css(css)
|
53
|
+
html_tree.css(css).each do |e|
|
54
|
+
e.replace(e.children)
|
55
|
+
end
|
56
|
+
end
|
63
57
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
system "chromium-browser", "--no-sandbox", i.path
|
68
|
-
end
|
69
|
-
end
|
58
|
+
def html_tree_process_to_coradoc(tree, state = {})
|
59
|
+
Coradoc::Input::Html::Converters.process_coradoc(tree, state)
|
60
|
+
end
|
70
61
|
|
71
|
-
|
62
|
+
def html_tree_process_to_adoc(tree, state = {})
|
63
|
+
Coradoc::Input::Html::Converters.process(tree, state)
|
64
|
+
end
|
72
65
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
def html_tree_add_hook_pre(element, &block)
|
80
|
-
@html_tree_hooks_pre[element] = block
|
81
|
-
end
|
66
|
+
def html_tree_preview
|
67
|
+
Tempfile.open(%w"coradoc .html") do |i|
|
68
|
+
i << html_tree.to_html
|
69
|
+
system "chromium-browser", "--no-sandbox", i.path
|
70
|
+
end
|
71
|
+
end
|
82
72
|
|
83
|
-
|
84
|
-
html_tree.css(css).each do |e|
|
85
|
-
html_tree_add_hook_pre(e, &block)
|
86
|
-
end
|
87
|
-
end
|
73
|
+
# define preprocess_html_tree to process HTML trees
|
88
74
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
75
|
+
# Creates a hook to be called instead of converting an element
|
76
|
+
# to a Coradoc node.
|
77
|
+
#
|
78
|
+
# proc |html_node, state|
|
79
|
+
# coradoc_node
|
80
|
+
# end
|
81
|
+
def html_tree_add_hook_pre(element, &block)
|
82
|
+
@html_tree_hooks_pre[element] = block
|
83
|
+
end
|
98
84
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
85
|
+
def html_tree_add_hook_pre_by_css(css, &block)
|
86
|
+
html_tree.css(css).each do |e|
|
87
|
+
html_tree_add_hook_pre(e, &block)
|
88
|
+
end
|
89
|
+
end
|
104
90
|
|
105
|
-
|
106
|
-
|
107
|
-
|
91
|
+
# Creates a hook to be called after converting an element
|
92
|
+
# to a Coradoc node.
|
93
|
+
#
|
94
|
+
# proc |html_node, coradoc_node, state|
|
95
|
+
# coradoc_node
|
96
|
+
# end
|
97
|
+
def html_tree_add_hook_post(element, &block)
|
98
|
+
@html_tree_hooks_post[element] = block
|
99
|
+
end
|
108
100
|
|
109
|
-
|
110
|
-
|
101
|
+
def html_tree_add_hook_post_by_css(css, &block)
|
102
|
+
html_tree.css(css).each do |e|
|
103
|
+
html_tree_add_hook_post(e, &block)
|
104
|
+
end
|
105
|
+
end
|
111
106
|
|
112
|
-
|
113
|
-
|
114
|
-
|
107
|
+
def html_tree_run_hooks(node, state, &_block)
|
108
|
+
hook_pre = @html_tree_hooks_pre[node]
|
109
|
+
hook_post = @html_tree_hooks_post[node]
|
115
110
|
|
116
|
-
|
117
|
-
|
111
|
+
coradoc = hook_pre.(node, state) if hook_pre
|
112
|
+
coradoc ||= yield node, state
|
118
113
|
|
119
|
-
|
114
|
+
if hook_post
|
115
|
+
coradoc = hook_post.(node, coradoc, state)
|
116
|
+
end
|
120
117
|
|
121
|
-
|
118
|
+
coradoc
|
119
|
+
end
|
122
120
|
|
123
|
-
|
121
|
+
#### Coradoc tree functionalities
|
124
122
|
|
125
|
-
|
123
|
+
# define postprocess_coradoc_tree to change coradoc tree
|
126
124
|
|
127
|
-
|
125
|
+
#### AsciiDoc string functionalities
|
128
126
|
|
129
|
-
|
127
|
+
# define postprocess_asciidoc_string to change the coradoc string
|
128
|
+
end
|
129
|
+
end
|
130
130
|
end
|
131
131
|
end
|
@@ -1,206 +1,213 @@
|
|
1
|
-
module Coradoc
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
# reverse_adoc -rcoradoc/input/html/plugins/plateau
|
8
|
-
# --external-images -u raise --output _out/index.adoc index.html
|
9
|
-
class Plateau < Plugin
|
10
|
-
def name
|
11
|
-
"PLATEAU"
|
12
|
-
end
|
13
|
-
|
14
|
-
def preprocess_html_tree
|
15
|
-
# Let's simplify the tree by removing what's extraneous
|
16
|
-
# html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
|
17
|
-
# html_tree_replace_with_children_by_css("div.container_box")
|
18
|
-
# html_tree_replace_with_children_by_css("div.col.col-12")
|
19
|
-
# html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
|
20
|
-
# html_tree_replace_with_children_by_css("div.row")
|
1
|
+
module Coradoc
|
2
|
+
module Input
|
3
|
+
module Html
|
4
|
+
class Plugin
|
5
|
+
# This plugin enhances documents from the PLATEAU project
|
6
|
+
# to extract more data.
|
21
7
|
#
|
22
|
-
#
|
23
|
-
|
24
|
-
#
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
html_tree_change_tag_name_by_css(".titledata", "h1")
|
29
|
-
html_tree_change_tag_name_by_css(".subtitledata", "h2")
|
30
|
-
html_tree_change_tag_name_by_css(".pitemdata", "h3")
|
31
|
-
html_tree_change_tag_name_by_css(".sitemdata", "h4")
|
32
|
-
html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', "th")
|
33
|
-
html_tree_change_tag_name_by_css('td[bgcolor="#d0cece"]', "th")
|
34
|
-
html_tree_change_tag_name_by_css('.framedata, .frame_container_box', 'aside')
|
35
|
-
html_tree_change_tag_name_by_css('.frame2data', 'pre')
|
36
|
-
# Assumption that all code snippets in those documents are XML...
|
37
|
-
html_tree_change_properties_by_css(".frame2data", class: "brush:xml;")
|
38
|
-
|
39
|
-
# Remove some CSS ids that are not important to us
|
40
|
-
html_tree_change_properties_by_css("#__nuxt", id: nil)
|
41
|
-
html_tree_change_properties_by_css("#__layout", id: nil)
|
42
|
-
html_tree_change_properties_by_css("#app", id: nil)
|
43
|
-
|
44
|
-
# Handle lists of document 02
|
45
|
-
html_tree_replace_with_children_by_css(".list_num-wrap")
|
46
|
-
|
47
|
-
# Convert table/img caption to become a caption
|
48
|
-
html_tree.css(".imagedata").each do |e|
|
49
|
-
table = e.parent.next&.children&.first
|
50
|
-
if table&.name == "table"
|
51
|
-
e.name = "caption"
|
52
|
-
table.prepend_child(e)
|
53
|
-
next
|
54
|
-
end
|
55
|
-
|
56
|
-
img = e.parent.previous&.children&.first
|
57
|
-
if img&.name == "img" && img["src"]
|
58
|
-
title = e.text.strip
|
59
|
-
img["title"] = title
|
60
|
-
e.remove
|
61
|
-
next
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
# Add hooks for H1, H2, H3, H4
|
66
|
-
html_tree_add_hook_post_by_css("h1, h2, h3", &method(:handle_headers))
|
67
|
-
html_tree_add_hook_post_by_css("h4", &method(:handle_headers_h4))
|
68
|
-
|
69
|
-
# Table cells aligned to center
|
70
|
-
html_tree_change_properties_by_css(".tableTopCenter", align: "center")
|
71
|
-
|
72
|
-
# Handle non-semantic lists and indentation
|
73
|
-
html_tree_add_hook_pre_by_css ".text2data" do |node,|
|
74
|
-
text = html_tree_process_to_adoc(node).strip
|
75
|
-
next "" if text.empty? || text == "\u3000"
|
76
|
-
|
77
|
-
if text.start_with?(/\d+\./)
|
78
|
-
text = text.sub(/\A\d+.\s*/, "")
|
79
|
-
".. #{text}\n"
|
80
|
-
else
|
81
|
-
text = text.gsub(/^/, "** ")
|
82
|
-
"\n\n//-PT2D\n#{text}\n//-ENDPT2D\n\n"
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
(3..4).each do |i|
|
87
|
-
html_tree_add_hook_pre_by_css ".text#{i}data" do |node,|
|
88
|
-
text = html_tree_process_to_adoc(node).strip
|
89
|
-
next "" if text.empty? || text == "\u3000"
|
90
|
-
|
91
|
-
text = text.strip.gsub(/^/, "#{'*' * i} ")
|
92
|
-
"\n\n//-PT#{i}D\n#{text}\n//-ENDPT#{i}D\n\n"
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
(2..3).each do |i|
|
97
|
-
html_tree_add_hook_pre_by_css ".text#{i}data_point ul" do |node,|
|
98
|
-
text = html_tree_process_to_adoc(node.children.first.children).strip
|
99
|
-
|
100
|
-
"#{'*' * i} #{text}\n"
|
8
|
+
# Usage:
|
9
|
+
# reverse_adoc -rcoradoc/input/html/plugins/plateau
|
10
|
+
# --external-images -u raise --output _out/index.adoc index.html
|
11
|
+
class Plateau < Plugin
|
12
|
+
def name
|
13
|
+
"PLATEAU"
|
101
14
|
end
|
102
|
-
end
|
103
15
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
"
|
16
|
+
def preprocess_html_tree
|
17
|
+
# Let's simplify the tree by removing what's extraneous
|
18
|
+
# html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
|
19
|
+
# html_tree_replace_with_children_by_css("div.container_box")
|
20
|
+
# html_tree_replace_with_children_by_css("div.col.col-12")
|
21
|
+
# html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
|
22
|
+
# html_tree_replace_with_children_by_css("div.row")
|
23
|
+
#
|
24
|
+
# We can remove that, but it messes up the images and paragraphs.
|
25
|
+
|
26
|
+
# Remove side menu, so we can generate TOC ourselves
|
27
|
+
html_tree_remove_by_css(".sideMenu")
|
28
|
+
|
29
|
+
# Correct non-semantic classes into semantic HTML tags
|
30
|
+
html_tree_change_tag_name_by_css(".titledata", "h1")
|
31
|
+
html_tree_change_tag_name_by_css(".subtitledata", "h2")
|
32
|
+
html_tree_change_tag_name_by_css(".pitemdata", "h3")
|
33
|
+
html_tree_change_tag_name_by_css(".sitemdata", "h4")
|
34
|
+
html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', "th")
|
35
|
+
html_tree_change_tag_name_by_css('td[bgcolor="#d0cece"]', "th")
|
36
|
+
html_tree_change_tag_name_by_css(".framedata, .frame_container_box",
|
37
|
+
"aside")
|
38
|
+
html_tree_change_tag_name_by_css(".frame2data", "pre")
|
39
|
+
# Assumption that all code snippets in those documents are XML...
|
40
|
+
html_tree_change_properties_by_css(".frame2data",
|
41
|
+
class: "brush:xml;")
|
42
|
+
|
43
|
+
# Remove some CSS ids that are not important to us
|
44
|
+
html_tree_change_properties_by_css("#__nuxt", id: nil)
|
45
|
+
html_tree_change_properties_by_css("#__layout", id: nil)
|
46
|
+
html_tree_change_properties_by_css("#app", id: nil)
|
47
|
+
|
48
|
+
# Handle lists of document 02
|
49
|
+
html_tree_replace_with_children_by_css(".list_num-wrap")
|
50
|
+
|
51
|
+
# Convert table/img caption to become a caption
|
52
|
+
html_tree.css(".imagedata").each do |e|
|
53
|
+
table = e.parent.next&.children&.first
|
54
|
+
if table&.name == "table"
|
55
|
+
e.name = "caption"
|
56
|
+
table.prepend_child(e)
|
57
|
+
next
|
58
|
+
end
|
59
|
+
|
60
|
+
img = e.parent.previous&.children&.first
|
61
|
+
if img&.name == "img" && img["src"]
|
62
|
+
title = e.text.strip
|
63
|
+
img["title"] = title
|
64
|
+
e.remove
|
65
|
+
next
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# Add hooks for H1, H2, H3, H4
|
70
|
+
html_tree_add_hook_post_by_css("h1, h2, h3",
|
71
|
+
&method(:handle_headers))
|
72
|
+
html_tree_add_hook_post_by_css("h4", &method(:handle_headers_h4))
|
73
|
+
|
74
|
+
# Table cells aligned to center
|
75
|
+
html_tree_change_properties_by_css(".tableTopCenter",
|
76
|
+
align: "center")
|
77
|
+
|
78
|
+
# Handle non-semantic lists and indentation
|
79
|
+
html_tree_add_hook_pre_by_css ".text2data" do |node,|
|
80
|
+
text = html_tree_process_to_adoc(node).strip
|
81
|
+
next "" if text.empty? || text == "\u3000"
|
82
|
+
|
83
|
+
if text.start_with?(/\d+\./)
|
84
|
+
text = text.sub(/\A\d+.\s*/, "")
|
85
|
+
".. #{text}\n"
|
86
|
+
else
|
87
|
+
text = text.gsub(/^/, "** ")
|
88
|
+
"\n\n//-PT2D\n#{text}\n//-ENDPT2D\n\n"
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
(3..4).each do |i|
|
93
|
+
html_tree_add_hook_pre_by_css ".text#{i}data" do |node,|
|
94
|
+
text = html_tree_process_to_adoc(node).strip
|
95
|
+
next "" if text.empty? || text == "\u3000"
|
96
|
+
|
97
|
+
text = text.strip.gsub(/^/, "#{'*' * i} ")
|
98
|
+
"\n\n//-PT#{i}D\n#{text}\n//-ENDPT#{i}D\n\n"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
(2..3).each do |i|
|
103
|
+
html_tree_add_hook_pre_by_css ".text#{i}data_point ul" do |node,|
|
104
|
+
text = html_tree_process_to_adoc(node.children.first.children).strip
|
105
|
+
|
106
|
+
"#{'*' * i} #{text}\n"
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
(1..20).each do |i|
|
111
|
+
html_tree_add_hook_pre_by_css ".numtextdata_num .list_num#{i}" do |node,|
|
112
|
+
text = html_tree_process_to_adoc(node).strip
|
113
|
+
|
114
|
+
"[start=#{i}]\n. #{text}\n"
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
# html_tree_preview
|
109
119
|
end
|
110
|
-
end
|
111
|
-
|
112
|
-
# html_tree_preview
|
113
|
-
end
|
114
120
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
121
|
+
IM = /[A-Z0-9]{1,3}/
|
122
|
+
|
123
|
+
def handle_headers(node, coradoc, _state)
|
124
|
+
content = coradoc.content.map(&:content).join
|
125
|
+
|
126
|
+
if %w[toc0 toc_0].any? { |i| coradoc.id&.start_with?(i) }
|
127
|
+
# Special content
|
128
|
+
case content.strip
|
129
|
+
when "はじめに" # Introduction
|
130
|
+
coradoc.style = "abstract" # The older version document has ".preface"
|
131
|
+
coradoc.level_int = 1
|
132
|
+
when "改定の概要" # Revision overview
|
133
|
+
coradoc.style = "abstract" # The older version document has ".preface"
|
134
|
+
coradoc.level_int = 1
|
135
|
+
when "参考文献" # Bibliography
|
136
|
+
coradoc.style = "bibliography"
|
137
|
+
coradoc.level_int = 1
|
138
|
+
when "改訂履歴" # Document history
|
139
|
+
coradoc.style = "appendix"
|
140
|
+
coradoc.level_int = 1
|
141
|
+
when "0 概要" # Overview
|
142
|
+
coradoc.style = "abstract" # I'm not sure this is correct
|
143
|
+
coradoc.level_int = 1
|
144
|
+
when "索引" # Index
|
145
|
+
coradoc.style = "index" # I'm not sure this is correct
|
146
|
+
coradoc.level_int = 1
|
147
|
+
else
|
148
|
+
warn "Unknown section #{content.inspect}"
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
if node.name == "h1" && content.start_with?("Annex")
|
153
|
+
coradoc.style = "appendix"
|
154
|
+
coradoc.content.first.content.sub!(/\AAnnex [A-Z]/, "")
|
155
|
+
end
|
156
|
+
|
157
|
+
# Remove numbers
|
158
|
+
coradoc.content.first.content.sub!(/\A(#{IM}\.)*#{IM}[[:space:]]/o,
|
159
|
+
"")
|
160
|
+
|
161
|
+
coradoc
|
143
162
|
end
|
144
|
-
end
|
145
163
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
164
|
+
def handle_headers_h4(_node, coradoc, _state)
|
165
|
+
title = Coradoc.strip_unicode(coradoc.content.first.content)
|
166
|
+
case title
|
167
|
+
when /\A\(\d+\)(.*)/
|
168
|
+
coradoc.level_int = 4
|
169
|
+
coradoc.content.first.content = $1.strip
|
170
|
+
coradoc
|
171
|
+
when /\A\d+\)(.*)/
|
172
|
+
coradoc.level_int = 5
|
173
|
+
coradoc.content.first.content = $1.strip
|
174
|
+
coradoc
|
175
|
+
when /\A#{IM}\.#{IM}\.#{IM}\.#{IM}(.*)/o
|
176
|
+
coradoc.level_int = 4
|
177
|
+
coradoc.content.first.content = $1.strip
|
178
|
+
else
|
179
|
+
if title.empty?
|
180
|
+
# Strip instances of faulty empty paragraphs
|
181
|
+
nil
|
182
|
+
else
|
183
|
+
["// FIXME\n", coradoc]
|
184
|
+
end
|
185
|
+
end
|
150
186
|
end
|
151
|
-
end
|
152
187
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
coradoc.level_int = 4
|
172
|
-
coradoc.content.first.content = $1.strip
|
173
|
-
else
|
174
|
-
if title.empty?
|
175
|
-
# Strip instances of faulty empty paragraphs
|
176
|
-
nil
|
177
|
-
else
|
178
|
-
["// FIXME\n", coradoc]
|
188
|
+
def postprocess_asciidoc_string
|
189
|
+
str = asciidoc_string
|
190
|
+
|
191
|
+
### Custom indentation handling
|
192
|
+
# If there's a step up, add [none]
|
193
|
+
str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n")
|
194
|
+
str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n")
|
195
|
+
str = str.gsub(%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n")
|
196
|
+
# Collapse blocks of text[2,3]data
|
197
|
+
str = str.gsub(%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n")
|
198
|
+
# In the beginning, add [none]
|
199
|
+
str = str.gsub(%r{\s+//-PT[234]D\s+}, "\n\n[none]\n")
|
200
|
+
# If following with another list, ensure we readd styling
|
201
|
+
str = str.gsub(%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*")
|
202
|
+
# Otherwise, clean up
|
203
|
+
str = str.gsub(%r{\s+//-ENDPT[234]D\s+}, "\n\n")
|
204
|
+
|
205
|
+
self.asciidoc_string = str
|
179
206
|
end
|
180
207
|
end
|
181
208
|
end
|
182
|
-
|
183
|
-
def postprocess_asciidoc_string
|
184
|
-
str = self.asciidoc_string
|
185
|
-
|
186
|
-
### Custom indentation handling
|
187
|
-
# If there's a step up, add [none]
|
188
|
-
str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n")
|
189
|
-
str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n")
|
190
|
-
str = str.gsub(%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n")
|
191
|
-
# Collapse blocks of text[2,3]data
|
192
|
-
str = str.gsub(%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n")
|
193
|
-
# In the beginning, add [none]
|
194
|
-
str = str.gsub(%r{\s+//-PT[234]D\s+}, "\n\n[none]\n")
|
195
|
-
# If following with another list, ensure we readd styling
|
196
|
-
str = str.gsub(%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*")
|
197
|
-
# Otherwise, clean up
|
198
|
-
str = str.gsub(%r{\s+//-ENDPT[234]D\s+}, "\n\n")
|
199
|
-
|
200
|
-
self.asciidoc_string = str
|
201
|
-
end
|
202
209
|
end
|
203
210
|
end
|
204
211
|
end
|
205
212
|
|
206
|
-
Coradoc::Input::
|
213
|
+
Coradoc::Input::Html.config.plugins << Coradoc::Input::Html::Plugin::Plateau
|