coradoc-html 1.1.18 → 1.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/coradoc/html/cleaner.rb +128 -0
- data/lib/coradoc/html/converters/a.rb +77 -0
- data/lib/coradoc/html/converters/aside.rb +20 -0
- data/lib/coradoc/html/converters/audio.rb +19 -0
- data/lib/coradoc/html/converters/base.rb +98 -0
- data/lib/coradoc/html/converters/blockquote.rb +25 -0
- data/lib/coradoc/html/converters/br.rb +17 -0
- data/lib/coradoc/html/converters/bypass.rb +82 -0
- data/lib/coradoc/html/converters/code.rb +25 -0
- data/lib/coradoc/html/converters/div.rb +23 -0
- data/lib/coradoc/html/converters/dl.rb +82 -0
- data/lib/coradoc/html/converters/drop.rb +26 -0
- data/lib/coradoc/html/converters/em.rb +23 -0
- data/lib/coradoc/html/converters/figure.rb +33 -0
- data/lib/coradoc/html/converters/h.rb +58 -0
- data/lib/coradoc/html/converters/head.rb +29 -0
- data/lib/coradoc/html/converters/hr.rb +17 -0
- data/lib/coradoc/html/converters/img.rb +103 -0
- data/lib/coradoc/html/converters/li.rb +35 -0
- data/lib/coradoc/html/converters/mark.rb +21 -0
- data/lib/coradoc/html/converters/markup.rb +93 -0
- data/lib/coradoc/html/converters/math.rb +37 -0
- data/lib/coradoc/html/converters/media_base.rb +48 -0
- data/lib/coradoc/html/converters/ol.rb +42 -0
- data/lib/coradoc/html/converters/p.rb +64 -0
- data/lib/coradoc/html/converters/pass_through.rb +15 -0
- data/lib/coradoc/html/converters/positional_formatting.rb +35 -0
- data/lib/coradoc/html/converters/pre.rb +57 -0
- data/lib/coradoc/html/converters/q.rb +25 -0
- data/lib/coradoc/html/converters/strong.rb +22 -0
- data/lib/coradoc/html/converters/sub.rb +20 -0
- data/lib/coradoc/html/converters/sup.rb +20 -0
- data/lib/coradoc/html/converters/table.rb +64 -0
- data/lib/coradoc/html/converters/td.rb +42 -0
- data/lib/coradoc/html/converters/text.rb +66 -0
- data/lib/coradoc/html/converters/tr.rb +27 -0
- data/lib/coradoc/html/converters/video.rb +27 -0
- data/lib/coradoc/html/converters.rb +104 -0
- data/lib/coradoc/html/drop/drop_factory.rb +14 -22
- data/lib/coradoc/html/drop/inline_element_drop.rb +3 -5
- data/lib/coradoc/html/drop/raw_inline_element_drop.rb +30 -0
- data/lib/coradoc/html/drop.rb +30 -8
- data/lib/coradoc/html/errors.rb +11 -0
- data/lib/coradoc/html/html_converter.rb +78 -0
- data/lib/coradoc/html/input_config.rb +66 -0
- data/lib/coradoc/html/plugin.rb +90 -0
- data/lib/coradoc/html/plugins/plateau.rb +212 -0
- data/lib/coradoc/html/postprocessor.rb +19 -0
- data/lib/coradoc/html/spa.rb +0 -2
- data/lib/coradoc/html/static.rb +0 -2
- data/lib/coradoc/html/tag_mapping.rb +3 -1
- data/lib/coradoc/html/transform/from_core_model.rb +2 -2
- data/lib/coradoc/html/transform/to_core_model.rb +3 -3
- data/lib/coradoc/html/version.rb +1 -1
- data/lib/coradoc/html.rb +30 -5
- metadata +46 -47
- data/lib/coradoc/html/input/cleaner.rb +0 -134
- data/lib/coradoc/html/input/config.rb +0 -80
- data/lib/coradoc/html/input/converters/a.rb +0 -79
- data/lib/coradoc/html/input/converters/aside.rb +0 -22
- data/lib/coradoc/html/input/converters/audio.rb +0 -21
- data/lib/coradoc/html/input/converters/base.rb +0 -118
- data/lib/coradoc/html/input/converters/blockquote.rb +0 -27
- data/lib/coradoc/html/input/converters/br.rb +0 -19
- data/lib/coradoc/html/input/converters/bypass.rb +0 -84
- data/lib/coradoc/html/input/converters/code.rb +0 -27
- data/lib/coradoc/html/input/converters/div.rb +0 -25
- data/lib/coradoc/html/input/converters/dl.rb +0 -84
- data/lib/coradoc/html/input/converters/drop.rb +0 -28
- data/lib/coradoc/html/input/converters/em.rb +0 -25
- data/lib/coradoc/html/input/converters/figure.rb +0 -35
- data/lib/coradoc/html/input/converters/h.rb +0 -74
- data/lib/coradoc/html/input/converters/head.rb +0 -31
- data/lib/coradoc/html/input/converters/hr.rb +0 -19
- data/lib/coradoc/html/input/converters/img.rb +0 -105
- data/lib/coradoc/html/input/converters/li.rb +0 -37
- data/lib/coradoc/html/input/converters/mark.rb +0 -23
- data/lib/coradoc/html/input/converters/markup.rb +0 -103
- data/lib/coradoc/html/input/converters/math.rb +0 -39
- data/lib/coradoc/html/input/converters/media_base.rb +0 -50
- data/lib/coradoc/html/input/converters/ol.rb +0 -44
- data/lib/coradoc/html/input/converters/p.rb +0 -90
- data/lib/coradoc/html/input/converters/pass_through.rb +0 -17
- data/lib/coradoc/html/input/converters/positional_formatting.rb +0 -37
- data/lib/coradoc/html/input/converters/pre.rb +0 -59
- data/lib/coradoc/html/input/converters/q.rb +0 -27
- data/lib/coradoc/html/input/converters/strong.rb +0 -24
- data/lib/coradoc/html/input/converters/sub.rb +0 -22
- data/lib/coradoc/html/input/converters/sup.rb +0 -22
- data/lib/coradoc/html/input/converters/table.rb +0 -66
- data/lib/coradoc/html/input/converters/td.rb +0 -44
- data/lib/coradoc/html/input/converters/text.rb +0 -68
- data/lib/coradoc/html/input/converters/tr.rb +0 -29
- data/lib/coradoc/html/input/converters/video.rb +0 -29
- data/lib/coradoc/html/input/converters.rb +0 -107
- data/lib/coradoc/html/input/errors.rb +0 -22
- data/lib/coradoc/html/input/html_converter.rb +0 -98
- data/lib/coradoc/html/input/plugin.rb +0 -120
- data/lib/coradoc/html/input/plugins/plateau.rb +0 -214
- data/lib/coradoc/html/input/postprocessor.rb +0 -25
- data/lib/coradoc/html/input.rb +0 -86
- data/lib/coradoc/html/output.rb +0 -89
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'tmpdir'
|
|
4
|
+
|
|
5
|
+
module Coradoc
|
|
6
|
+
module Html
|
|
7
|
+
# Settings object for the HTML input pipeline.
#
# Each option registered with +declare_option+ gets a writer and a reader.
# The reader first looks at the temporary overrides installed by #with and
# falls back to the instance-wide value when no override is present.
#
# NOTE(review): @em_delimiter and @strong_delimiter are initialised below but
# no accessor is declared for them in this class — confirm whether that is
# intentional.
class InputConfig
  # Populates every setting with its default value.
  def initialize
    @unknown_tags = :pass_through
    @input_format = :html
    @mathml2asciimath = false
    @external_images = false

    @destination = nil
    @sourcedir = nil

    @image_counter = 1
    @image_counter_pattern = '%03d'

    @em_delimiter = '_'
    @strong_delimiter = '*'
    @inline_options = {}
    @tag_border = ' '

    @split_sections = nil

    @doc_width = 1000

    @plugins = []

    @track_time = false
  end

  # Runs the given block with +options+ installed as temporary overrides.
  #
  # The overrides replace (they are not merged with) any overrides that were
  # active before the call, and the previous overrides are put back when the
  # block finishes — including when it raises.
  #
  # @param options [Hash{Symbol => Object}] option name => temporary value
  # @yield executed while the overrides are active
  # @return [Object] the value returned by the block
  def with(options = {})
    old_options = @inline_options
    @inline_options = options
    yield
  ensure
    # Restore in +ensure+ so an exception inside the block cannot leave the
    # temporary overrides installed on this (possibly shared) object.
    @inline_options = old_options
  end

  # Declares a configuration option: defines the writer and a reader that
  # honours the overrides installed by #with.
  #
  # @param option [Symbol] option (and instance variable) name
  def self.declare_option(option)
    attr_accessor option

    # Keep a handle on the plain attr reader before it is replaced below.
    original_reader = instance_method(option)
    define_method(option) do
      override = @inline_options[option]
      # Only a missing/nil override falls back to the stored value; an
      # explicit +false+ override must win over a truthy stored value.
      override.nil? ? original_reader.bind_call(self) : override
    end
  end

  declare_option :unknown_tags
  declare_option :tag_border
  declare_option :mathml2asciimath
  declare_option :external_images
  declare_option :destination
  declare_option :sourcedir
  declare_option :image_counter
  declare_option :image_counter_pattern
  declare_option :input_format
  declare_option :split_sections
  declare_option :doc_width
  declare_option :plugins
  declare_option :track_time
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
# Base class for input plugins.
#
# A plugin exposes three overridable callbacks (#preprocess_html_tree,
# #postprocess_coremodel_tree, #postprocess_output_string), accessors for
# the data those callbacks work on, and helpers for editing the HTML tree
# and attaching per-node conversion hooks.
class Plugin
  # +Plugin.new { ... }+ called on Plugin itself returns a new anonymous
  # subclass whose body is the given block; called on a subclass it
  # instantiates that subclass as usual.
  def self.new(&definition)
    return super unless self == Plugin

    Class.new(Plugin, &definition)
  end

  attr_accessor :html_tree, :coremodel_tree, :output_string

  # Starts with no hooks registered.
  def initialize
    @html_tree_hooks_pre = {}
    @html_tree_hooks_post = {}
  end

  # Plugin name; defaults to the name of the plugin class.
  def name
    self.class.name
  end

  # Callbacks; the base implementations do nothing.
  def preprocess_html_tree; end

  def postprocess_coremodel_tree; end

  def postprocess_output_string; end

  # Renames every element matched by +css+ to +new_name+.
  def html_tree_change_tag_name_by_css(css, new_name)
    html_tree.css(css).each { |element| element.name = new_name }
  end

  # Assigns each key/value of +properties+ (keys stringified) on every
  # element matched by +css+.
  def html_tree_change_properties_by_css(css, properties)
    html_tree.css(css).each do |element|
      properties.each { |key, value| element[key.to_s] = value }
    end
  end

  # Removes every element matched by +css+.
  def html_tree_remove_by_css(css)
    html_tree.css(css).each { |element| element.remove }
  end

  # Replaces every element matched by +css+ with its own children.
  def html_tree_replace_with_children_by_css(css)
    html_tree.css(css).each { |element| element.replace(element.children) }
  end

  # Delegates conversion of +tree+ to Coradoc::Html::Converters.
  def html_tree_process_to_coremodel(tree, state = {})
    Coradoc::Html::Converters.process_coradoc(tree, state)
  end

  # Registers +block+ as the pre-hook for +element+, replacing any
  # pre-hook stored for it earlier.
  def html_tree_add_hook_pre(element, &block)
    @html_tree_hooks_pre[element] = block
  end

  # Registers +block+ as the pre-hook for every element matched by +css+.
  def html_tree_add_hook_pre_by_css(css, &block)
    html_tree.css(css).each { |element| html_tree_add_hook_pre(element, &block) }
  end

  # Registers +block+ as the post-hook for +element+, replacing any
  # post-hook stored for it earlier.
  def html_tree_add_hook_post(element, &block)
    @html_tree_hooks_post[element] = block
  end

  # Registers +block+ as the post-hook for every element matched by +css+.
  def html_tree_add_hook_post_by_css(css, &block)
    html_tree.css(css).each { |element| html_tree_add_hook_post(element, &block) }
  end

  # Converts +node+, honouring its hooks.
  #
  # The pre-hook (if any) is called with (node, state); when it is absent
  # or returns nil/false the given block is yielded (node, state) instead.
  # The post-hook (if any) is then called with (node, result, state) and
  # its return value becomes the final result.
  def html_tree_run_hooks(node, state)
    pre_hook = @html_tree_hooks_pre[node]
    post_hook = @html_tree_hooks_post[node]

    result = pre_hook&.call(node, state) || yield(node, state)

    post_hook ? post_hook.call(node, result, state) : result
  end
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
class Plugin
|
|
6
|
+
# This plugin enhances documents from the PLATEAU project
|
|
7
|
+
# to extract more data.
|
|
8
|
+
#
|
|
9
|
+
# Usage:
|
|
10
|
+
# coradoc convert -rcoradoc/html/plugins/plateau
|
|
11
|
+
# --external-images -u raise --output _out/index.adoc index.html
|
|
12
|
+
class Plateau < Plugin
  # Fixed display name of this plugin.
  def name
    'PLATEAU'
  end

  # Rewrites the parsed HTML tree in place (tag names, attributes, caption
  # placement) and registers the per-node hooks that turn the documents'
  # non-semantic markup into headings and list markup.
  def preprocess_html_tree
    # Remove side menu, so we can generate TOC ourselves
    html_tree_remove_by_css('.sideMenu')

    # Correct non-semantic classes into semantic HTML tags
    html_tree_change_tag_name_by_css('.titledata', 'h1')
    html_tree_change_tag_name_by_css('.subtitledata', 'h2')
    html_tree_change_tag_name_by_css('.pitemdata', 'h3')
    html_tree_change_tag_name_by_css('.sitemdata', 'h4')
    html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', 'th')
    html_tree_change_tag_name_by_css('td[bgcolor="#d0cece"]', 'th')
    html_tree_change_tag_name_by_css(
      '.framedata, .frame_container_box',
      'aside'
    )
    html_tree_change_tag_name_by_css('.frame2data', 'pre')
    # Assumption that all code snippets in those documents are XML...
    html_tree_change_properties_by_css(
      '.frame2data',
      class: 'brush:xml;'
    )

    # Remove some CSS ids that are not important to us
    html_tree_change_properties_by_css('#__nuxt', id: nil)
    html_tree_change_properties_by_css('#__layout', id: nil)
    html_tree_change_properties_by_css('#app', id: nil)

    # Handle lists of document 02
    html_tree_replace_with_children_by_css('.list_num-wrap')

    # Convert table/img caption to become a caption
    html_tree.css('.imagedata').each do |e|
      # A table that opens the block following the caption's parent:
      # move the caption element into it as <caption>.
      table = e.parent.next&.children&.first
      if table&.name == 'table'
        e.name = 'caption'
        table.prepend_child(e)
        next
      end

      # Otherwise an image that opens the block preceding the caption's
      # parent: store the caption text as its title and drop the element.
      img = e.parent.previous&.children&.first
      next unless img&.name == 'img' && img['src']

      title = e.text.strip
      img['title'] = title
      e.remove
    end

    # Add hooks for H1, H2, H3, H4
    html_tree_add_hook_post_by_css(
      'h1, h2, h3',
      &method(:handle_headers)
    )
    html_tree_add_hook_post_by_css('h4', &method(:handle_headers_h4))

    # Table cells aligned to center
    html_tree_change_properties_by_css(
      '.tableTopCenter',
      align: 'center'
    )

    # Handle non-semantic lists and indentation
    html_tree_add_hook_pre_by_css '.text2data' do |node,|
      text = html_tree_process_to_coremodel(node).strip
      next '' if text.empty? || text == "\u3000"

      if text.start_with?(/\d+\./)
        # Drop the leading "<digits>." marker; the dot is matched literally.
        text = text.sub(/\A\d+\.\s*/, '')
        ".. #{text}\n"
      else
        text = text.gsub(/^/, '** ')
        # The //-PT2D ... //-ENDPT2D markers are rewritten later by
        # #postprocess_output_string.
        "\n\n//-PT2D\n#{text}\n//-ENDPT2D\n\n"
      end
    end

    (3..4).each do |i|
      html_tree_add_hook_pre_by_css ".text#{i}data" do |node,|
        text = html_tree_process_to_coremodel(node).strip
        next '' if text.empty? || text == "\u3000"

        text = text.strip.gsub(/^/, "#{'*' * i} ")
        "\n\n//-PT#{i}D\n#{text}\n//-ENDPT#{i}D\n\n"
      end
    end

    (2..3).each do |i|
      html_tree_add_hook_pre_by_css ".text#{i}data_point ul" do |node,|
        text = html_tree_process_to_coremodel(node.children.first.children).strip

        "#{'*' * i} #{text}\n"
      end
    end

    (1..20).each do |i|
      html_tree_add_hook_pre_by_css ".numtextdata_num .list_num#{i}" do |node,|
        text = html_tree_process_to_coremodel(node).strip

        "[start=#{i}]\n. #{text}\n"
      end
    end
  end

  # One component of a clause number: one to three capitals or digits.
  IM = /[A-Z0-9]{1,3}/

  # Post-hook for h1/h2/h3 headings.
  #
  # Assigns a style and level to a fixed set of special sections (matched
  # by title when the id starts with "toc0"/"toc_0"), marks "Annex" h1
  # headings as appendices, and strips a leading clause number from the
  # first content item.
  #
  # @param node the HTML heading node (only its tag name is read)
  # @param coradoc the converted heading; modified in place
  # @param _state unused
  # @return the same +coradoc+ object
  def handle_headers(node, coradoc, _state)
    content = coradoc.content.map(&:content).join

    if %w[toc0 toc_0].any? { |i| coradoc.id&.start_with?(i) }
      # Special content
      case content.strip
      when 'はじめに' # Introduction
        coradoc.style = 'abstract' # The older version document has ".preface"
        coradoc.level_int = 1
      when '改定の概要' # Revision overview
        coradoc.style = 'abstract' # The older version document has ".preface"
        coradoc.level_int = 1
      when '参考文献' # Bibliography
        coradoc.style = 'bibliography'
        coradoc.level_int = 1
      when '改訂履歴' # Document history
        coradoc.style = 'appendix'
        coradoc.level_int = 1
      when '0 概要' # Overview
        coradoc.style = 'abstract' # I'm not sure this is correct
        coradoc.level_int = 1
      when '索引' # Index
        coradoc.style = 'index' # I'm not sure this is correct
        coradoc.level_int = 1
      else
        warn "Unknown section #{content.inspect}"
      end
    end

    if node.name == 'h1' && content.start_with?('Annex')
      coradoc.style = 'appendix'
      coradoc.content.first.content.sub!(/\AAnnex [A-Z]/, '')
    end

    # Remove numbers
    coradoc.content.first.content.sub!(
      /\A(#{IM}\.)*#{IM}[[:space:]]/o,
      ''
    )

    coradoc
  end

  # Post-hook for h4 headings.
  #
  # Recognises "(n) title", "n) title" and four-component clause numbers
  # ("a.b.c.d title"), strips the marker from the first content item and
  # sets the heading level (4, 5 and 4 respectively).
  #
  # @param _node unused
  # @param coradoc the converted heading; modified in place
  # @param _state unused
  # @return +coradoc+ for a recognised pattern, nil when the title is
  #   empty, otherwise a two-element Array of a comment line and +coradoc+
  def handle_headers_h4(_node, coradoc, _state)
    title = Coradoc.strip_unicode(coradoc.content.first.content)
    case title
    when /\A\(\d+\)(.*)/
      coradoc.level_int = 4
      coradoc.content.first.content = ::Regexp.last_match(1).strip
      coradoc
    when /\A\d+\)(.*)/
      coradoc.level_int = 5
      coradoc.content.first.content = ::Regexp.last_match(1).strip
      coradoc
    when /\A#{IM}\.#{IM}\.#{IM}\.#{IM}(.*)/o
      coradoc.level_int = 4
      coradoc.content.first.content = ::Regexp.last_match(1).strip
      # Return the heading itself like the branches above; without this the
      # branch evaluated to the stripped title String and the heading was
      # replaced by plain text.
      coradoc
    else
      # An empty title yields nil (strips faulty empty paragraphs);
      # anything else is kept and flagged with a comment line.
      title.empty? ? nil : ["// Unhandled h4 content\n", coradoc]
    end
  end

  # Rewrites the //-PTnD ... //-ENDPTnD marker comments emitted by the
  # pre-hooks in #preprocess_html_tree into list styling, then stores the
  # result back in #output_string. The substitutions are order-dependent.
  def postprocess_output_string
    str = output_string

    ### Custom indentation handling
    # If there's a step up, add [none]
    str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n")
    str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n")
    str = str.gsub(%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n")
    # Collapse blocks of text[2,3]data
    str = str.gsub(%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n")
    # In the beginning, add [none]
    str = str.gsub(%r{\s+//-PT[234]D\s+}, "\n\n[none]\n")
    # If following with another list, ensure we readd styling
    str = str.gsub(%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*")
    # Otherwise, clean up
    str = str.gsub(%r{\s+//-ENDPT[234]D\s+}, "\n\n")

    self.output_string = str
  end
end
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
# Loading this file appends the Plateau plugin class to the plugin list of the
# shared Coradoc::Html input configuration.
Coradoc::Html.input_config.plugins << Coradoc::Html::Plugin::Plateau
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
# Post-processing step for a converted tree. As written it performs no
# transformation: #process hands back the object it was constructed with.
class Postprocessor
  class << self
    # Shortcut for +new(coradoc).process+.
    def process(coradoc)
      instance = new(coradoc)
      instance.process
    end
  end

  # @param coradoc the tree to post-process
  def initialize(coradoc)
    @tree = coradoc
  end

  # @return the tree given to the constructor, unchanged
  def process
    @tree
  end
end
|
|
18
|
+
end
|
|
19
|
+
end
|
data/lib/coradoc/html/spa.rb
CHANGED
data/lib/coradoc/html/static.rb
CHANGED
|
@@ -10,8 +10,6 @@ module Coradoc
|
|
|
10
10
|
# This transformer converts CoreModel to HTML strings by delegating
|
|
11
11
|
# to the existing theme/renderer pipeline.
|
|
12
12
|
class FromCoreModel
|
|
13
|
-
include Coradoc::Transform::Base
|
|
14
|
-
|
|
15
13
|
class << self
|
|
16
14
|
# Transform a CoreModel to HTML string
|
|
17
15
|
#
|
|
@@ -29,6 +27,8 @@ module Coradoc
|
|
|
29
27
|
end
|
|
30
28
|
end
|
|
31
29
|
end
|
|
30
|
+
|
|
31
|
+
def transform(model, options = {}) = self.class.transform(model, options)
|
|
32
32
|
end
|
|
33
33
|
end
|
|
34
34
|
end
|
|
@@ -12,8 +12,6 @@ module Coradoc
|
|
|
12
12
|
# Nokogiri::XML::Document or Nokogiri::XML::Node objects into CoreModel
|
|
13
13
|
# by delegating to the existing input converter pipeline.
|
|
14
14
|
class ToCoreModel
|
|
15
|
-
include Coradoc::Transform::Base
|
|
16
|
-
|
|
17
15
|
class << self
|
|
18
16
|
# Transform an HTML model (Nokogiri node) to CoreModel
|
|
19
17
|
#
|
|
@@ -25,7 +23,7 @@ module Coradoc
|
|
|
25
23
|
when Coradoc::CoreModel::Base
|
|
26
24
|
model
|
|
27
25
|
when Nokogiri::XML::Document, Nokogiri::XML::Node
|
|
28
|
-
::Coradoc::
|
|
26
|
+
::Coradoc::Html::HtmlConverter.to_core_model(model)
|
|
29
27
|
when Array
|
|
30
28
|
model.map { |item| transform(item) }
|
|
31
29
|
else
|
|
@@ -33,6 +31,8 @@ module Coradoc
|
|
|
33
31
|
end
|
|
34
32
|
end
|
|
35
33
|
end
|
|
34
|
+
|
|
35
|
+
def transform(model) = self.class.transform(model)
|
|
36
36
|
end
|
|
37
37
|
end
|
|
38
38
|
end
|
data/lib/coradoc/html/version.rb
CHANGED
data/lib/coradoc/html.rb
CHANGED
|
@@ -12,10 +12,35 @@ module Coradoc
|
|
|
12
12
|
end
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
15
|
+
module Coradoc
  module Html
    # Input-side constants, loaded lazily on first reference.
    autoload :Cleaner, 'coradoc/html/cleaner'
    autoload :Converters, 'coradoc/html/converters'
    autoload :Errors, 'coradoc/html/errors'
    autoload :HtmlConverter, 'coradoc/html/html_converter'
    autoload :InputConfig, 'coradoc/html/input_config'
    autoload :Plugin, 'coradoc/html/plugin'
    autoload :Postprocessor, 'coradoc/html/postprocessor'

    class << self
      # Memoised InputConfig instance. When a block is given the config is
      # yielded to it before being returned.
      def input_config
        @input_config ||= InputConfig.new
        yield(@input_config) if block_given?
        @input_config
      end

      # Drops the memoised config; the next #input_config call builds a new one.
      def reset_input_config!
        @input_config = nil
      end

      # Memoised Cleaner instance.
      def cleaner
        @cleaner ||= Cleaner.new
      end

      # Converts +html+ by delegating to HtmlConverter.to_core_model.
      def to_coradoc(html, options = {})
        HtmlConverter.to_core_model(html, options)
      end
    end
  end
end
|
|
19
44
|
|
|
20
45
|
module Coradoc
|
|
21
46
|
module Html
|
|
@@ -77,7 +102,7 @@ module Coradoc
|
|
|
77
102
|
|
|
78
103
|
# Parse HTML content and return CoreModel elements (may be an Array)
|
|
79
104
|
def self.parse(html, options = {})
|
|
80
|
-
|
|
105
|
+
HtmlConverter.to_core_model(html, options)
|
|
81
106
|
end
|
|
82
107
|
|
|
83
108
|
# Parse HTML content directly into a CoreModel document
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: coradoc-html
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.
|
|
4
|
+
version: 1.1.19
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
@@ -118,8 +118,46 @@ files:
|
|
|
118
118
|
- LICENSE.txt
|
|
119
119
|
- lib/coradoc/html.rb
|
|
120
120
|
- lib/coradoc/html/asset_resolver.rb
|
|
121
|
+
- lib/coradoc/html/cleaner.rb
|
|
121
122
|
- lib/coradoc/html/config.rb
|
|
122
123
|
- lib/coradoc/html/converter_base.rb
|
|
124
|
+
- lib/coradoc/html/converters.rb
|
|
125
|
+
- lib/coradoc/html/converters/a.rb
|
|
126
|
+
- lib/coradoc/html/converters/aside.rb
|
|
127
|
+
- lib/coradoc/html/converters/audio.rb
|
|
128
|
+
- lib/coradoc/html/converters/base.rb
|
|
129
|
+
- lib/coradoc/html/converters/blockquote.rb
|
|
130
|
+
- lib/coradoc/html/converters/br.rb
|
|
131
|
+
- lib/coradoc/html/converters/bypass.rb
|
|
132
|
+
- lib/coradoc/html/converters/code.rb
|
|
133
|
+
- lib/coradoc/html/converters/div.rb
|
|
134
|
+
- lib/coradoc/html/converters/dl.rb
|
|
135
|
+
- lib/coradoc/html/converters/drop.rb
|
|
136
|
+
- lib/coradoc/html/converters/em.rb
|
|
137
|
+
- lib/coradoc/html/converters/figure.rb
|
|
138
|
+
- lib/coradoc/html/converters/h.rb
|
|
139
|
+
- lib/coradoc/html/converters/head.rb
|
|
140
|
+
- lib/coradoc/html/converters/hr.rb
|
|
141
|
+
- lib/coradoc/html/converters/img.rb
|
|
142
|
+
- lib/coradoc/html/converters/li.rb
|
|
143
|
+
- lib/coradoc/html/converters/mark.rb
|
|
144
|
+
- lib/coradoc/html/converters/markup.rb
|
|
145
|
+
- lib/coradoc/html/converters/math.rb
|
|
146
|
+
- lib/coradoc/html/converters/media_base.rb
|
|
147
|
+
- lib/coradoc/html/converters/ol.rb
|
|
148
|
+
- lib/coradoc/html/converters/p.rb
|
|
149
|
+
- lib/coradoc/html/converters/pass_through.rb
|
|
150
|
+
- lib/coradoc/html/converters/positional_formatting.rb
|
|
151
|
+
- lib/coradoc/html/converters/pre.rb
|
|
152
|
+
- lib/coradoc/html/converters/q.rb
|
|
153
|
+
- lib/coradoc/html/converters/strong.rb
|
|
154
|
+
- lib/coradoc/html/converters/sub.rb
|
|
155
|
+
- lib/coradoc/html/converters/sup.rb
|
|
156
|
+
- lib/coradoc/html/converters/table.rb
|
|
157
|
+
- lib/coradoc/html/converters/td.rb
|
|
158
|
+
- lib/coradoc/html/converters/text.rb
|
|
159
|
+
- lib/coradoc/html/converters/tr.rb
|
|
160
|
+
- lib/coradoc/html/converters/video.rb
|
|
123
161
|
- lib/coradoc/html/drop.rb
|
|
124
162
|
- lib/coradoc/html/drop/annotation_drop.rb
|
|
125
163
|
- lib/coradoc/html/drop/base.rb
|
|
@@ -135,6 +173,7 @@ files:
|
|
|
135
173
|
- lib/coradoc/html/drop/inline_element_drop.rb
|
|
136
174
|
- lib/coradoc/html/drop/list_block_drop.rb
|
|
137
175
|
- lib/coradoc/html/drop/list_item_drop.rb
|
|
176
|
+
- lib/coradoc/html/drop/raw_inline_element_drop.rb
|
|
138
177
|
- lib/coradoc/html/drop/table_cell_drop.rb
|
|
139
178
|
- lib/coradoc/html/drop/table_drop.rb
|
|
140
179
|
- lib/coradoc/html/drop/table_row_drop.rb
|
|
@@ -142,55 +181,15 @@ files:
|
|
|
142
181
|
- lib/coradoc/html/drop/text_content_drop.rb
|
|
143
182
|
- lib/coradoc/html/drop/toc_drop.rb
|
|
144
183
|
- lib/coradoc/html/drop/toc_entry_drop.rb
|
|
184
|
+
- lib/coradoc/html/errors.rb
|
|
145
185
|
- lib/coradoc/html/escape.rb
|
|
146
186
|
- lib/coradoc/html/frontmatter_meta.rb
|
|
147
|
-
- lib/coradoc/html/
|
|
148
|
-
- lib/coradoc/html/
|
|
149
|
-
- lib/coradoc/html/input/config.rb
|
|
150
|
-
- lib/coradoc/html/input/converters.rb
|
|
151
|
-
- lib/coradoc/html/input/converters/a.rb
|
|
152
|
-
- lib/coradoc/html/input/converters/aside.rb
|
|
153
|
-
- lib/coradoc/html/input/converters/audio.rb
|
|
154
|
-
- lib/coradoc/html/input/converters/base.rb
|
|
155
|
-
- lib/coradoc/html/input/converters/blockquote.rb
|
|
156
|
-
- lib/coradoc/html/input/converters/br.rb
|
|
157
|
-
- lib/coradoc/html/input/converters/bypass.rb
|
|
158
|
-
- lib/coradoc/html/input/converters/code.rb
|
|
159
|
-
- lib/coradoc/html/input/converters/div.rb
|
|
160
|
-
- lib/coradoc/html/input/converters/dl.rb
|
|
161
|
-
- lib/coradoc/html/input/converters/drop.rb
|
|
162
|
-
- lib/coradoc/html/input/converters/em.rb
|
|
163
|
-
- lib/coradoc/html/input/converters/figure.rb
|
|
164
|
-
- lib/coradoc/html/input/converters/h.rb
|
|
165
|
-
- lib/coradoc/html/input/converters/head.rb
|
|
166
|
-
- lib/coradoc/html/input/converters/hr.rb
|
|
167
|
-
- lib/coradoc/html/input/converters/img.rb
|
|
168
|
-
- lib/coradoc/html/input/converters/li.rb
|
|
169
|
-
- lib/coradoc/html/input/converters/mark.rb
|
|
170
|
-
- lib/coradoc/html/input/converters/markup.rb
|
|
171
|
-
- lib/coradoc/html/input/converters/math.rb
|
|
172
|
-
- lib/coradoc/html/input/converters/media_base.rb
|
|
173
|
-
- lib/coradoc/html/input/converters/ol.rb
|
|
174
|
-
- lib/coradoc/html/input/converters/p.rb
|
|
175
|
-
- lib/coradoc/html/input/converters/pass_through.rb
|
|
176
|
-
- lib/coradoc/html/input/converters/positional_formatting.rb
|
|
177
|
-
- lib/coradoc/html/input/converters/pre.rb
|
|
178
|
-
- lib/coradoc/html/input/converters/q.rb
|
|
179
|
-
- lib/coradoc/html/input/converters/strong.rb
|
|
180
|
-
- lib/coradoc/html/input/converters/sub.rb
|
|
181
|
-
- lib/coradoc/html/input/converters/sup.rb
|
|
182
|
-
- lib/coradoc/html/input/converters/table.rb
|
|
183
|
-
- lib/coradoc/html/input/converters/td.rb
|
|
184
|
-
- lib/coradoc/html/input/converters/text.rb
|
|
185
|
-
- lib/coradoc/html/input/converters/tr.rb
|
|
186
|
-
- lib/coradoc/html/input/converters/video.rb
|
|
187
|
-
- lib/coradoc/html/input/errors.rb
|
|
188
|
-
- lib/coradoc/html/input/html_converter.rb
|
|
189
|
-
- lib/coradoc/html/input/plugin.rb
|
|
190
|
-
- lib/coradoc/html/input/plugins/plateau.rb
|
|
191
|
-
- lib/coradoc/html/input/postprocessor.rb
|
|
187
|
+
- lib/coradoc/html/html_converter.rb
|
|
188
|
+
- lib/coradoc/html/input_config.rb
|
|
192
189
|
- lib/coradoc/html/layout_renderer.rb
|
|
193
|
-
- lib/coradoc/html/
|
|
190
|
+
- lib/coradoc/html/plugin.rb
|
|
191
|
+
- lib/coradoc/html/plugins/plateau.rb
|
|
192
|
+
- lib/coradoc/html/postprocessor.rb
|
|
194
193
|
- lib/coradoc/html/render_options.rb
|
|
195
194
|
- lib/coradoc/html/renderer.rb
|
|
196
195
|
- lib/coradoc/html/section_numberable.rb
|