coradoc-html 1.1.18 → 1.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/coradoc/html/cleaner.rb +128 -0
- data/lib/coradoc/html/converters/a.rb +77 -0
- data/lib/coradoc/html/converters/aside.rb +20 -0
- data/lib/coradoc/html/converters/audio.rb +19 -0
- data/lib/coradoc/html/converters/base.rb +98 -0
- data/lib/coradoc/html/converters/blockquote.rb +25 -0
- data/lib/coradoc/html/converters/br.rb +17 -0
- data/lib/coradoc/html/converters/bypass.rb +82 -0
- data/lib/coradoc/html/converters/code.rb +25 -0
- data/lib/coradoc/html/converters/div.rb +23 -0
- data/lib/coradoc/html/converters/dl.rb +82 -0
- data/lib/coradoc/html/converters/drop.rb +26 -0
- data/lib/coradoc/html/converters/em.rb +23 -0
- data/lib/coradoc/html/converters/figure.rb +33 -0
- data/lib/coradoc/html/converters/h.rb +58 -0
- data/lib/coradoc/html/converters/head.rb +29 -0
- data/lib/coradoc/html/converters/hr.rb +17 -0
- data/lib/coradoc/html/converters/img.rb +103 -0
- data/lib/coradoc/html/converters/li.rb +35 -0
- data/lib/coradoc/html/converters/mark.rb +21 -0
- data/lib/coradoc/html/converters/markup.rb +93 -0
- data/lib/coradoc/html/converters/math.rb +37 -0
- data/lib/coradoc/html/converters/media_base.rb +48 -0
- data/lib/coradoc/html/converters/ol.rb +42 -0
- data/lib/coradoc/html/converters/p.rb +64 -0
- data/lib/coradoc/html/converters/pass_through.rb +15 -0
- data/lib/coradoc/html/converters/positional_formatting.rb +35 -0
- data/lib/coradoc/html/converters/pre.rb +57 -0
- data/lib/coradoc/html/converters/q.rb +25 -0
- data/lib/coradoc/html/converters/strong.rb +22 -0
- data/lib/coradoc/html/converters/sub.rb +20 -0
- data/lib/coradoc/html/converters/sup.rb +20 -0
- data/lib/coradoc/html/converters/table.rb +64 -0
- data/lib/coradoc/html/converters/td.rb +42 -0
- data/lib/coradoc/html/converters/text.rb +66 -0
- data/lib/coradoc/html/converters/tr.rb +27 -0
- data/lib/coradoc/html/converters/video.rb +27 -0
- data/lib/coradoc/html/converters.rb +104 -0
- data/lib/coradoc/html/drop/drop_factory.rb +14 -22
- data/lib/coradoc/html/drop/inline_element_drop.rb +3 -5
- data/lib/coradoc/html/drop/raw_inline_element_drop.rb +30 -0
- data/lib/coradoc/html/drop.rb +30 -8
- data/lib/coradoc/html/errors.rb +11 -0
- data/lib/coradoc/html/html_converter.rb +78 -0
- data/lib/coradoc/html/input_config.rb +66 -0
- data/lib/coradoc/html/plugin.rb +90 -0
- data/lib/coradoc/html/plugins/plateau.rb +212 -0
- data/lib/coradoc/html/postprocessor.rb +19 -0
- data/lib/coradoc/html/spa.rb +0 -2
- data/lib/coradoc/html/static.rb +0 -2
- data/lib/coradoc/html/tag_mapping.rb +3 -1
- data/lib/coradoc/html/transform/from_core_model.rb +2 -2
- data/lib/coradoc/html/transform/to_core_model.rb +3 -3
- data/lib/coradoc/html/version.rb +1 -1
- data/lib/coradoc/html.rb +30 -5
- metadata +46 -47
- data/lib/coradoc/html/input/cleaner.rb +0 -134
- data/lib/coradoc/html/input/config.rb +0 -80
- data/lib/coradoc/html/input/converters/a.rb +0 -79
- data/lib/coradoc/html/input/converters/aside.rb +0 -22
- data/lib/coradoc/html/input/converters/audio.rb +0 -21
- data/lib/coradoc/html/input/converters/base.rb +0 -118
- data/lib/coradoc/html/input/converters/blockquote.rb +0 -27
- data/lib/coradoc/html/input/converters/br.rb +0 -19
- data/lib/coradoc/html/input/converters/bypass.rb +0 -84
- data/lib/coradoc/html/input/converters/code.rb +0 -27
- data/lib/coradoc/html/input/converters/div.rb +0 -25
- data/lib/coradoc/html/input/converters/dl.rb +0 -84
- data/lib/coradoc/html/input/converters/drop.rb +0 -28
- data/lib/coradoc/html/input/converters/em.rb +0 -25
- data/lib/coradoc/html/input/converters/figure.rb +0 -35
- data/lib/coradoc/html/input/converters/h.rb +0 -74
- data/lib/coradoc/html/input/converters/head.rb +0 -31
- data/lib/coradoc/html/input/converters/hr.rb +0 -19
- data/lib/coradoc/html/input/converters/img.rb +0 -105
- data/lib/coradoc/html/input/converters/li.rb +0 -37
- data/lib/coradoc/html/input/converters/mark.rb +0 -23
- data/lib/coradoc/html/input/converters/markup.rb +0 -103
- data/lib/coradoc/html/input/converters/math.rb +0 -39
- data/lib/coradoc/html/input/converters/media_base.rb +0 -50
- data/lib/coradoc/html/input/converters/ol.rb +0 -44
- data/lib/coradoc/html/input/converters/p.rb +0 -90
- data/lib/coradoc/html/input/converters/pass_through.rb +0 -17
- data/lib/coradoc/html/input/converters/positional_formatting.rb +0 -37
- data/lib/coradoc/html/input/converters/pre.rb +0 -59
- data/lib/coradoc/html/input/converters/q.rb +0 -27
- data/lib/coradoc/html/input/converters/strong.rb +0 -24
- data/lib/coradoc/html/input/converters/sub.rb +0 -22
- data/lib/coradoc/html/input/converters/sup.rb +0 -22
- data/lib/coradoc/html/input/converters/table.rb +0 -66
- data/lib/coradoc/html/input/converters/td.rb +0 -44
- data/lib/coradoc/html/input/converters/text.rb +0 -68
- data/lib/coradoc/html/input/converters/tr.rb +0 -29
- data/lib/coradoc/html/input/converters/video.rb +0 -29
- data/lib/coradoc/html/input/converters.rb +0 -107
- data/lib/coradoc/html/input/errors.rb +0 -22
- data/lib/coradoc/html/input/html_converter.rb +0 -98
- data/lib/coradoc/html/input/plugin.rb +0 -120
- data/lib/coradoc/html/input/plugins/plateau.rb +0 -214
- data/lib/coradoc/html/input/postprocessor.rb +0 -25
- data/lib/coradoc/html/input.rb +0 -86
- data/lib/coradoc/html/output.rb +0 -89
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for HTML <q> elements.
class Q < Base
  INSTANCE = new

  # Builds a CoreModel::InlineElement with format_type 'quotation'.
  #
  # @param node the <q> node; its 'cite' attribute (nil when absent) is
  #   used as the element's target
  # @param state [Hash] conversion state forwarded to the child converters
  # @return [Coradoc::CoreModel::InlineElement]
  def to_coradoc(node, state = {})
    children = treat_children_coradoc(node, state)

    Coradoc::CoreModel::InlineElement.new(
      format_type: 'quotation',
      nested_elements: children,
      content: extract_text_from_content(children),
      target: node['cite']
    )
  end
end
|
|
21
|
+
|
|
22
|
+
register :q, Q::INSTANCE
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for bold markup; one shared instance is registered for both
# the :strong and :b tags.
class Strong < Markup
  INSTANCE = new

  # @return [String] the format type reported for this markup
  def coradoc_format_type
    'bold'
  end

  # @return [Array<String>] the tag names this converter treats as its own
  #   kind of markup ancestor (how Markup uses them is not visible here)
  def markup_ancestor_tag_names
    %w[strong b]
  end
end
|
|
17
|
+
|
|
18
|
+
register :strong, Strong::INSTANCE
|
|
19
|
+
register :b, Strong::INSTANCE
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for HTML <sub> elements. The conversion logic itself comes
# from the PositionalFormatting mixin.
class Sub < Base
  INSTANCE = new
  include PositionalFormatting

  # CoreModel class produced for subscript content
  # (presumably consumed by PositionalFormatting — confirm there).
  def element_class
    Coradoc::CoreModel::SubscriptElement
  end
  private :element_class
end
|
|
16
|
+
|
|
17
|
+
register :sub, Sub::INSTANCE
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for HTML <sup> elements. The conversion logic itself comes
# from the PositionalFormatting mixin.
class Sup < Base
  INSTANCE = new
  include PositionalFormatting

  # CoreModel class produced for superscript content
  # (presumably consumed by PositionalFormatting — confirm there).
  def element_class
    Coradoc::CoreModel::SuperscriptElement
  end
  private :element_class
end
|
|
16
|
+
|
|
17
|
+
register :sup, Sup::INSTANCE
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for HTML <table> elements.
class Table < Base
  INSTANCE = new

  # Builds a CoreModel::Table from the node.
  #
  # @param node the <table> node
  # @param state [Hash] conversion state forwarded to the child converters
  # @return [Coradoc::CoreModel::Table]
  def to_coradoc(node, state = {})
    Coradoc::CoreModel::Table.new(
      title: extract_title(node),
      rows: treat_children_coradoc(node, state),
      id: node['id'],
      # Frame and grid stay nil when the attribute is absent or unrecognised.
      frame: frame(node),
      grid: rules(node)
    )
  end

  # @return [String, nil] stripped text of the direct <caption> child,
  #   or nil when the table has no caption
  def extract_title(node)
    node.at('./caption')&.text&.strip
  end

  # Translates the HTML 'frame' attribute into the CoreModel frame value.
  #
  # @return [String, nil] nil for a missing or unrecognised attribute
  def frame(node)
    case node['frame']
    when 'void' then 'none'
    when 'hsides' then 'topbot'
    when 'vsides' then 'sides'
    when 'box', 'border' then 'all'
    end
  end

  # Passes the HTML 'rules' attribute through when it is one of the
  # recognised values.
  #
  # @return [String, nil] nil for a missing or unrecognised attribute
  def rules(node)
    case node['rules']
    when 'all' then 'all'
    when 'rows' then 'rows'
    when 'cols' then 'cols'
    when 'none' then 'none'
    end
  end
end
|
|
60
|
+
|
|
61
|
+
register :table, Table::INSTANCE
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for HTML table cells; one shared instance is registered for
# both the :td and :th tags.
class Td < Base
  INSTANCE = new

  # Builds a CoreModel::TableCell from the node.
  #
  # While the children of a cell that holds exactly one <p> element are
  # converted, state[:tdsinglepara] is set to true. The flag is restored
  # to its previous value afterwards so it cannot leak into later cells
  # or into content following the table.
  #
  # @param node the <td>/<th> node
  # @param state [Hash] conversion state shared with the child converters
  # @return [Coradoc::CoreModel::TableCell]
  def to_coradoc(node, state = {})
    colspan = node['colspan']&.to_i
    rowspan = node['rowspan']&.to_i
    alignment = extract_alignment(node)

    singlepara = node.elements.size == 1 && node.elements.first.name == 'p'
    content = with_single_paragraph_flag(state, singlepara) do
      treat_children_coradoc(node, state)
    end

    Coradoc::CoreModel::TableCell.new(
      content: extract_text_from_content(content),
      alignment: alignment,
      # Spans of 1 (or less) carry no information and are stored as nil.
      colspan: colspan && colspan > 1 ? colspan : nil,
      rowspan: rowspan && rowspan > 1 ? rowspan : nil,
      header: node.name == 'th'
    )
  end

  # @return [String, nil] 'left', 'center' or 'right' taken from the
  #   'align' attribute; nil for a missing or unrecognised value
  def extract_alignment(node)
    align = node['align']
    case align
    when 'left' then 'left'
    when 'center' then 'center'
    when 'right' then 'right'
    end
  end

  private

  # Yields with state[:tdsinglepara] set to true when +enabled+, then puts
  # the key back exactly as it was (removing it if it was absent), even
  # when the block raises. The same hash object is kept so that anything
  # else stored in it by the children stays visible to the caller.
  def with_single_paragraph_flag(state, enabled)
    return yield unless enabled

    had_flag = state.key?(:tdsinglepara)
    previous = state[:tdsinglepara]
    state[:tdsinglepara] = true
    begin
      yield
    ensure
      if had_flag
        state[:tdsinglepara] = previous
      else
        state.delete(:tdsinglepara)
      end
    end
  end
end
|
|
37
|
+
|
|
38
|
+
register :td, Td::INSTANCE
|
|
39
|
+
register :th, Td::INSTANCE
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for HTML text nodes.
class Text < Base
  INSTANCE = new

  # Converts a text node. Whitespace-only nodes are delegated to
  # #treat_empty; everything else is cleaned up and wrapped in a
  # CoreModel::TextElement.
  #
  # @param node the text node
  # @param state [Hash] conversion state
  # @return [Coradoc::CoreModel::TextElement, String, nil]
  def to_coradoc(node, state = {})
    return treat_empty(node, state) if node.text.strip.empty?

    # HTML cleanup is performed in the converter layer
    Coradoc::CoreModel::TextElement.new(content: cleanup_html_text(node.text))
  end

  private

  # Decides what a whitespace-only text node turns into: a single space
  # when the node text is exactly one space, nil in every other case.
  def treat_empty(node, state)
    parent = node.parent.name.to_sym
    return nil if %i[ol ul].include?(parent) # Otherwise the indentation is broken
    return nil if state[:tdsinglepara]

    ' ' if node.text == ' ' # Regular whitespace text node
  end

  # HTML-to-CoreModel text cleanup: the steps run in the order
  # nbsp handling, border newlines, inner newlines, link escaping.
  def cleanup_html_text(text)
    escape_links(
      remove_inner_newlines(
        remove_border_newlines(
          preserve_nbsp(text)
        )
      )
    )
  end

  # NOTE(review): despite the name, the replacement below reads as a plain
  # space in this view — confirm the intended replacement character.
  def preserve_nbsp(text)
    text.gsub("\u00A0", ' ')
  end

  # Prefixes "<<...>>" sequences (whose first inner character is not a
  # space) with a backslash.
  def escape_links(text)
    text.gsub(/<<([^ ][^>]*)>>/, '\\<<\\1>>')
  end

  # Strips newlines at the very start and the very end of the text.
  def remove_border_newlines(text)
    without_leading = text.sub(/\A\n+/, '')
    without_leading.sub(/\n+\z/, '')
  end

  # Convert newlines/tabs to spaces and squeeze multiple spaces.
  # A single leading/trailing space is preserved for inline contexts.
  def remove_inner_newlines(text)
    spaced = text.tr("\n\t", ' ')
    spaced.squeeze(' ')
  end
end
|
|
62
|
+
|
|
63
|
+
register :text, Text::INSTANCE
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for HTML <tr> elements.
class Tr < Base
  INSTANCE = new

  # Builds a CoreModel::TableRow whose cells are the converted children.
  #
  # @param node the <tr> node
  # @param state [Hash] conversion state forwarded to the child converters
  # @return [Coradoc::CoreModel::TableRow]
  def to_coradoc(node, state = {})
    cells = treat_children_coradoc(node, state)

    # Use CoreModel::TableRow with cells (not columns)
    Coradoc::CoreModel::TableRow.new(
      cells: cells,
      header: table_header_row?(node)
    )
  end

  # A row counts as a header row when it has no preceding sibling element.
  # NOTE(review): this also holds for the first row of each row group,
  # not only for the first row of the table — confirm that is intended.
  def table_header_row?(node)
    node.previous_element.nil?
  end
end
|
|
23
|
+
|
|
24
|
+
register :tr, Tr::INSTANCE
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for HTML <video> elements, built on MediaBase.
class Video < MediaBase
  INSTANCE = new

  private

  # @return [Symbol] the media kind reported by this converter
  def semantic_type
    :video
  end

  # Adds the video-specific attributes to the ones from MediaBase and
  # drops every entry whose value is nil.
  #
  # @return [Hash]
  def build_attributes(node)
    video_specific = {
      poster: node['poster'],
      width: node['width'],
      height: node['height']
    }
    base_attributes(node).merge(video_specific).compact
  end
end
|
|
23
|
+
|
|
24
|
+
register :video, Video::INSTANCE
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
# Registry of HTML tag converters.
#
# Converter classes are declared as autoloads and register themselves (via
# .register) when their file is evaluated. The first .lookup forces every
# declared converter to load so the registry is complete.
module Converters
  # Constant name => require path for every converter file.
  CONVERTERS = {
    Base: 'coradoc/html/converters/base',
    Markup: 'coradoc/html/converters/markup',
    A: 'coradoc/html/converters/a',
    Aside: 'coradoc/html/converters/aside',
    Audio: 'coradoc/html/converters/audio',
    Blockquote: 'coradoc/html/converters/blockquote',
    Br: 'coradoc/html/converters/br',
    Bypass: 'coradoc/html/converters/bypass',
    Code: 'coradoc/html/converters/code',
    Div: 'coradoc/html/converters/div',
    Dl: 'coradoc/html/converters/dl',
    Skip: 'coradoc/html/converters/drop',
    Em: 'coradoc/html/converters/em',
    Figure: 'coradoc/html/converters/figure',
    H: 'coradoc/html/converters/h',
    Head: 'coradoc/html/converters/head',
    Hr: 'coradoc/html/converters/hr',
    Img: 'coradoc/html/converters/img',
    Li: 'coradoc/html/converters/li',
    Mark: 'coradoc/html/converters/mark',
    Math: 'coradoc/html/converters/math',
    MediaBase: 'coradoc/html/converters/media_base',
    Ol: 'coradoc/html/converters/ol',
    P: 'coradoc/html/converters/p',
    PassThrough: 'coradoc/html/converters/pass_through',
    PositionalFormatting: 'coradoc/html/converters/positional_formatting',
    Pre: 'coradoc/html/converters/pre',
    Q: 'coradoc/html/converters/q',
    Strong: 'coradoc/html/converters/strong',
    Sup: 'coradoc/html/converters/sup',
    Sub: 'coradoc/html/converters/sub',
    Table: 'coradoc/html/converters/table',
    Td: 'coradoc/html/converters/td',
    Text: 'coradoc/html/converters/text',
    Tr: 'coradoc/html/converters/tr',
    Video: 'coradoc/html/converters/video'
  }.freeze
  private_constant :CONVERTERS

  CONVERTERS.each do |name, path|
    autoload name, path
  end

  @converters = {}
  @converters_loaded = false

  # Registers +converter+ for +tag_name+, replacing any previous entry.
  #
  # @param tag_name [String, Symbol]
  # @return the converter that was stored
  def self.register(tag_name, converter)
    @converters[tag_name.to_sym] = converter
  end

  # Removes the converter registered for +tag_name+.
  #
  # @return the removed converter, or nil when none was registered
  def self.unregister(tag_name)
    @converters.delete(tag_name.to_sym)
  end

  # Resolves every declared converter constant once so that each converter
  # file is evaluated and registers itself.
  #
  # The flag is raised before loading, as before, so a nested call made
  # during loading returns immediately. If loading fails the flag is
  # lowered again: otherwise a single failed load would mark the registry
  # as complete and later lookups would silently miss converters.
  def self.ensure_converters_loaded
    return if @converters_loaded

    @converters_loaded = true
    begin
      CONVERTERS.each_key { |name| const_get(name) }
    rescue StandardError, ScriptError
      # ScriptError covers LoadError/SyntaxError raised by the autoload.
      @converters_loaded = false
      raise
    end
  end

  # Finds the converter for +tag_name+, falling back to the converter
  # selected by the unknown_tags configuration.
  def self.lookup(tag_name)
    ensure_converters_loaded
    @converters[tag_name.to_sym] || default_converter(tag_name)
  end

  # Converts a node, a NodeSet or an Array of nodes.
  #
  # Collections are converted element by element. For a single node the
  # converter call is wrapped by each plugin's html_tree_run_hooks; the
  # plugin listed last ends up outermost.
  #
  # @param node [Nokogiri::XML::Node, Nokogiri::XML::NodeSet, Array]
  # @param state [Hash] conversion state; :plugin_instances is read here
  def self.process_coradoc(node, state)
    node = node.to_a if node.is_a? Nokogiri::XML::NodeSet
    return node.map { |i| process_coradoc(i, state) } if node.is_a? Array

    plugins = state[:plugin_instances] || []
    process = proc { lookup(node.name).to_coradoc(node, state) }
    plugins.each do |plugin|
      # Block-local, so every wrapper keeps its own inner step.
      inner = process
      process = proc { plugin.html_tree_run_hooks(node, state, &inner) }
    end
    process.call
  end

  # Picks the fallback converter according to
  # Html.input_config.unknown_tags.
  #
  # @raise [Errors::UnknownTagError] when the setting is :raise
  # @raise [Errors::InvalidConfigurationError] for any other unknown value
  def self.default_converter(tag_name)
    case Html.input_config.unknown_tags.to_sym
    when :pass_through
      PassThrough::INSTANCE
    when :drop
      Skip::INSTANCE
    when :bypass
      Bypass::INSTANCE
    when :raise
      raise Errors::UnknownTagError, "unknown tag: #{tag_name}"
    else
      raise Errors::InvalidConfigurationError,
            "unknown value #{Html.input_config.unknown_tags.inspect} " \
            'for Coradoc::Html.input_config.unknown_tags'
    end
  end
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
@@ -33,6 +33,20 @@ module Coradoc
|
|
|
33
33
|
drop&.new(model)&.template_type
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
+
# Resolve each constant listed in EAGER_LOAD_ORDER to trigger its autoload so the
|
|
37
|
+
# drop class body evaluates and self-registers. Called eagerly from
|
|
38
|
+
# drop.rb after autoloads are declared.
|
|
39
|
+
EAGER_LOAD_ORDER = %i[Base DropFactory AnnotationDrop BlockDrop ListBlockDrop ListItemDrop
|
|
40
|
+
TableDrop TableRowDrop TableCellDrop ImageDrop InlineElementDrop RawInlineElementDrop
|
|
41
|
+
BibliographyEntryDrop BibliographyDrop TocEntryDrop TocDrop DefinitionItemDrop
|
|
42
|
+
DefinitionListDrop TermDrop FootnoteDrop TextContentDrop DocumentDrop].freeze
|
|
43
|
+
private_constant :EAGER_LOAD_ORDER
|
|
44
|
+
|
|
45
|
+
# Resolves every constant named in EAGER_LOAD_ORDER on the Drop namespace,
# in list order.
#
# @return [true]
def self.eager_load!
  EAGER_LOAD_ORDER.each do |const_name|
    Drop.const_get(const_name)
  end
  true
end
|
|
49
|
+
|
|
36
50
|
class << self
|
|
37
51
|
private
|
|
38
52
|
|
|
@@ -48,25 +62,3 @@ module Coradoc
|
|
|
48
62
|
end
|
|
49
63
|
end
|
|
50
64
|
end
|
|
51
|
-
|
|
52
|
-
# Load all drops — each self-registers with DropFactory.
|
|
53
|
-
# Registration order doesn't matter (sorted by ancestor depth).
|
|
54
|
-
require_relative 'annotation_drop'
|
|
55
|
-
require_relative 'block_drop'
|
|
56
|
-
require_relative 'list_block_drop'
|
|
57
|
-
require_relative 'list_item_drop'
|
|
58
|
-
require_relative 'table_drop'
|
|
59
|
-
require_relative 'table_row_drop'
|
|
60
|
-
require_relative 'table_cell_drop'
|
|
61
|
-
require_relative 'image_drop'
|
|
62
|
-
require_relative 'inline_element_drop'
|
|
63
|
-
require_relative 'bibliography_entry_drop'
|
|
64
|
-
require_relative 'bibliography_drop'
|
|
65
|
-
require_relative 'toc_entry_drop'
|
|
66
|
-
require_relative 'toc_drop'
|
|
67
|
-
require_relative 'definition_item_drop'
|
|
68
|
-
require_relative 'definition_list_drop'
|
|
69
|
-
require_relative 'term_drop'
|
|
70
|
-
require_relative 'footnote_drop'
|
|
71
|
-
require_relative 'text_content_drop'
|
|
72
|
-
require_relative 'document_drop'
|
|
@@ -27,11 +27,9 @@ module Coradoc
|
|
|
27
27
|
end
|
|
28
28
|
|
|
29
29
|
def css_class
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
when 'span' then @model.metadata('class')
|
|
34
|
-
end
|
|
30
|
+
return @model.metadata('class') if format_type == 'span'
|
|
31
|
+
|
|
32
|
+
TagMapping.css_class_for(format_type)
|
|
35
33
|
end
|
|
36
34
|
|
|
37
35
|
def term_ref
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Drop
|
|
6
|
+
# Drop for CoreModel::RawInlineElement.
|
|
7
|
+
#
|
|
8
|
+
# Passthrough content is raw output-format markup (typically HTML)
|
|
9
|
+
# that the source author explicitly marked as "emit verbatim." The
|
|
10
|
+
# generic InlineElementDrop escapes content; this subclass skips
|
|
11
|
+
# escaping so the rendered output mirrors the author's intent.
|
|
12
|
+
#
|
|
13
|
+
# The Liquid template is shared with InlineElementDrop — only the
|
|
14
|
+
# data preparation differs. InlineElementDrop is autoloaded by the
|
|
15
|
+
# Drop namespace shell (drop.rb) and is guaranteed to load before
|
|
16
|
+
# this class via DropFactory.eager_load! ordering.
|
|
17
|
+
class RawInlineElementDrop < InlineElementDrop
  # @return [String] the text extracted from the model's content, as a
  #   String (empty when extract_text yields nil)
  def text
    extracted = extract_text(@model.content)
    extracted.to_s
  end

  # @return [String] name of the template shared with InlineElementDrop
  def template_type
    'inline_element'
  end
end
|
|
26
|
+
|
|
27
|
+
DropFactory.register(CoreModel::RawInlineElement, RawInlineElementDrop)
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
data/lib/coradoc/html/drop.rb
CHANGED
|
@@ -1,18 +1,40 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
# Drop namespace —
|
|
3
|
+
# Drop namespace — Liquid drop layer for template rendering.
|
|
4
4
|
#
|
|
5
|
-
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
5
|
+
# Each drop class is autoloaded from its own file (one class per file,
|
|
6
|
+
# mirroring the mirror/ReverseBuilder pattern). Eager loading is delegated
|
|
7
|
+
# to DropFactory.eager_load!, which triggers each autoload in dependency
|
|
8
|
+
# order so drops self-register with DropFactory at load time.
|
|
8
9
|
module Coradoc
|
|
9
10
|
module Html
|
|
10
11
|
module Drop
  # Constant name => file name under drop/, in declaration order.
  # InlineElementDrop must load before RawInlineElementDrop (subclass).
  {
    Base: 'base',
    DropFactory: 'drop_factory',
    AnnotationDrop: 'annotation_drop',
    BlockDrop: 'block_drop',
    ListBlockDrop: 'list_block_drop',
    ListItemDrop: 'list_item_drop',
    TableDrop: 'table_drop',
    TableRowDrop: 'table_row_drop',
    TableCellDrop: 'table_cell_drop',
    ImageDrop: 'image_drop',
    InlineElementDrop: 'inline_element_drop',
    RawInlineElementDrop: 'raw_inline_element_drop',
    BibliographyEntryDrop: 'bibliography_entry_drop',
    BibliographyDrop: 'bibliography_drop',
    TocEntryDrop: 'toc_entry_drop',
    TocDrop: 'toc_drop',
    DefinitionItemDrop: 'definition_item_drop',
    DefinitionListDrop: 'definition_list_drop',
    TermDrop: 'term_drop',
    FootnoteDrop: 'footnote_drop',
    TextContentDrop: 'text_content_drop',
    DocumentDrop: 'document_drop'
  }.each do |const_name, file_name|
    autoload const_name, "#{__dir__}/drop/#{file_name}"
  end
end
|
|
12
36
|
end
|
|
13
37
|
end
|
|
14
38
|
|
|
15
|
-
#
|
|
16
|
-
|
|
17
|
-
# DropFactory loads next
|
|
18
|
-
require_relative 'drop/drop_factory'
|
|
39
|
+
# Trigger eager load so every drop class body evaluates and self-registers.
|
|
40
|
+
Coradoc::Html::Drop::DropFactory.eager_load!
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
# Entry point for converting HTML input into a CoreModel tree.
class HtmlConverter
  # Nesting depth of the track_time calls currently running; used only to
  # indent the timing log lines.
  @track_time_indentation = 0

  # Converts HTML input into a CoreModel tree.
  #
  # Steps, in order: resolve the root node, let every plugin preprocess
  # the HTML tree, convert the tree, run the Postprocessor, then let
  # every plugin postprocess the CoreModel tree.
  #
  # @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node]
  # @param options [Hash] passed to Html.input_config.with; unless frozen,
  #   it receives the plugin instances under :plugin_instances
  # @return the CoreModel tree, or nil when no root node could be
  #   determined (including for any other input type)
  def self.to_core_model(input, options = {})
    Html.input_config.with(options) do
      plugin_instances = prepare_plugin_instances(options)

      root = track_time 'Loading input HTML document' do
        case input
        when String
          Nokogiri::HTML(input).root
        when Nokogiri::XML::Document
          input.root
        when Nokogiri::XML::Node
          input
        end
      end

      return nil unless root

      # Each plugin may swap the tree; the next plugin sees the result.
      plugin_instances.each do |plugin|
        plugin.html_tree = root
        track_time "Preprocessing document with #{plugin.name} plugin" do
          plugin.preprocess_html_tree
        end
        root = plugin.html_tree
      end

      coremodel = track_time 'Converting input document tree to CoreModel' do
        Converters.process_coradoc(
          root,
          plugin_instances: plugin_instances
        )
      end

      coremodel = track_time 'Post-process CoreModel tree' do
        Postprocessor.process(coremodel)
      end

      # Same hand-over as above, now on the CoreModel tree.
      plugin_instances.each do |plugin|
        plugin.coremodel_tree = coremodel
        track_time "Postprocessing CoreModel tree with #{plugin.name} plugin" do
          plugin.postprocess_coremodel_tree
        end
        coremodel = plugin.coremodel_tree
      end

      # Expose the plugin instances to the caller through its options hash.
      options[:plugin_instances] = plugin_instances unless options.frozen?

      coremodel
    end
  end

  # @return the plugin instances given in +options+, or a fresh instance
  #   of every plugin class from Html.input_config.plugins
  def self.prepare_plugin_instances(options)
    options[:plugin_instances] || Html.input_config.plugins.map(&:new)
  end

  # Runs the block and, when Html.input_config.track_time is enabled,
  # reports its start and duration on stderr, indented by nesting depth.
  #
  # The depth counter is restored in an ensure clause, so a block that
  # raises no longer leaves later log lines over-indented. Durations are
  # measured on the monotonic clock, which wall-clock adjustments cannot
  # skew. The "took" line is still printed only on success.
  #
  # @param task [String] label used in the log lines
  # @return the value of the block
  def self.track_time(task)
    return yield unless Html.input_config.track_time

    warn (' ' * @track_time_indentation) + "* #{task} is starting..."
    @track_time_indentation += 1
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    begin
      ret = yield
      time_elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    ensure
      @track_time_indentation -= 1
    end
    warn (' ' * @track_time_indentation) +
         "* #{task} took #{time_elapsed.round(3)} seconds"
    ret
  end
end
|
|
77
|
+
end
|
|
78
|
+
end
|