coradoc-html 1.1.18 → 1.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/coradoc/html/cleaner.rb +128 -0
- data/lib/coradoc/html/converters/a.rb +77 -0
- data/lib/coradoc/html/converters/aside.rb +20 -0
- data/lib/coradoc/html/converters/audio.rb +19 -0
- data/lib/coradoc/html/converters/base.rb +98 -0
- data/lib/coradoc/html/converters/blockquote.rb +25 -0
- data/lib/coradoc/html/converters/br.rb +17 -0
- data/lib/coradoc/html/converters/bypass.rb +82 -0
- data/lib/coradoc/html/converters/code.rb +25 -0
- data/lib/coradoc/html/converters/div.rb +23 -0
- data/lib/coradoc/html/converters/dl.rb +82 -0
- data/lib/coradoc/html/converters/drop.rb +26 -0
- data/lib/coradoc/html/converters/em.rb +23 -0
- data/lib/coradoc/html/converters/figure.rb +33 -0
- data/lib/coradoc/html/converters/h.rb +58 -0
- data/lib/coradoc/html/converters/head.rb +29 -0
- data/lib/coradoc/html/converters/hr.rb +17 -0
- data/lib/coradoc/html/converters/img.rb +103 -0
- data/lib/coradoc/html/converters/li.rb +35 -0
- data/lib/coradoc/html/converters/mark.rb +21 -0
- data/lib/coradoc/html/converters/markup.rb +93 -0
- data/lib/coradoc/html/converters/math.rb +37 -0
- data/lib/coradoc/html/converters/media_base.rb +48 -0
- data/lib/coradoc/html/converters/ol.rb +42 -0
- data/lib/coradoc/html/converters/p.rb +64 -0
- data/lib/coradoc/html/converters/pass_through.rb +15 -0
- data/lib/coradoc/html/converters/positional_formatting.rb +35 -0
- data/lib/coradoc/html/converters/pre.rb +57 -0
- data/lib/coradoc/html/converters/q.rb +25 -0
- data/lib/coradoc/html/converters/strong.rb +22 -0
- data/lib/coradoc/html/converters/sub.rb +20 -0
- data/lib/coradoc/html/converters/sup.rb +20 -0
- data/lib/coradoc/html/converters/table.rb +64 -0
- data/lib/coradoc/html/converters/td.rb +42 -0
- data/lib/coradoc/html/converters/text.rb +66 -0
- data/lib/coradoc/html/converters/tr.rb +27 -0
- data/lib/coradoc/html/converters/video.rb +27 -0
- data/lib/coradoc/html/converters.rb +104 -0
- data/lib/coradoc/html/drop/drop_factory.rb +14 -22
- data/lib/coradoc/html/drop/inline_element_drop.rb +3 -5
- data/lib/coradoc/html/drop/raw_inline_element_drop.rb +30 -0
- data/lib/coradoc/html/drop.rb +30 -8
- data/lib/coradoc/html/errors.rb +11 -0
- data/lib/coradoc/html/html_converter.rb +78 -0
- data/lib/coradoc/html/input_config.rb +66 -0
- data/lib/coradoc/html/plugin.rb +90 -0
- data/lib/coradoc/html/plugins/plateau.rb +212 -0
- data/lib/coradoc/html/postprocessor.rb +19 -0
- data/lib/coradoc/html/spa.rb +0 -2
- data/lib/coradoc/html/static.rb +0 -2
- data/lib/coradoc/html/tag_mapping.rb +3 -1
- data/lib/coradoc/html/transform/from_core_model.rb +2 -2
- data/lib/coradoc/html/transform/to_core_model.rb +3 -3
- data/lib/coradoc/html/version.rb +1 -1
- data/lib/coradoc/html.rb +30 -5
- metadata +46 -47
- data/lib/coradoc/html/input/cleaner.rb +0 -134
- data/lib/coradoc/html/input/config.rb +0 -80
- data/lib/coradoc/html/input/converters/a.rb +0 -79
- data/lib/coradoc/html/input/converters/aside.rb +0 -22
- data/lib/coradoc/html/input/converters/audio.rb +0 -21
- data/lib/coradoc/html/input/converters/base.rb +0 -118
- data/lib/coradoc/html/input/converters/blockquote.rb +0 -27
- data/lib/coradoc/html/input/converters/br.rb +0 -19
- data/lib/coradoc/html/input/converters/bypass.rb +0 -84
- data/lib/coradoc/html/input/converters/code.rb +0 -27
- data/lib/coradoc/html/input/converters/div.rb +0 -25
- data/lib/coradoc/html/input/converters/dl.rb +0 -84
- data/lib/coradoc/html/input/converters/drop.rb +0 -28
- data/lib/coradoc/html/input/converters/em.rb +0 -25
- data/lib/coradoc/html/input/converters/figure.rb +0 -35
- data/lib/coradoc/html/input/converters/h.rb +0 -74
- data/lib/coradoc/html/input/converters/head.rb +0 -31
- data/lib/coradoc/html/input/converters/hr.rb +0 -19
- data/lib/coradoc/html/input/converters/img.rb +0 -105
- data/lib/coradoc/html/input/converters/li.rb +0 -37
- data/lib/coradoc/html/input/converters/mark.rb +0 -23
- data/lib/coradoc/html/input/converters/markup.rb +0 -103
- data/lib/coradoc/html/input/converters/math.rb +0 -39
- data/lib/coradoc/html/input/converters/media_base.rb +0 -50
- data/lib/coradoc/html/input/converters/ol.rb +0 -44
- data/lib/coradoc/html/input/converters/p.rb +0 -90
- data/lib/coradoc/html/input/converters/pass_through.rb +0 -17
- data/lib/coradoc/html/input/converters/positional_formatting.rb +0 -37
- data/lib/coradoc/html/input/converters/pre.rb +0 -59
- data/lib/coradoc/html/input/converters/q.rb +0 -27
- data/lib/coradoc/html/input/converters/strong.rb +0 -24
- data/lib/coradoc/html/input/converters/sub.rb +0 -22
- data/lib/coradoc/html/input/converters/sup.rb +0 -22
- data/lib/coradoc/html/input/converters/table.rb +0 -66
- data/lib/coradoc/html/input/converters/td.rb +0 -44
- data/lib/coradoc/html/input/converters/text.rb +0 -68
- data/lib/coradoc/html/input/converters/tr.rb +0 -29
- data/lib/coradoc/html/input/converters/video.rb +0 -29
- data/lib/coradoc/html/input/converters.rb +0 -107
- data/lib/coradoc/html/input/errors.rb +0 -22
- data/lib/coradoc/html/input/html_converter.rb +0 -98
- data/lib/coradoc/html/input/plugin.rb +0 -120
- data/lib/coradoc/html/input/plugins/plateau.rb +0 -214
- data/lib/coradoc/html/input/postprocessor.rb +0 -25
- data/lib/coradoc/html/input.rb +0 -86
- data/lib/coradoc/html/output.rb +0 -89
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter for HTML list containers. Registered in this file for the
# +ol+, +ul+ and +dir+ tags.
class Ol < Base
  INSTANCE = new

  # Builds a CoreModel list block from a list node.
  #
  # @param node the list element; read through +[]+ and +name+
  # @param state [Hash] conversion state handed on to the child converters
  # @return [Coradoc::CoreModel::ListBlock]
  def to_coradoc(node, state = {})
    list_id = node['id']
    entries = treat_children_coradoc(node, state)
    kind = get_list_type(node, state)

    Coradoc::CoreModel::ListBlock.new(
      marker_type: kind,
      items: entries,
      id: list_id,
      # A missing +start+ attribute stays nil instead of becoming 0.
      start: node['start']&.to_i
    )
  end

  # Maps the tag name to a marker type: only +ol+ is ordered, every other
  # tag name (including +ul+ and +dir+) is unordered.
  #
  # @param node the list element
  # @param _state [Hash] unused
  # @return [String] 'ordered' or 'unordered'
  def get_list_type(node, _state)
    node.name == 'ol' ? 'ordered' : 'unordered'
  end
end
|
|
37
|
-
|
|
38
|
-
register :ol, Ol::INSTANCE
|
|
39
|
-
register :ul, Ol::INSTANCE
|
|
40
|
-
register :dir, Ol::INSTANCE
|
|
41
|
-
end
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
end
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter for +p+ elements: produces a CoreModel paragraph block whose
# children have had their outer whitespace trimmed.
class P < Base
  INSTANCE = new

  # Converts a paragraph node.
  #
  # @param node the +p+ element; its +id+ attribute is carried over
  # @param state [Hash] conversion state handed on to the child converters
  # @return [Coradoc::CoreModel::ParagraphBlock]
  def to_coradoc(node, state = {})
    id = node['id']
    content = treat_children_coradoc(node, state)

    content = strip_fullwidth_spaces(content)

    Coradoc::CoreModel::ParagraphBlock.new(
      children: content,
      id: id
    )
  end

  private

  # Cleans up the converted children of a paragraph:
  # 1. trims the runs matched by the pattern below from both ends of every
  #    InlineElement whose content is a String,
  # 2. trims whitespace on the outer edges of the paragraph,
  # 3. drops elements that ended up empty.
  #
  # NOTE(review): despite the method name, the pattern as written matches
  # the plain space character — confirm whether U+3000 was intended.
  #
  # @param content [Array, Object] converted children; non-arrays are
  #   returned untouched
  # @return [Array, Object]
  def strip_fullwidth_spaces(content)
    return content unless content.is_a?(Array)

    content.each do |item|
      next unless item.is_a?(Coradoc::CoreModel::InlineElement)
      next unless item.content.is_a?(String)

      item.content = item.content.gsub(/\A +| +\z/, '')
    end

    reject_empty_elements(strip_edge_whitespace(content))
  end

  # Left-strips the first text element and right-strips the last one.
  #
  # Works on a copy of the array and swaps stripped Strings in by index
  # rather than mutating them: String items may be frozen (for example
  # literals created under `# frozen_string_literal: true`), and
  # String#replace on a frozen receiver raises FrozenError.
  #
  # @param content [Array]
  # @return [Array] a new array with the edge elements trimmed
  def strip_edge_whitespace(content)
    trimmed = content.dup

    first_index = trimmed.index { |item| text_element?(item) }
    trimmed[first_index] = strip_left(trimmed[first_index]) if first_index

    # Looked up after the left strip so a single text element gets both.
    last_index = trimmed.rindex { |item| text_element?(item) }
    trimmed[last_index] = strip_right(trimmed[last_index]) if last_index

    trimmed
  end

  # Removes leading whitespace from a text element.
  #
  # @param item [Coradoc::CoreModel::InlineElement, String, Object]
  # @return the element to use in place of +item+: the same InlineElement
  #   (its content reassigned) or a stripped copy of the String
  def strip_left(item)
    case item
    when Coradoc::CoreModel::InlineElement
      item.content = item.content.lstrip if item.content.is_a?(String)
      item
    when String
      item.lstrip
    else
      item
    end
  end

  # Removes trailing whitespace from a text element.
  #
  # @param item [Coradoc::CoreModel::InlineElement, String, Object]
  # @return the element to use in place of +item+ (see #strip_left)
  def strip_right(item)
    case item
    when Coradoc::CoreModel::InlineElement
      item.content = item.content.rstrip if item.content.is_a?(String)
      item
    when String
      item.rstrip
    else
      item
    end
  end

  # @return [Boolean] true for InlineElements and plain Strings
  def text_element?(item)
    item.is_a?(Coradoc::CoreModel::InlineElement) || item.is_a?(String)
  end

  # Drops empty Strings and InlineElements that have neither textual
  # content nor nested elements; every other object is kept.
  #
  # @param content [Array]
  # @return [Array] a new array without the empty elements
  def reject_empty_elements(content)
    content.reject do |item|
      if item.is_a?(Coradoc::CoreModel::InlineElement)
        item.content.to_s.empty? && !has_nested_content?(item)
      elsif item.is_a?(String)
        item.empty?
      else
        false
      end
    end
  end

  # @return [Boolean, nil] truthy when +item+ is an InlineElement with a
  #   non-empty +nested_elements+ collection
  def has_nested_content?(item)
    item.is_a?(Coradoc::CoreModel::InlineElement) &&
      item.nested_elements && !item.nested_elements.empty?
  end
end
|
|
85
|
-
|
|
86
|
-
register :p, P::INSTANCE
|
|
87
|
-
end
|
|
88
|
-
end
|
|
89
|
-
end
|
|
90
|
-
end
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Conversion logic shared by the superscript and subscript converters.
#
# An including class has to supply `element_class`, which returns the
# CoreModel class to instantiate (e.g. SuperscriptElement or
# SubscriptElement). `extract_leading_trailing_whitespace` and
# `treat_children_coradoc` are also expected on the including class.
module PositionalFormatting
  # Converts the node into an `element_class` instance, keeping any
  # whitespace reported around it.
  #
  # @param node the element being converted
  # @param state [Hash] conversion state handed on to the child converters
  # @return [nil] when the converted children are empty
  # @return [Object] the element alone when no surrounding whitespace exists
  # @return [Array] leading whitespace, element, trailing whitespace (nil
  #   entries removed) otherwise
  def to_coradoc(node, state = {})
    before, after = extract_leading_trailing_whitespace(node)
    children = treat_children_coradoc(node, state)
    return nil if content_empty?(children)

    element = element_class.new(content: children)
    parts = [before, element, after].compact
    return parts.first if parts.length == 1

    parts
  end

  private

  # @param content [nil, String, Array, Object]
  # @return [Boolean] true for nil, a blank String or an empty Array;
  #   false for anything else
  def content_empty?(content)
    case content
    when nil then true
    when String then content.strip.empty?
    when Array then content.empty?
    else false
    end
  end
end
|
|
34
|
-
end
|
|
35
|
-
end
|
|
36
|
-
end
|
|
37
|
-
end
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter for +pre+ elements. Produces a source block when a language
# can be detected from CSS classes and a literal block otherwise.
class Pre < Base
  INSTANCE = new

  # @param node the +pre+ element
  # @param _state [Hash] unused; children are not converted, only the
  #   node's text is taken
  # @return [Coradoc::CoreModel::SourceBlock, Coradoc::CoreModel::LiteralBlock]
  def to_coradoc(node, _state = {})
    block_id = node['id']
    detected = language(node)
    body = extract_text_content(node)

    unless detected
      return Coradoc::CoreModel::LiteralBlock.new(content: body, id: block_id)
    end

    Coradoc::CoreModel::SourceBlock.new(content: body, id: block_id, language: detected)
  end

  private

  # @return the text of the +pre+ node as reported by +node.text+
  def extract_text_content(node)
    node.text
  end

  # Language name for the block: the parent's highlight class wins, the
  # node's own "brush:" class is the fallback.
  #
  # @return [String, nil]
  def language(node)
    language_from_highlight_class(node) || language_from_confluence_class(node)
  end

  # Reads "highlight-<lang>" from the class attribute of the parent node.
  #
  # @return [String, nil] the alphanumeric language token, if present
  def language_from_highlight_class(node)
    parent_classes = node.parent['class'].to_s
    parent_classes[/highlight-([a-zA-Z0-9]+)/, 1]
  end

  # Reads "brush: <lang>;" from the node's own class attribute. The
  # terminating semicolon is required by the pattern.
  #
  # @return [String, nil] the language with surrounding whitespace removed
  def language_from_confluence_class(node)
    own_classes = node['class'].to_s
    return nil unless own_classes.include?('brush:')

    own_classes[/brush:\s*([^;]+);/, 1]&.strip
  end
end
|
|
54
|
-
|
|
55
|
-
register :pre, Pre::INSTANCE
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
end
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter for inline +q+ (quotation) elements.
class Q < Base
  INSTANCE = new

  # Wraps the converted children in an inline element of format type
  # 'quotation'. The node's +cite+ attribute becomes the target.
  #
  # @param node the +q+ element
  # @param state [Hash] conversion state handed on to the child converters
  # @return [Coradoc::CoreModel::InlineElement]
  def to_coradoc(node, state = {})
    children = treat_children_coradoc(node, state)
    source = node['cite']
    plain_text = extract_text_from_content(children)

    Coradoc::CoreModel::InlineElement.new(
      format_type: 'quotation',
      nested_elements: children,
      content: plain_text,
      target: source
    )
  end
end
|
|
22
|
-
|
|
23
|
-
register :q, Q::INSTANCE
|
|
24
|
-
end
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
end
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Markup converter for bold text. Registered in this file for the
# +strong+ and +b+ tags.
class Strong < Markup
  INSTANCE = new

  # @return [String] the format type reported for this markup
  def coradoc_format_type
    'bold'
  end

  # @return [Array<String>] tag names reported as this markup's ancestors
  def markup_ancestor_tag_names
    ['strong', 'b']
  end
end
|
|
18
|
-
|
|
19
|
-
register :strong, Strong::INSTANCE
|
|
20
|
-
register :b, Strong::INSTANCE
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
end
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter for +sub+ elements. The conversion itself lives in
# PositionalFormatting; this class only names the element class.
class Sub < Base
  include PositionalFormatting

  INSTANCE = new

  private

  # @return [Class] the CoreModel class built for subscript content
  def element_class
    Coradoc::CoreModel::SubscriptElement
  end
end
|
|
17
|
-
|
|
18
|
-
register :sub, Sub::INSTANCE
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
end
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter for +sup+ elements. The conversion itself lives in
# PositionalFormatting; this class only names the element class.
class Sup < Base
  include PositionalFormatting

  INSTANCE = new

  private

  # @return [Class] the CoreModel class built for superscript content
  def element_class
    Coradoc::CoreModel::SuperscriptElement
  end
end
|
|
17
|
-
|
|
18
|
-
register :sup, Sup::INSTANCE
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
end
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter for +table+ elements.
class Table < Base
  INSTANCE = new

  # Builds a CoreModel table from the node: caption text as title, the
  # converted children as rows, plus translated frame/grid settings.
  #
  # @param node the +table+ element
  # @param state [Hash] conversion state handed on to the child converters
  # @return [Coradoc::CoreModel::Table]
  def to_coradoc(node, state = {})
    table_id = node['id']
    caption = extract_title(node)
    rows = treat_children_coradoc(node, state)
    frame_value = frame(node)
    grid_value = rules(node)

    Coradoc::CoreModel::Table.new(
      title: caption, rows: rows, id: table_id, frame: frame_value, grid: grid_value
    )
  end

  # @return [String, nil] stripped text of the direct +caption+ child, or
  #   nil when the table has none
  def extract_title(node)
    caption = node.at('./caption')
    caption.nil? ? nil : caption.text.strip
  end

  # Translates the HTML +frame+ attribute.
  #
  # @return [String, nil] 'none', 'topbot', 'sides' or 'all'; nil for a
  #   missing or unlisted value
  def frame(node)
    {
      'void' => 'none',
      'hsides' => 'topbot',
      'vsides' => 'sides',
      'box' => 'all',
      'border' => 'all'
    }[node['frame']]
  end

  # Translates the HTML +rules+ attribute; the four recognised values map
  # to themselves.
  #
  # @return [String, nil] 'all', 'rows', 'cols' or 'none'; nil for a
  #   missing or unlisted value
  def rules(node)
    {
      'all' => 'all',
      'rows' => 'rows',
      'cols' => 'cols',
      'none' => 'none'
    }[node['rules']]
  end
end
|
|
61
|
-
|
|
62
|
-
register :table, Table::INSTANCE
|
|
63
|
-
end
|
|
64
|
-
end
|
|
65
|
-
end
|
|
66
|
-
end
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter for table cells. Registered in this file for the +td+ and
# +th+ tags.
class Td < Base
  INSTANCE = new

  # Builds a CoreModel table cell.
  #
  # When the cell's only element child is a +p+, the +:tdsinglepara+ flag
  # is set in +state+ while the children are converted. The flag is put
  # back to its previous value afterwards, so it does not carry over to
  # later cells or to content after the table.
  # NOTE(review): assumes callers pass the same state hash on to sibling
  # nodes — confirm against Base#treat_children_coradoc.
  #
  # @param node the +td+/+th+ element
  # @param state [Hash] conversion state handed on to the child converters
  # @return [Coradoc::CoreModel::TableCell]
  def to_coradoc(node, state = {})
    colspan = node['colspan']&.to_i
    rowspan = node['rowspan']&.to_i
    alignment = extract_alignment(node)

    singlepara = node.elements.size == 1 && node.elements.first.name == 'p'
    content = with_single_para_flag(state, singlepara) do
      treat_children_coradoc(node, state)
    end

    Coradoc::CoreModel::TableCell.new(
      content: extract_text_from_content(content),
      alignment: alignment,
      # Spans of 1 (or unparsable values, which to_i turns into 0) are
      # not reported.
      colspan: colspan && colspan > 1 ? colspan : nil,
      rowspan: rowspan && rowspan > 1 ? rowspan : nil,
      header: node.name == 'th'
    )
  end

  # @return [String, nil] 'left', 'center' or 'right' taken from the
  #   +align+ attribute; nil for a missing or unlisted value
  def extract_alignment(node)
    align = node['align']
    case align
    when 'left' then 'left'
    when 'center' then 'center'
    when 'right' then 'right'
    end
  end

  private

  # Runs the block with +state[:tdsinglepara]+ set to true when +enabled+,
  # then restores the previous entry (or removes the key if there was
  # none), even if the block raises.
  #
  # @param state [Hash] the shared conversion state
  # @param enabled [Boolean] whether the flag should be set for the block
  # @return the block's result
  def with_single_para_flag(state, enabled)
    return yield unless enabled

    had_flag = state.key?(:tdsinglepara)
    previous = state[:tdsinglepara]
    state[:tdsinglepara] = true
    begin
      yield
    ensure
      if had_flag
        state[:tdsinglepara] = previous
      else
        state.delete(:tdsinglepara)
      end
    end
  end
end
|
|
38
|
-
|
|
39
|
-
register :td, Td::INSTANCE
|
|
40
|
-
register :th, Td::INSTANCE
|
|
41
|
-
end
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
end
|
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter for text nodes.
class Text < Base
  INSTANCE = new

  # Converts a text node. Whitespace-only nodes are delegated to
  # #treat_empty; everything else is cleaned up and wrapped in a
  # CoreModel::TextElement.
  #
  # @param node the text node
  # @param state [Hash] conversion state
  # @return [Coradoc::CoreModel::TextElement, String, nil]
  def to_coradoc(node, state = {})
    raw = node.text
    return treat_empty(node, state) if raw.strip.empty?

    Coradoc::CoreModel::TextElement.new(content: cleanup_html_text(raw))
  end

  private

  # Decides what a whitespace-only text node becomes.
  #
  # @return [nil] directly inside +ol+/+ul+ (otherwise the indentation is
  #   broken), while +state[:tdsinglepara]+ is set, or when the text is
  #   anything other than the single-character string compared below
  # @return [String] that single-character string otherwise
  def treat_empty(node, state)
    parent_tag = node.parent.name.to_sym
    return nil if %i[ol ul].include?(parent_tag)
    return nil if state[:tdsinglepara]

    node.text == ' ' ? ' ' : nil
  end

  # Runs the text through the cleanup steps in order: nbsp replacement,
  # border newline removal, inner newline/tab folding, link escaping.
  #
  # @param text [String]
  # @return [String]
  def cleanup_html_text(text)
    steps = %i[preserve_nbsp remove_border_newlines remove_inner_newlines escape_links]
    steps.reduce(text) { |current, step| send(step, current) }
  end

  # Replaces every U+00A0 with the replacement string below.
  def preserve_nbsp(text)
    text.gsub("\u00A0", ' ')
  end

  # Prefixes "<<...>>" sequences (whose first inner character is not a
  # space) with a backslash.
  def escape_links(text)
    text.gsub(/<<([^ ][^>]*)>>/, '\\<<\\1>>')
  end

  # Removes newlines at the very start and very end of the text.
  def remove_border_newlines(text)
    without_leading = text.sub(/\A\n+/, '')
    without_leading.sub(/\n+\z/, '')
  end

  # Turns newlines and tabs into spaces and collapses runs of spaces to
  # one, so a single leading/trailing space survives.
  def remove_inner_newlines(text)
    text.gsub(/[\n\t]/, ' ').squeeze(' ')
  end
end
|
|
63
|
-
|
|
64
|
-
register :text, Text::INSTANCE
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
|
-
end
|
|
68
|
-
end
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter for +tr+ elements.
class Tr < Base
  INSTANCE = new

  # Builds a CoreModel table row whose cells are the converted children.
  #
  # @param node the +tr+ element
  # @param state [Hash] conversion state handed on to the child converters
  # @return [Coradoc::CoreModel::TableRow]
  def to_coradoc(node, state = {})
    cells = treat_children_coradoc(node, state)
    is_header = table_header_row?(node)

    Coradoc::CoreModel::TableRow.new(cells: cells, header: is_header)
  end

  # A row counts as a header row when no element precedes it under its
  # parent.
  # NOTE(review): this also holds for the first row of every row group
  # (e.g. a +tbody+ following a +thead+) — confirm that is intended.
  #
  # @return [Boolean]
  def table_header_row?(node)
    preceding = node.previous_element
    preceding.nil?
  end
end
|
|
24
|
-
|
|
25
|
-
register :tr, Tr::INSTANCE
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
end
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Media converter for +video+ elements.
class Video < MediaBase
  INSTANCE = new

  private

  # @return [Symbol] the semantic type reported for this media element
  def semantic_type
    :video
  end

  # Extends the base attributes with the video-specific +poster+, +width+
  # and +height+ attributes; nil-valued entries are removed.
  #
  # @param node the +video+ element
  # @return [Hash]
  def build_attributes(node)
    attributes = base_attributes(node)
    video_only = {
      poster: node['poster'],
      width: node['width'],
      height: node['height']
    }
    attributes.merge(video_only).compact
  end
end
|
|
24
|
-
|
|
25
|
-
register :video, Video::INSTANCE
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
end
|
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter classes are autoloaded and register themselves when their file
# is loaded. To add a converter, add one `Constant: 'file_stem'` entry
# below; the stem is expanded to a path under
# coradoc/html/input/converters/.
CONVERTERS = {
  Base: 'base',
  Markup: 'markup',
  A: 'a',
  Aside: 'aside',
  Audio: 'audio',
  Blockquote: 'blockquote',
  Br: 'br',
  Bypass: 'bypass',
  Code: 'code',
  Div: 'div',
  Dl: 'dl',
  Skip: 'drop',
  Em: 'em',
  Figure: 'figure',
  H: 'h',
  Head: 'head',
  Hr: 'hr',
  Img: 'img',
  Li: 'li',
  Mark: 'mark',
  Math: 'math',
  MediaBase: 'media_base',
  Ol: 'ol',
  P: 'p',
  PassThrough: 'pass_through',
  PositionalFormatting: 'positional_formatting',
  Pre: 'pre',
  Q: 'q',
  Strong: 'strong',
  Sup: 'sup',
  Sub: 'sub',
  Table: 'table',
  Td: 'td',
  Text: 'text',
  Tr: 'tr',
  Video: 'video'
}.transform_values { |stem| "coradoc/html/input/converters/#{stem}".freeze }.freeze
private_constant :CONVERTERS

# Declare one autoload per converter constant.
CONVERTERS.each_pair { |const_name, feature| autoload(const_name, feature) }

# Registry of tag name (Symbol) => converter, filled through .register.
@converters = {}
# Set once .ensure_converters_loaded has forced the autoloads.
@converters_loaded = false
|
|
55
|
-
|
|
56
|
-
# Adds (or replaces) the converter used for a tag.
#
# @param tag_name [#to_sym] tag name; stored as a Symbol
# @param converter the converter object for that tag
# @return the converter that was stored
def self.register(tag_name, converter)
  key = tag_name.to_sym
  @converters[key] = converter
end
|
|
59
|
-
|
|
60
|
-
# Removes the converter registered for a tag.
#
# @param tag_name [#to_sym] tag name; looked up as a Symbol
# @return the removed converter, or nil when none was registered
def self.unregister(tag_name)
  key = tag_name.to_sym
  @converters.delete(key)
end
|
|
63
|
-
|
|
64
|
-
# Forces every autoloaded converter constant to load, once. The loaded
# flag is raised before the constants are touched, so later calls return
# immediately.
#
# @return [nil] when the converters were already loaded
def self.ensure_converters_loaded
  return if @converters_loaded

  @converters_loaded = true
  CONVERTERS.each_key do |const_name|
    const_get(const_name)
  end
end
|
|
70
|
-
|
|
71
|
-
# Finds the converter for a tag, loading the converters first if needed.
#
# @param tag_name [#to_sym] tag name
# @return the registered converter, or the result of .default_converter
#   when nothing (or a falsy value) is registered for the tag
def self.lookup(tag_name)
  ensure_converters_loaded

  registered = @converters[tag_name.to_sym]
  return registered if registered

  default_converter(tag_name)
end
|
|
75
|
-
|
|
76
|
-
# Converts a node, or each node of a collection, by dispatching to the
# converter looked up from the node's name.
#
# Every entry of +state[:plugin_instances]+ wraps the conversion: its
# +html_tree_run_hooks+ receives the node, the state and, as a block, the
# conversion built so far. Later entries wrap earlier ones.
#
# @param node a node, an Array of nodes or a Nokogiri::XML::NodeSet
# @param state [Hash] conversion state
# @return the conversion result; an Array of results for collections
def self.process_coradoc(node, state)
  node = node.to_a if node.is_a?(Nokogiri::XML::NodeSet)
  if node.is_a?(Array)
    return node.map { |member| process_coradoc(member, state) }
  end

  # Plain procs are used on purpose: they ignore the arguments given to
  # #call and to yield.
  innermost = proc { lookup(node.name).to_coradoc(node, state) }
  plugins = state[:plugin_instances] || {}
  pipeline = plugins.reduce(innermost) do |wrapped, plugin|
    proc { plugin.html_tree_run_hooks(node, state, &wrapped) }
  end

  pipeline.call(node, state)
end
|
|
88
|
-
|
|
89
|
-
# Picks the converter for a tag that has no registered converter, based
# on +Html.config.unknown_tags+.
#
# @param tag_name the unknown tag's name (used in the error message)
# @return the PassThrough, Skip or Bypass converter instance for the
#   :pass_through, :drop and :bypass settings respectively
# @raise [Errors::UnknownTagError] when the setting is :raise
# @raise [Errors::InvalidConfigurationError] for any other setting,
#   including nil and values that cannot be turned into a Symbol
def self.default_converter(tag_name)
  setting = Html.config.unknown_tags
  # Values without #to_sym (nil, numbers, ...) are passed through as-is so
  # they reach the invalid-configuration branch instead of failing with
  # NoMethodError.
  mode = setting.respond_to?(:to_sym) ? setting.to_sym : setting

  case mode
  when :pass_through
    PassThrough::INSTANCE
  when :drop
    Skip::INSTANCE
  when :bypass
    Bypass::INSTANCE
  when :raise
    raise Errors::UnknownTagError, "unknown tag: #{tag_name}"
  else
    raise Errors::InvalidConfigurationError,
          "unknown value #{setting.inspect} for Coradoc::Input::Html.config.unknown_tags"
  end
end
|
|
104
|
-
end
|
|
105
|
-
end
|
|
106
|
-
end
|
|
107
|
-
end
|