coradoc-html 1.1.18 → 1.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/coradoc/html/cleaner.rb +128 -0
- data/lib/coradoc/html/converters/a.rb +77 -0
- data/lib/coradoc/html/converters/aside.rb +20 -0
- data/lib/coradoc/html/converters/audio.rb +19 -0
- data/lib/coradoc/html/converters/base.rb +98 -0
- data/lib/coradoc/html/converters/blockquote.rb +25 -0
- data/lib/coradoc/html/converters/br.rb +17 -0
- data/lib/coradoc/html/converters/bypass.rb +82 -0
- data/lib/coradoc/html/converters/code.rb +25 -0
- data/lib/coradoc/html/converters/div.rb +23 -0
- data/lib/coradoc/html/converters/dl.rb +82 -0
- data/lib/coradoc/html/converters/drop.rb +26 -0
- data/lib/coradoc/html/converters/em.rb +23 -0
- data/lib/coradoc/html/converters/figure.rb +33 -0
- data/lib/coradoc/html/converters/h.rb +58 -0
- data/lib/coradoc/html/converters/head.rb +29 -0
- data/lib/coradoc/html/converters/hr.rb +17 -0
- data/lib/coradoc/html/converters/img.rb +103 -0
- data/lib/coradoc/html/converters/li.rb +35 -0
- data/lib/coradoc/html/converters/mark.rb +21 -0
- data/lib/coradoc/html/converters/markup.rb +93 -0
- data/lib/coradoc/html/converters/math.rb +37 -0
- data/lib/coradoc/html/converters/media_base.rb +48 -0
- data/lib/coradoc/html/converters/ol.rb +42 -0
- data/lib/coradoc/html/converters/p.rb +64 -0
- data/lib/coradoc/html/converters/pass_through.rb +15 -0
- data/lib/coradoc/html/converters/positional_formatting.rb +35 -0
- data/lib/coradoc/html/converters/pre.rb +57 -0
- data/lib/coradoc/html/converters/q.rb +25 -0
- data/lib/coradoc/html/converters/strong.rb +22 -0
- data/lib/coradoc/html/converters/sub.rb +20 -0
- data/lib/coradoc/html/converters/sup.rb +20 -0
- data/lib/coradoc/html/converters/table.rb +64 -0
- data/lib/coradoc/html/converters/td.rb +42 -0
- data/lib/coradoc/html/converters/text.rb +66 -0
- data/lib/coradoc/html/converters/tr.rb +27 -0
- data/lib/coradoc/html/converters/video.rb +27 -0
- data/lib/coradoc/html/converters.rb +104 -0
- data/lib/coradoc/html/drop/drop_factory.rb +14 -22
- data/lib/coradoc/html/drop/inline_element_drop.rb +3 -5
- data/lib/coradoc/html/drop/raw_inline_element_drop.rb +30 -0
- data/lib/coradoc/html/drop.rb +30 -8
- data/lib/coradoc/html/errors.rb +11 -0
- data/lib/coradoc/html/html_converter.rb +78 -0
- data/lib/coradoc/html/input_config.rb +66 -0
- data/lib/coradoc/html/plugin.rb +90 -0
- data/lib/coradoc/html/plugins/plateau.rb +212 -0
- data/lib/coradoc/html/postprocessor.rb +19 -0
- data/lib/coradoc/html/spa.rb +0 -2
- data/lib/coradoc/html/static.rb +0 -2
- data/lib/coradoc/html/tag_mapping.rb +3 -1
- data/lib/coradoc/html/transform/from_core_model.rb +2 -2
- data/lib/coradoc/html/transform/to_core_model.rb +3 -3
- data/lib/coradoc/html/version.rb +1 -1
- data/lib/coradoc/html.rb +30 -5
- metadata +46 -47
- data/lib/coradoc/html/input/cleaner.rb +0 -134
- data/lib/coradoc/html/input/config.rb +0 -80
- data/lib/coradoc/html/input/converters/a.rb +0 -79
- data/lib/coradoc/html/input/converters/aside.rb +0 -22
- data/lib/coradoc/html/input/converters/audio.rb +0 -21
- data/lib/coradoc/html/input/converters/base.rb +0 -118
- data/lib/coradoc/html/input/converters/blockquote.rb +0 -27
- data/lib/coradoc/html/input/converters/br.rb +0 -19
- data/lib/coradoc/html/input/converters/bypass.rb +0 -84
- data/lib/coradoc/html/input/converters/code.rb +0 -27
- data/lib/coradoc/html/input/converters/div.rb +0 -25
- data/lib/coradoc/html/input/converters/dl.rb +0 -84
- data/lib/coradoc/html/input/converters/drop.rb +0 -28
- data/lib/coradoc/html/input/converters/em.rb +0 -25
- data/lib/coradoc/html/input/converters/figure.rb +0 -35
- data/lib/coradoc/html/input/converters/h.rb +0 -74
- data/lib/coradoc/html/input/converters/head.rb +0 -31
- data/lib/coradoc/html/input/converters/hr.rb +0 -19
- data/lib/coradoc/html/input/converters/img.rb +0 -105
- data/lib/coradoc/html/input/converters/li.rb +0 -37
- data/lib/coradoc/html/input/converters/mark.rb +0 -23
- data/lib/coradoc/html/input/converters/markup.rb +0 -103
- data/lib/coradoc/html/input/converters/math.rb +0 -39
- data/lib/coradoc/html/input/converters/media_base.rb +0 -50
- data/lib/coradoc/html/input/converters/ol.rb +0 -44
- data/lib/coradoc/html/input/converters/p.rb +0 -90
- data/lib/coradoc/html/input/converters/pass_through.rb +0 -17
- data/lib/coradoc/html/input/converters/positional_formatting.rb +0 -37
- data/lib/coradoc/html/input/converters/pre.rb +0 -59
- data/lib/coradoc/html/input/converters/q.rb +0 -27
- data/lib/coradoc/html/input/converters/strong.rb +0 -24
- data/lib/coradoc/html/input/converters/sub.rb +0 -22
- data/lib/coradoc/html/input/converters/sup.rb +0 -22
- data/lib/coradoc/html/input/converters/table.rb +0 -66
- data/lib/coradoc/html/input/converters/td.rb +0 -44
- data/lib/coradoc/html/input/converters/text.rb +0 -68
- data/lib/coradoc/html/input/converters/tr.rb +0 -29
- data/lib/coradoc/html/input/converters/video.rb +0 -29
- data/lib/coradoc/html/input/converters.rb +0 -107
- data/lib/coradoc/html/input/errors.rb +0 -22
- data/lib/coradoc/html/input/html_converter.rb +0 -98
- data/lib/coradoc/html/input/plugin.rb +0 -120
- data/lib/coradoc/html/input/plugins/plateau.rb +0 -214
- data/lib/coradoc/html/input/postprocessor.rb +0 -25
- data/lib/coradoc/html/input.rb +0 -86
- data/lib/coradoc/html/output.rb +0 -89
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for <figure> elements. The figure becomes a
# Coradoc::CoreModel::ExampleBlock whose title is taken from a direct
# <figcaption> child, when one exists.
class Figure < Base
  INSTANCE = new

  # Builds the example block for +node+.
  #
  # @param node the <figure> node; its 'id' attribute, its figcaption
  #   child and its children are read
  # @param state [Hash] conversion state handed on to the child converters
  # @return [Coradoc::CoreModel::ExampleBlock]
  def to_coradoc(node, state = {})
    figure_id = node['id']
    caption = extract_title(node)
    body = treat_children_coradoc(node, state)

    # extract_text_from_content is provided outside this class; it is
    # given whatever extract_title returned ('' or converted children).
    Coradoc::CoreModel::ExampleBlock.new(
      title: extract_text_from_content(caption),
      children: body,
      id: figure_id
    )
  end

  # Converts the children of the figure's direct <figcaption> child.
  # The caption is converted with a fresh, empty state.
  #
  # @return the converted caption children, or '' when the figure has no
  #   direct <figcaption> child
  def extract_title(node)
    caption_node = node.at('./figcaption')
    caption_node.nil? ? '' : treat_children_coradoc(caption_node, {})
  end
end
|
|
29
|
+
|
|
30
|
+
register :figure, Figure::INSTANCE
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter shared by the heading tags. A heading is turned into a
# Coradoc::CoreModel::SectionElement that carries a title, a level and
# an id, with an empty children list.
class H < Base
  INSTANCE = new

  # @param node the heading node; the first digit in its tag name
  #   supplies the section level
  # @param state [Hash] conversion state forwarded to child converters
  # @return [Coradoc::CoreModel::SectionElement]
  def to_coradoc(node, state = {})
    section_id = node['id']
    anchors = treat_children_anchors(node, state)

    # With no id on the heading itself, borrow the target of the first
    # converted <a> child, provided it is an InlineElement with a target.
    if section_id.to_s.empty? && !anchors.empty?
      lead = anchors.first
      section_id = lead.target if lead.is_a?(Coradoc::CoreModel::InlineElement) && lead.target
    end

    depth = node.name[/\d/].to_i
    title_parts = treat_children_no_anchors(node, state)

    Coradoc::CoreModel::SectionElement.new(
      title: extract_title_text(title_parts),
      level: depth,
      id: section_id,
      children: []
    )
  end

  # Converts every direct child of +node+ except <a> elements.
  #
  # @return [Array] flattened conversion results with nils removed
  def treat_children_no_anchors(node, state)
    others = node.children.reject { |child| child.name == 'a' }
    converted = others.map { |child| treat_coradoc(child, state) }
    converted.flatten.compact
  end

  # Converts only the direct <a> children of +node+.
  #
  # @return [Array] flattened conversion results with nils removed
  def treat_children_anchors(node, state)
    links = node.children.select { |child| child.name == 'a' }
    converted = links.map { |child| treat_coradoc(child, state) }
    converted.flatten.compact
  end

  private

  # Reduces converted heading content to its text via
  # CoreModel::InlineContent.text_of and strips surrounding whitespace.
  def extract_title_text(content)
    plain = Coradoc::CoreModel::InlineContent.text_of(content)
    plain.strip
  end
end
|
|
49
|
+
|
|
50
|
+
register :h1, H::INSTANCE
|
|
51
|
+
register :h2, H::INSTANCE
|
|
52
|
+
register :h3, H::INSTANCE
|
|
53
|
+
register :h4, H::INSTANCE
|
|
54
|
+
register :h5, H::INSTANCE
|
|
55
|
+
register :h6, H::INSTANCE
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for the document <head>. Only the <title> is used; it ends
# up as the title of a level-0 Coradoc::CoreModel::DocumentElement.
class Head < Base
  INSTANCE = new

  # @param node the <head> node
  # @param _state [Hash] accepted for interface parity; not used here
  # @return [Coradoc::CoreModel::DocumentElement]
  def to_coradoc(node, _state = {})
    Coradoc::CoreModel::DocumentElement.new(
      title: extract_title(node),
      level: 0
    )
  end

  # Text of the direct <title> child of +node+.
  #
  # @return [String] the title text, or the placeholder '(???)' when no
  #   <title> child is present
  def extract_title(node)
    title_node = node.at('./title')
    return '(???)' unless title_node

    title_node.text
  end
end
|
|
25
|
+
|
|
26
|
+
register :head, Head::INSTANCE
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for <hr>: each rule maps to a horizontal rule block.
class Hr < Base
  INSTANCE = new

  # Neither argument is consulted; every call returns a newly built
  # Coradoc::CoreModel::HorizontalRuleBlock.
  #
  # @param _node ignored
  # @param _state [Hash] ignored
  # @return [Coradoc::CoreModel::HorizontalRuleBlock]
  def to_coradoc(_node, _state = {})
    Coradoc::CoreModel::HorizontalRuleBlock.new
  end
end
|
|
13
|
+
|
|
14
|
+
register :hr, Hr::INSTANCE
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'fileutils'
|
|
4
|
+
require 'pathname'
|
|
5
|
+
require 'tempfile'
|
|
6
|
+
require 'base64'
|
|
7
|
+
require 'marcel'
|
|
8
|
+
|
|
9
|
+
module Coradoc
|
|
10
|
+
module Html
|
|
11
|
+
module Converters
|
|
12
|
+
# Converter for <img>. Produces a Coradoc::CoreModel::Image and, when
# Html.input_config.external_images is set, first materialises the image
# (data URI or source-relative file) under an "images" directory next to
# the configured destination.
class Img < Base
  INSTANCE = new

  # Current image counter rendered through the configured format pattern.
  #
  # @return [String]
  def image_number
    format(
      Html.input_config.image_counter_pattern,
      Html.input_config.image_counter
    )
  end

  # Advances the image counter kept on Html.input_config by one.
  def image_number_increment
    Html.input_config.image_counter += 1
  end

  # Copies the image referenced by +src+ into "<destination dir>/images"
  # under a counter-based file name and returns the new location.
  #
  # The counter is advanced and a path is returned even when the source
  # file is missing; in that case only a warning is printed.
  #
  # @param src [String, nil] a base64 image data URI or a path that is
  #   resolved against Html.input_config.sourcedir
  # @return [Pathname, nil] path of the copied image relative to the
  #   destination directory; nil when +src+ is nil
  def datauri2file(src)
    return unless src

    # A regexp literal on the left of =~ assigns the named capture to the
    # local +imgdata+ (nil when +src+ is not a base64 image data URI).
    %r{^data:image/(?:[^;]+);base64,(?<imgdata>.+)$} =~ src

    dest_dir = Pathname.new(Html.input_config.destination).dirname
    images_dir = dest_dir.join('images')
    FileUtils.mkdir_p(images_dir)

    ext, image_src_path, tempfile = determine_image_src_path(
      src,
      imgdata
    )
    image_dest_path = images_dir + "#{image_number}.#{ext}"

    if File.exist?(image_src_path)
      FileUtils.cp(image_src_path, image_dest_path)
    else
      Kernel.warn "Image #{image_src_path} does not exist"
    end

    image_number_increment

    image_dest_path.relative_path_from(dest_dir)
  ensure
    # Only the data-URI path hands back a Tempfile; close and unlink it.
    tempfile&.close!
  end

  # Works out where the image bytes live.
  #
  # @param src [String] the original src attribute
  # @param imgdata [String, nil] base64 payload when +src+ was a data URI
  # @return [Array] [ext, path, tempfile] for data URIs, [ext, path] for
  #   file references (ext is nil when +src+ has no extension)
  def determine_image_src_path(src, imgdata)
    return copy_temp_file(imgdata) if imgdata

    ext = File.extname(src).strip.downcase[1..]
    # NOTE(review): +src+ comes straight from the HTML being converted and
    # is joined onto sourcedir unchecked, so "../" segments or an absolute
    # path can point outside sourcedir. Confirm whether input is trusted.
    [ext, Pathname.new(Html.input_config.sourcedir).join(src)]
  end

  # Decodes a base64 payload into a temporary file and derives the file
  # extension from the MIME type reported by Marcel.
  #
  # On success the caller owns the returned Tempfile and must close it.
  # If decoding or type detection fails, the temporary file is closed and
  # unlinked here before the error is re-raised, so it is not left behind.
  #
  # @param imgdata [String] strict base64 image data
  # @return [Array] [ext, path, tempfile]
  # @raise [ArgumentError] when +imgdata+ is not valid strict base64
  def copy_temp_file(imgdata)
    f = Tempfile.open(['radoc', '.jpg'])
    begin
      f.binmode
      f.write(Base64.strict_decode64(imgdata))
      f.rewind
      # Keep only the MIME subtype, e.g. "image/png" -> "png".
      ext = Marcel::MimeType.for(f).sub(%r{^[^/]+/}, '')
      ext = 'svg' if ext == 'svg+xml'
      [ext, f.path, f]
    rescue StandardError
      f.close!
      raise
    end
  end

  # @param node the <img> node; id, alt, src, width and height are read
  # @param _state [Hash] accepted for interface parity; not used here
  # @return [Coradoc::CoreModel::Image, nil] nil when there is no src
  def to_coradoc(node, _state = {})
    id = node['id']
    alt = node['alt']
    src = node['src']
    width = node['width']
    height = node['height']

    # Convert width/height to integers if they are numeric strings
    width = width.to_i if width&.match?(/\A\d+\z/)
    height = height.to_i if height&.match?(/\A\d+\z/)

    # extract_title is not defined in this class; it is resolved through
    # the superclass chain.
    title = extract_title(node)

    src = datauri2file(src) if Html.input_config.external_images

    return unless src

    Coradoc::CoreModel::Image.new(
      src: src,
      alt: alt,
      caption: title,
      width: width&.to_s,
      height: height&.to_s,
      id: id
    )
  end
end
|
|
99
|
+
|
|
100
|
+
register :img, Img::INSTANCE
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for <li>. Produces a Coradoc::CoreModel::ListItem; an item
# whose only meaningful child is a single <p> is unwrapped so that the
# paragraph's children become the item's children directly.
class Li < Base
  INSTANCE = new

  # @param node the <li> node
  # @param state [Hash] conversion state forwarded to child converters
  # @return [Coradoc::CoreModel::ListItem]
  def to_coradoc(node, state = {})
    item_id = node['id']

    paragraphs = node.children.select { |child| child.name == 'p' }
    # Whitespace-only text nodes do not count as content.
    meaningful = node.children.reject { |child| child.text? && child.text.strip.empty? }

    # Exactly one <p> and nothing else of substance: convert the
    # paragraph's children instead of the paragraph itself.
    lone_paragraph = paragraphs.size == 1 && meaningful.size == 1
    source = lone_paragraph ? paragraphs.first : node
    converted = treat_children_coradoc(source, state)

    Coradoc::CoreModel::ListItem.new(
      children: converted,
      id: item_id
    )
  end
end
|
|
31
|
+
|
|
32
|
+
register :li, Li::INSTANCE
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for <mark>, built on the generic Markup converter.
class Mark < Markup
  INSTANCE = new

  # Format type name that Markup hands to
  # CoreModel::InlineElement.format_type_class.
  #
  # @return [String]
  def coradoc_format_type
    'highlight'
  end

  # Tag names that Markup treats as an enclosing markup ancestor: a
  # <mark> nested inside one of these yields only its converted children.
  #
  # @return [Array<String>]
  def markup_ancestor_tag_names
    %w[mark]
  end
end
|
|
17
|
+
|
|
18
|
+
register :mark, Mark::INSTANCE
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Shared base for inline formatting converters. Subclasses provide
# +coradoc_format_type+ and +markup_ancestor_tag_names+ (the latter is
# called here but not defined in this class).
class Markup < Base
  # Converts an inline formatting node.
  #
  # @param node the formatting node
  # @param state [Hash] conversion state forwarded to child converters
  # @return the converted children alone when the node sits inside an
  #   ancestor named by +markup_ancestor_tag_names+; a TextElement or nil
  #   when the node has no children; otherwise the formatted inline
  #   element, or an array of it plus surrounding whitespace TextElements
  def to_coradoc(node, state = {})
    open_before = unconstrained_before?(node)
    open_after = unconstrained_after?(node)

    lead_ws, trail_ws = extract_leading_trailing_whitespace(node)

    # Wraps a whitespace string in a TextElement; yields nil when there
    # is no whitespace to wrap.
    wrap = ->(ws) { Coradoc::CoreModel::TextElement.new(content: ws) if ws }
    lead_text = wrap.call(lead_ws)
    trail_text = wrap.call(trail_ws)

    children = treat_children_coradoc(node, state)

    return children if node_has_ancestor?(node, markup_ancestor_tag_names)
    # Childless node: nothing to format, keep only the leading whitespace.
    return wrap.call(lead_ws) if node.children.empty?

    unconstrained = (open_before && lead_text.nil?) ||
                    (open_after && trail_text.nil?)

    plain_text, nested = extract_text_and_elements(children)

    formatted = Coradoc::CoreModel::InlineElement.format_type_class(coradoc_format_type).new(
      content: plain_text,
      nested_elements: nested.empty? ? nil : nested,
      metadata: { unconstrained: unconstrained }
    )

    pieces = [lead_text, formatted, trail_text].compact
    pieces.length == 1 ? pieces.first : pieces
  end

  # Splits converted content into plain text and nested inline elements.
  #
  # @param content an Array of converted items, or any other value
  # @return [Array] [text, elements]; a non-Array +content+ is returned
  #   unchanged as the text with an empty element list
  def extract_text_and_elements(content)
    return [content, []] unless content.is_a?(Array)

    elements, textual = content.partition do |item|
      item.is_a?(Coradoc::CoreModel::InlineElement)
    end

    fragments = textual.map do |item|
      if item.is_a?(String)
        item
      elsif item.is_a?(Coradoc::CoreModel::Base)
        # Other model objects contribute their text only.
        extract_text_from_model(item)
      else
        item.to_s
      end
    end

    [fragments.join, elements]
  end

  # Thin wrapper over CoreModel::InlineContent.text_of for one model.
  def extract_text_from_model(model)
    Coradoc::CoreModel::InlineContent.text_of(model)
  end

  # Default format type; subclasses are expected to override it.
  #
  # @return [String]
  def coradoc_format_type
    'text'
  end
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for <math>. The serialised node (optionally translated to
# AsciiMath) becomes the content of a Coradoc::CoreModel::StemElement.
class Math < Base
  INSTANCE = new

  # @param node the <math> node; it is serialised with +to_s+
  # @param _state [Hash] accepted for interface parity; not used here
  # @return [Coradoc::CoreModel::StemElement]
  def to_coradoc(node, _state = {})
    markup = node.to_s.tr("\n", ' ')

    if Html.input_config.mathml2asciimath
      # Loaded lazily so plurimath is needed only when the option is on.
      require 'plurimath'
      markup = Plurimath::Math.parse(markup, :mathml).to_asciimath
    end

    unless markup.nil?
      # Backslash-escape square brackets.
      markup = markup.gsub('[', '\\[').gsub(']', '\\]')

      # Collapse doubled parentheses "((x))" into "(x)" repeatedly until
      # a pass no longer changes the string.
      previous = nil
      until markup == previous
        previous = markup
        markup = markup.gsub(/\(\(([^)]{1,100})\)\)/, '(\\1)')
      end
    end

    # NOTE(review): stem_type is 'mathml' even when the content was
    # converted to AsciiMath above - confirm this is what consumers expect.
    Coradoc::CoreModel::StemElement.new(
      content: markup,
      stem_type: 'mathml'
    )
  end
end
|
|
33
|
+
|
|
34
|
+
register :math, Math::INSTANCE
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Common behaviour for media converters. Subclasses must supply
# +semantic_type+ and may override +build_attributes+.
class MediaBase < Base
  # Builds a generic block whose content is the node's src attribute.
  #
  # @param node the media node
  # @param _state [Hash] accepted for interface parity; not used here
  # @return [Coradoc::CoreModel::Block]
  # @raise [NotImplementedError] when +semantic_type+ is not overridden
  def to_coradoc(node, _state = {})
    source_url = node['src']
    media_id = node['id']
    label = extract_title(node)

    Coradoc::CoreModel::Block.new(
      block_semantic_type: semantic_type,
      content: source_url,
      title: label,
      id: media_id,
      element_attributes: build_attributes(node)
    )
  end

  # Derives a title from a direct <track> child or, failing that, from
  # any descendant <source>: its 'label', else its 'srclang'.
  #
  # @return [String] '' when no such element or attribute exists
  def extract_title(node)
    carrier = node.at('./track') || node.at('.//source')
    return '' unless carrier

    carrier['label'] || carrier['srclang'] || ''
  end

  private

  # Hook for subclasses: the value passed as +block_semantic_type+.
  def semantic_type
    raise NotImplementedError
  end

  # The autoplay/loop/controls attributes present on +node+, keyed by
  # symbol; attributes the node does not carry are left out.
  def base_attributes(node)
    %i[autoplay loop controls].to_h { |key| [key, node[key.to_s]] }.compact
  end

  # Attributes stored on the block; the base set unless overridden.
  def build_attributes(node)
    base_attributes(node)
  end
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter shared by the list tags (registered below the class for
# :ol, :ul and :dir). Produces a Coradoc::CoreModel::ListBlock.
class Ol < Base
  INSTANCE = new

  # @param node the list node
  # @param state [Hash] conversion state forwarded to child converters
  # @return [Coradoc::CoreModel::ListBlock]
  def to_coradoc(node, state = {})
    list_id = node['id']
    entries = treat_children_coradoc(node, state)
    marker = get_list_type(node, state)
    # A missing 'start' attribute stays nil; otherwise it is parsed with
    # String#to_i.
    first_number = node['start']&.to_i

    Coradoc::CoreModel::ListBlock.new(
      marker_type: marker,
      items: entries,
      id: list_id,
      start: first_number
    )
  end

  # Marker type for the list: 'ordered' for <ol>, 'unordered' for every
  # other tag name.
  #
  # @param node the list node
  # @param _state unused
  # @return [String]
  def get_list_type(node, _state)
    node.name == 'ol' ? 'ordered' : 'unordered'
  end
end
|
|
36
|
+
|
|
37
|
+
register :ol, Ol::INSTANCE
|
|
38
|
+
register :ul, Ol::INSTANCE
|
|
39
|
+
register :dir, Ol::INSTANCE
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for <p>. Produces a Coradoc::CoreModel::ParagraphBlock from
# the converted children after edge whitespace clean-up.
class P < Base
  INSTANCE = new

  # @param node the <p> node
  # @param state [Hash] conversion state forwarded to child converters
  # @return [Coradoc::CoreModel::ParagraphBlock]
  def to_coradoc(node, state = {})
    paragraph_id = node['id']
    inline = strip_fullwidth_spaces(treat_children_coradoc(node, state))

    Coradoc::CoreModel::ParagraphBlock.new(
      children: inline,
      id: paragraph_id
    )
  end

  private

  # Clean-up pipeline for array content: per-element edge trimming, then
  # CoreModel::InlineContent.strip_edges, then removal of empty items.
  # Non-Array content is returned untouched.
  def strip_fullwidth_spaces(content)
    return content unless content.is_a?(Array)

    trimmed = strip_fullwidth_per_element(content)
    edged = Coradoc::CoreModel::InlineContent.strip_edges(trimmed)
    reject_empty_elements(edged)
  end

  # Trims the characters matched by the edge pattern from the start and
  # end of every InlineElement whose content is a String. A new array is
  # returned; other items pass through unchanged.
  # NOTE(review): upstream describes these as CJK fullwidth spaces -
  # confirm the literal in the pattern is U+3000 and not an ASCII space.
  def strip_fullwidth_per_element(content)
    content.map do |item|
      if item.is_a?(Coradoc::CoreModel::InlineElement) && item.content.is_a?(String)
        item.with_content(item.content.gsub(/\A +| +\z/, ''))
      else
        item
      end
    end
  end

  # Drops empty Strings and InlineElements that have neither content nor
  # nested elements; everything else is kept.
  def reject_empty_elements(content)
    content.reject do |item|
      case item
      when Coradoc::CoreModel::InlineElement
        item.content.to_s.empty? && !has_nested_content?(item)
      when String
        item.empty?
      else
        false
      end
    end
  end

  # Truthy when +item+ is an InlineElement with a non-empty
  # nested_elements collection.
  def has_nested_content?(item)
    return false unless item.is_a?(Coradoc::CoreModel::InlineElement)

    nested = item.nested_elements
    nested && !nested.empty?
  end
end
|
|
60
|
+
|
|
61
|
+
register :p, P::INSTANCE
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Shared logic for superscript/subscript converters.
|
|
7
|
+
#
|
|
8
|
+
# Subclasses must implement `element_class` returning the
|
|
9
|
+
# CoreModel class (e.g., SuperscriptElement, SubscriptElement).
|
|
10
|
+
module PositionalFormatting
  # Converts +node+ into an instance of the including converter's
  # +element_class+, keeping any leading/trailing whitespace reported by
  # +extract_leading_trailing_whitespace+ as separate neighbours.
  #
  # @param node the node being converted
  # @param state [Hash] conversion state forwarded to child converters
  # @return [nil] when the converted content is empty
  # @return the element alone when there is no surrounding whitespace,
  #   otherwise an Array of [leading, element, trailing] without nils
  def to_coradoc(node, state = {})
    before, after = extract_leading_trailing_whitespace(node)
    inner = treat_children_coradoc(node, state)

    return nil if content_empty?(inner)

    element = element_class.new(content: inner)
    parts = [before, element, after].compact
    parts.length == 1 ? parts.first : parts
  end

  private

  # True for nil, a blank String or an empty Array; any other value
  # counts as content.
  def content_empty?(content)
    case content
    when nil then true
    when String then content.strip.empty?
    when Array then content.empty?
    else false
    end
  end
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Html
|
|
5
|
+
module Converters
|
|
6
|
+
# Converter for <pre>. Yields a SourceBlock when a language can be
# detected from class attributes, otherwise a LiteralBlock.
class Pre < Base
  INSTANCE = new

  # @param node the <pre> node
  # @param _state [Hash] accepted for interface parity; not used here
  # @return [Coradoc::CoreModel::SourceBlock, Coradoc::CoreModel::LiteralBlock]
  def to_coradoc(node, _state = {})
    block_id = node['id']
    lang = language(node)
    text = extract_text_content(node)

    unless lang
      return Coradoc::CoreModel::LiteralBlock.new(
        content: text,
        id: block_id
      )
    end

    Coradoc::CoreModel::SourceBlock.new(
      content: text,
      id: block_id,
      language: lang
    )
  end

  private

  # Text content of the node as reported by +node.text+.
  def extract_text_content(node)
    node.text
  end

  # Language name from the parent's "highlight-<lang>" class, falling
  # back to a "brush: <lang>;" declaration on the node itself.
  #
  # @return [String, nil]
  def language(node)
    language_from_highlight_class(node) || language_from_confluence_class(node)
  end

  # Reads "highlight-<lang>" (letters and digits only) from the class
  # attribute of the node's parent.
  def language_from_highlight_class(node)
    parent_classes = node.parent['class'].to_s
    parent_classes[/highlight-([a-zA-Z0-9]+)/, 1]
  end

  # Reads the language from a "brush: <lang>;" entry in the node's own
  # class attribute; the terminating semicolon is required by the
  # pattern. Returns nil when no such entry exists.
  def language_from_confluence_class(node)
    declared = node['class'].to_s[/brush:\s*([^;]+);/, 1]
    declared&.strip
  end
end
|
|
53
|
+
|
|
54
|
+
register :pre, Pre::INSTANCE
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|