coradoc-html 1.1.18 → 1.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/coradoc/html/cleaner.rb +128 -0
- data/lib/coradoc/html/converters/a.rb +77 -0
- data/lib/coradoc/html/converters/aside.rb +20 -0
- data/lib/coradoc/html/converters/audio.rb +19 -0
- data/lib/coradoc/html/converters/base.rb +98 -0
- data/lib/coradoc/html/converters/blockquote.rb +25 -0
- data/lib/coradoc/html/converters/br.rb +17 -0
- data/lib/coradoc/html/converters/bypass.rb +82 -0
- data/lib/coradoc/html/converters/code.rb +25 -0
- data/lib/coradoc/html/converters/div.rb +23 -0
- data/lib/coradoc/html/converters/dl.rb +82 -0
- data/lib/coradoc/html/converters/drop.rb +26 -0
- data/lib/coradoc/html/converters/em.rb +23 -0
- data/lib/coradoc/html/converters/figure.rb +33 -0
- data/lib/coradoc/html/converters/h.rb +58 -0
- data/lib/coradoc/html/converters/head.rb +29 -0
- data/lib/coradoc/html/converters/hr.rb +17 -0
- data/lib/coradoc/html/converters/img.rb +103 -0
- data/lib/coradoc/html/converters/li.rb +35 -0
- data/lib/coradoc/html/converters/mark.rb +21 -0
- data/lib/coradoc/html/converters/markup.rb +93 -0
- data/lib/coradoc/html/converters/math.rb +37 -0
- data/lib/coradoc/html/converters/media_base.rb +48 -0
- data/lib/coradoc/html/converters/ol.rb +42 -0
- data/lib/coradoc/html/converters/p.rb +64 -0
- data/lib/coradoc/html/converters/pass_through.rb +15 -0
- data/lib/coradoc/html/converters/positional_formatting.rb +35 -0
- data/lib/coradoc/html/converters/pre.rb +57 -0
- data/lib/coradoc/html/converters/q.rb +25 -0
- data/lib/coradoc/html/converters/strong.rb +22 -0
- data/lib/coradoc/html/converters/sub.rb +20 -0
- data/lib/coradoc/html/converters/sup.rb +20 -0
- data/lib/coradoc/html/converters/table.rb +64 -0
- data/lib/coradoc/html/converters/td.rb +42 -0
- data/lib/coradoc/html/converters/text.rb +66 -0
- data/lib/coradoc/html/converters/tr.rb +27 -0
- data/lib/coradoc/html/converters/video.rb +27 -0
- data/lib/coradoc/html/converters.rb +104 -0
- data/lib/coradoc/html/drop/drop_factory.rb +14 -22
- data/lib/coradoc/html/drop/inline_element_drop.rb +3 -5
- data/lib/coradoc/html/drop/raw_inline_element_drop.rb +30 -0
- data/lib/coradoc/html/drop.rb +30 -8
- data/lib/coradoc/html/errors.rb +11 -0
- data/lib/coradoc/html/html_converter.rb +78 -0
- data/lib/coradoc/html/input_config.rb +66 -0
- data/lib/coradoc/html/plugin.rb +90 -0
- data/lib/coradoc/html/plugins/plateau.rb +212 -0
- data/lib/coradoc/html/postprocessor.rb +19 -0
- data/lib/coradoc/html/spa.rb +0 -2
- data/lib/coradoc/html/static.rb +0 -2
- data/lib/coradoc/html/tag_mapping.rb +3 -1
- data/lib/coradoc/html/transform/from_core_model.rb +2 -2
- data/lib/coradoc/html/transform/to_core_model.rb +3 -3
- data/lib/coradoc/html/version.rb +1 -1
- data/lib/coradoc/html.rb +30 -5
- metadata +46 -47
- data/lib/coradoc/html/input/cleaner.rb +0 -134
- data/lib/coradoc/html/input/config.rb +0 -80
- data/lib/coradoc/html/input/converters/a.rb +0 -79
- data/lib/coradoc/html/input/converters/aside.rb +0 -22
- data/lib/coradoc/html/input/converters/audio.rb +0 -21
- data/lib/coradoc/html/input/converters/base.rb +0 -118
- data/lib/coradoc/html/input/converters/blockquote.rb +0 -27
- data/lib/coradoc/html/input/converters/br.rb +0 -19
- data/lib/coradoc/html/input/converters/bypass.rb +0 -84
- data/lib/coradoc/html/input/converters/code.rb +0 -27
- data/lib/coradoc/html/input/converters/div.rb +0 -25
- data/lib/coradoc/html/input/converters/dl.rb +0 -84
- data/lib/coradoc/html/input/converters/drop.rb +0 -28
- data/lib/coradoc/html/input/converters/em.rb +0 -25
- data/lib/coradoc/html/input/converters/figure.rb +0 -35
- data/lib/coradoc/html/input/converters/h.rb +0 -74
- data/lib/coradoc/html/input/converters/head.rb +0 -31
- data/lib/coradoc/html/input/converters/hr.rb +0 -19
- data/lib/coradoc/html/input/converters/img.rb +0 -105
- data/lib/coradoc/html/input/converters/li.rb +0 -37
- data/lib/coradoc/html/input/converters/mark.rb +0 -23
- data/lib/coradoc/html/input/converters/markup.rb +0 -103
- data/lib/coradoc/html/input/converters/math.rb +0 -39
- data/lib/coradoc/html/input/converters/media_base.rb +0 -50
- data/lib/coradoc/html/input/converters/ol.rb +0 -44
- data/lib/coradoc/html/input/converters/p.rb +0 -90
- data/lib/coradoc/html/input/converters/pass_through.rb +0 -17
- data/lib/coradoc/html/input/converters/positional_formatting.rb +0 -37
- data/lib/coradoc/html/input/converters/pre.rb +0 -59
- data/lib/coradoc/html/input/converters/q.rb +0 -27
- data/lib/coradoc/html/input/converters/strong.rb +0 -24
- data/lib/coradoc/html/input/converters/sub.rb +0 -22
- data/lib/coradoc/html/input/converters/sup.rb +0 -22
- data/lib/coradoc/html/input/converters/table.rb +0 -66
- data/lib/coradoc/html/input/converters/td.rb +0 -44
- data/lib/coradoc/html/input/converters/text.rb +0 -68
- data/lib/coradoc/html/input/converters/tr.rb +0 -29
- data/lib/coradoc/html/input/converters/video.rb +0 -29
- data/lib/coradoc/html/input/converters.rb +0 -107
- data/lib/coradoc/html/input/errors.rb +0 -22
- data/lib/coradoc/html/input/html_converter.rb +0 -98
- data/lib/coradoc/html/input/plugin.rb +0 -120
- data/lib/coradoc/html/input/plugins/plateau.rb +0 -214
- data/lib/coradoc/html/input/postprocessor.rb +0 -25
- data/lib/coradoc/html/input.rb +0 -86
- data/lib/coradoc/html/output.rb +0 -89
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Converts an HTML <dl> element into a CoreModel definition list.
        #
        # <dt>/<dd> children are collected into groups of the form
        # { name: [...], value: [...] }; every complete group becomes one
        # ListItem whose content is the term text and whose children are the
        # converted description content.
        class Dl < Base
          INSTANCE = new

          # Builds the definition list for +node+.
          #
          # @param node the <dl> node (queried with XPath; presumably a Nokogiri node)
          # @param state [Hash] conversion state forwarded to the child converters
          # @return [Coradoc::CoreModel::ListBlock] list with marker_type "definition"
          def to_coradoc(node, state = {})
            list_items = process_dl(node, state).map do |group|
              # Term goes in content, definition goes in children.
              Coradoc::CoreModel::ListItem.new(
                content: extract_text_from_content(group[:name]),
                children: group[:value]
              )
            end

            Coradoc::CoreModel::ListBlock.new(
              marker_type: 'definition',
              items: list_items
            )
          end

          # Walks the element children of +node+ and returns the term/description
          # groups in document order. <dt>/<dd> elements may be direct children
          # or wrapped one level deep in <div> elements.
          #
          # Only groups that have at least one term and one description are
          # returned, and each group is returned exactly once.
          #
          # @param node the <dl> node
          # @param state [Hash] conversion state forwarded to the child converters
          # @return [Array<Hash>] groups shaped like { name: [...], value: [...] }
          def process_dl(node, state = {})
            groups = []
            current = { name: [], value: [] }
            seen_dd = false

            node.xpath('*').each do |child|
              # A <div> only wraps <dt>/<dd> entries; unwrap it one level.
              entries = child.name == 'div' ? child.xpath('*') : [child]
              entries.each do |entry|
                groups, current, seen_dd = process_dt_or_dd(
                  groups, current, seen_dd, entry, state
                )
              end
            end

            # The last group has no following <dt> to flush it.
            groups << current if complete_group?(current)
            groups
          end

          # Folds one element into the running group state.
          #
          # A <dt> that follows a <dd> closes the current group (appending it to
          # +groups+ when complete) and starts a new one; consecutive <dt>s and
          # consecutive <dd>s accumulate into the same group. Elements other
          # than <dt>/<dd> are ignored.
          #
          # @param groups [Array<Hash>] finished groups (appended to in place)
          # @param current [Hash] group being built
          # @param seen_dd [Boolean] whether the current group already has a <dd>
          # @param subnode the element to process
          # @param state [Hash] conversion state forwarded to the child converters
          # @return [Array] the updated [groups, current, seen_dd] triple
          def process_dt_or_dd(groups, current, seen_dd, subnode, state = {})
            case subnode.name
            when 'dt'
              if seen_dd
                groups << current if complete_group?(current)
                current = { name: [], value: [] }
                seen_dd = false
              end
              current[:name] += treat_children_coradoc(subnode, state)
            when 'dd'
              current[:value] += treat_children_coradoc(subnode, state)
              seen_dd = true
            end
            [groups, current, seen_dd]
          end

          private

          # A group is emitted only when it has both a term and a description.
          def complete_group?(group)
            group[:name].any? && group[:value].any?
          end
        end

        register :dl, Dl::INSTANCE
      end
    end
  end
end
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Converter for elements whose content is discarded: it ignores the
        # node and always produces an empty string.
        class Skip < Base
          INSTANCE = new

          # @return [String] always the empty string
          def to_coradoc(_node, _state = {})
            ''
          end
        end

        # Tags handled by the shared Skip instance.
        %i[
          caption figcaption title link style
          meta script comment colgroup col
        ].each do |tag|
          register tag, Skip::INSTANCE
        end
      end
    end
  end
end
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Markup converter for emphasis-style tags (<em>, <i>, <cite>),
        # reported to the Markup base class as the 'italic' format type.
        class Em < Markup
          INSTANCE = new

          # @return [String] format type name used by Markup#to_coradoc
          def coradoc_format_type
            'italic'
          end

          # @return [Array<String>] tag names passed by Markup#to_coradoc to
          #   its ancestor check
          def markup_ancestor_tag_names
            %w[em i cite]
          end
        end

        %i[em i cite].each { |tag| register tag, Em::INSTANCE }
      end
    end
  end
end
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Converts an HTML <figure> into a CoreModel::ExampleBlock, using the
        # direct <figcaption> child (if any) as the block title.
        class Figure < Base
          INSTANCE = new

          # @param node the <figure> node
          # @param state [Hash] conversion state forwarded to child converters
          # @return [Coradoc::CoreModel::ExampleBlock]
          def to_coradoc(node, state = {})
            figure_id = node['id']
            # The caption is converted before the body, as in the original flow.
            caption = extract_title(node)
            body = treat_children_coradoc(node, state)

            Coradoc::CoreModel::ExampleBlock.new(
              title: extract_text_from_content(caption),
              children: body,
              id: figure_id
            )
          end

          # Returns the converted children of the direct <figcaption> child,
          # or an empty string when the figure has no caption. The caption is
          # converted with a fresh, empty state hash.
          def extract_title(node)
            caption_node = node.at('./figcaption')
            caption_node.nil? ? '' : treat_children_coradoc(caption_node, {})
          end
        end

        register :figure, Figure::INSTANCE
      end
    end
  end
end
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Converts HTML headings (<h1>..<h6>) into CoreModel::SectionElement
        # objects with an empty children list.
        class H < Base
          INSTANCE = new

          # @param node the heading node; its level is the first digit in the tag name
          # @param state [Hash] conversion state forwarded to child converters
          # @return [Coradoc::CoreModel::SectionElement]
          def to_coradoc(node, state = {})
            section_id = node['id']
            anchors = treat_children_anchors(node, state)

            # Without an explicit id, fall back to the target of the first
            # converted <a> child, when it is an InlineElement with a target.
            if section_id.to_s.empty?
              lead = anchors.first
              section_id = lead.target if lead.is_a?(Coradoc::CoreModel::InlineElement) && lead.target
            end

            level = node.name[/\d/].to_i
            title_parts = treat_children_no_anchors(node, state)

            Coradoc::CoreModel::SectionElement.new(
              title: extract_title_text(title_parts),
              level: level,
              id: section_id,
              children: []
            )
          end

          # Converts every child of +node+ that is not an <a> element.
          #
          # @return [Array] flattened conversion results without nils
          def treat_children_no_anchors(node, state)
            others = node.children.reject { |child| child.name == 'a' }
            converted = others.map { |child| treat_coradoc(child, state) }
            converted.flatten.compact
          end

          # Converts only the <a> children of +node+.
          #
          # @return [Array] flattened conversion results without nils
          def treat_children_anchors(node, state)
            links = node.children.select { |child| child.name == 'a' }
            converted = links.map { |child| treat_coradoc(child, state) }
            converted.flatten.compact
          end

          private

          # Reduces converted heading content to plain title text. Arrays are
          # joined and stripped; a single value is stringified as-is.
          def extract_title_text(content)
            return '' if content.nil? || content.empty?

            case content
            when Array
              content.map { |part| inline_text(part) }.join.strip
            else
              inline_text(content)
            end
          end

          # Text of one content part: the #content of an InlineElement,
          # otherwise the part's own string form.
          def inline_text(part)
            part.is_a?(Coradoc::CoreModel::InlineElement) ? part.content.to_s : part.to_s
          end
        end

        %i[h1 h2 h3 h4 h5 h6].each { |tag| register tag, H::INSTANCE }
      end
    end
  end
end
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Converts the HTML <head> into a level-0 CoreModel::DocumentElement
        # that carries the document title.
        class Head < Base
          INSTANCE = new

          # @param node the <head> node
          # @return [Coradoc::CoreModel::DocumentElement]
          def to_coradoc(node, _state = {})
            Coradoc::CoreModel::DocumentElement.new(
              title: extract_title(node),
              level: 0
            )
          end

          # Text of the direct <title> child, or the placeholder '(???)' when
          # there is none.
          def extract_title(node)
            title_node = node.at('./title')
            title_node.nil? ? '(???)' : title_node.text
          end
        end

        register :head, Head::INSTANCE
      end
    end
  end
end
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Converts an HTML <hr> into a CoreModel::HorizontalRuleBlock.
        # Neither the node nor the conversion state is consulted.
        class Hr < Base
          INSTANCE = new

          # @return [Coradoc::CoreModel::HorizontalRuleBlock] a new block per call
          def to_coradoc(_node, _state = {})
            Coradoc::CoreModel::HorizontalRuleBlock.new
          end
        end

        register :hr, Hr::INSTANCE
      end
    end
  end
end
|
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'fileutils'
|
|
4
|
-
require 'pathname'
|
|
5
|
-
require 'tempfile'
|
|
6
|
-
require 'base64'
|
|
7
|
-
require 'marcel'
|
|
8
|
-
|
|
9
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Converts an HTML <img> into a CoreModel::Image. When the
        # external_images option is enabled, the image (a base64 data URI or a
        # file under the configured source directory) is copied into an
        # "images" directory next to the conversion destination and the model
        # points at that copy.
        class Img < Base
          INSTANCE = new

          # Current image counter formatted with the configured pattern.
          #
          # @return [String]
          def image_number
            format(
              Coradoc::Html::Input.config.image_counter_pattern,
              Coradoc::Html::Input.config.image_counter
            )
          end

          # Advances the shared image counter by one.
          def image_number_increment
            Coradoc::Html::Input.config.image_counter += 1
          end

          # Materialises +src+ as a numbered file in the images directory.
          #
          # A missing source file only produces a warning; the counter is
          # still advanced and the destination path is still returned.
          #
          # @param src [String, nil] value of the src attribute
          # @return [Pathname, nil] image path relative to the destination
          #   directory, or nil when +src+ is nil
          def datauri2file(src)
            return unless src

            # The named capture assigns the local +imgdata+ (nil when +src+ is
            # not a base64 image data URI).
            %r{^data:image/(?:[^;]+);base64,(?<imgdata>.+)$} =~ src

            dest_dir = Pathname.new(Coradoc::Html::Input.config.destination).dirname
            images_dir = dest_dir.join('images')
            FileUtils.mkdir_p(images_dir)

            ext, image_src_path, tempfile = determine_image_src_path(src, imgdata)
            image_dest_path = images_dir + "#{image_number}.#{ext}"

            if File.exist?(image_src_path)
              FileUtils.cp(image_src_path, image_dest_path)
            else
              Kernel.warn "Image #{image_src_path} does not exist"
            end

            image_number_increment

            image_dest_path.relative_path_from(dest_dir)
          ensure
            # Only data-URI images go through a temp file; remove it either way.
            tempfile&.close!
          end

          # Resolves where the image bytes live.
          #
          # @param src [String] original src attribute
          # @param imgdata [String, nil] base64 payload when +src+ is a data URI
          # @return [Array] [ext, path, tempfile] for data URIs, [ext, path]
          #   for files resolved against the configured source directory
          def determine_image_src_path(src, imgdata)
            return copy_temp_file(imgdata) if imgdata

            ext = File.extname(src).strip.downcase[1..]
            [ext, Pathname.new(Coradoc::Html::Input.config.sourcedir).join(src)]
          end

          # Decodes +imgdata+ into a temp file and derives the extension from
          # the detected MIME subtype ('svg+xml' is mapped to 'svg').
          #
          # The caller owns the returned temp file and must close it. If
          # decoding or MIME detection fails, the temp file is closed and
          # unlinked here before the error propagates, because the caller never
          # receives a handle it could clean up.
          #
          # @param imgdata [String] strict base64 payload
          # @return [Array] [ext, path, tempfile]
          # @raise [ArgumentError] when +imgdata+ is not valid strict base64
          def copy_temp_file(imgdata)
            file = Tempfile.open(['radoc', '.jpg'])
            file.binmode
            file.write(Base64.strict_decode64(imgdata))
            file.rewind
            ext = Marcel::MimeType.for(file).sub(%r{^[^/]+/}, '')
            ext = 'svg' if ext == 'svg+xml'
            [ext, file.path, file]
          rescue StandardError
            file&.close!
            raise
          end

          # @param node the <img> node
          # @return [Coradoc::CoreModel::Image, nil] nil when no src is available
          def to_coradoc(node, _state = {})
            id = node['id']
            alt = node['alt']
            src = node['src']
            width = node['width']
            height = node['height']

            # Purely numeric sizes are round-tripped through Integer, which
            # normalises values such as "007" to "7" before they are stored.
            width = width.to_i if width&.match?(/\A\d+\z/)
            height = height.to_i if height&.match?(/\A\d+\z/)

            title = extract_title(node)

            src = datauri2file(src) if Coradoc::Html::Input.config.external_images

            return unless src

            Coradoc::CoreModel::Image.new(
              src: src,
              alt: alt,
              caption: title,
              width: width&.to_s,
              height: height&.to_s,
              id: id
            )
          end
        end

        register :img, Img::INSTANCE
      end
    end
  end
end
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Converts an HTML <li> into a CoreModel::ListItem.
        class Li < Base
          INSTANCE = new

          # When the item consists of exactly one <p> (ignoring
          # whitespace-only text nodes), that paragraph's children are used
          # directly as the item content; otherwise all children of the <li>
          # are converted.
          #
          # @param node the <li> node
          # @param state [Hash] conversion state forwarded to child converters
          # @return [Coradoc::CoreModel::ListItem]
          def to_coradoc(node, state = {})
            paragraphs = node.children.select { |child| child.name == 'p' }
            significant = node.children.reject do |child|
              child.text? && child.text.strip.empty?
            end

            sole_paragraph = paragraphs.size == 1 && significant.size == 1
            source = sole_paragraph ? paragraphs.first : node

            Coradoc::CoreModel::ListItem.new(
              children: treat_children_coradoc(source, state),
              id: node['id']
            )
          end
        end

        register :li, Li::INSTANCE
      end
    end
  end
end
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Markup converter for <mark>, reported to the Markup base class as
        # the 'highlight' format type.
        class Mark < Markup
          INSTANCE = new

          # @return [String] format type name used by Markup#to_coradoc
          def coradoc_format_type
            'highlight'
          end

          # @return [Array<String>] tag names passed by Markup#to_coradoc to
          #   its ancestor check
          def markup_ancestor_tag_names
            %w[mark]
          end
        end

        register :mark, Mark::INSTANCE
      end
    end
  end
end
|
|
@@ -1,103 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Shared behaviour for inline formatting converters. Subclasses supply
        # #coradoc_format_type and #markup_ancestor_tag_names.
        class Markup < Base
          # Converts a formatting element into an InlineElement of the
          # subclass's format type, with leading/trailing whitespace kept
          # outside the element as separate TextElements.
          #
          # @param node the formatting node
          # @param state [Hash] conversion state forwarded to child converters
          # @return the converted children when the ancestor check on
          #   markup_ancestor_tag_names succeeds; a TextElement or nil when
          #   the node has no children; otherwise the inline element, or an
          #   array of it with its surrounding whitespace elements
          def to_coradoc(node, state = {})
            # Statement order is kept as-is: the helpers below are not visible
            # here and may depend on being called in this sequence.
            u_before = unconstrained_before?(node)
            u_after = unconstrained_after?(node)

            leading_ws, trailing_ws = extract_leading_trailing_whitespace(node)
            leading_whitespace = whitespace_element(leading_ws)
            trailing_whitespace = whitespace_element(trailing_ws)

            content = treat_children_coradoc(node, state)

            return content if node_has_ancestor?(node, markup_ancestor_tag_names)
            return leading_whitespace if node.children.empty?

            unconstrained = (u_before && leading_whitespace.nil?) ||
                            (u_after && trailing_whitespace.nil?)

            text_content, nested = extract_text_and_elements(content)
            element_class = Coradoc::CoreModel::InlineElement.format_type_class(coradoc_format_type)
            inline_element = element_class.new(
              content: text_content,
              nested_elements: nested.empty? ? nil : nested,
              metadata: { unconstrained: unconstrained }
            )

            pieces = [leading_whitespace, inline_element, trailing_whitespace].compact
            pieces.length == 1 ? pieces.first : pieces
          end

          # Splits mixed content into its joined text and its InlineElements.
          #
          # @param content [Array, Object] converted children
          # @return [Array] [text, inline_elements]; non-array content is
          #   returned untouched with an empty element list
          def extract_text_and_elements(content)
            return [content, []] unless content.is_a?(Array)

            elements, others = content.partition do |item|
              item.is_a?(Coradoc::CoreModel::InlineElement)
            end

            text = others.map do |item|
              case item
              when String then item
              # Other model objects are reduced to their text.
              when Coradoc::CoreModel::Base then extract_text_from_model(item)
              else item.to_s
              end
            end.join

            [text, elements]
          end

          # Best-effort plain text of a CoreModel object: its string content,
          # else the joined text of its children (structural elements), else
          # its string title, else its #to_s.
          def extract_text_from_model(model)
            return '' if model.nil?

            core_model = model.is_a?(Coradoc::CoreModel::Base)
            return model.content if core_model && model.content.is_a?(String)

            if model.is_a?(Coradoc::CoreModel::StructuralElement) && model.children.is_a?(Array)
              return model.children.map { |child| extract_text_from_model(child) }.join
            end

            return model.title if core_model && model.title.is_a?(String)

            model.to_s
          end

          # Format type handed to InlineElement.format_type_class; subclasses
          # override this.
          def coradoc_format_type
            'text'
          end

          private

          # Wraps extracted whitespace in a TextElement; nil when there is none.
          def whitespace_element(whitespace)
            return unless whitespace

            Coradoc::CoreModel::TextElement.new(content: whitespace)
          end
        end
      end
    end
  end
end
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Converts a <math> element into a CoreModel::StemElement.
        class Math < Base
          INSTANCE = new

          # Serialises the node onto a single line and, when the
          # mathml2asciimath option is set, converts it to AsciiMath through
          # Plurimath. Square brackets are backslash-escaped and doubled
          # parentheses around short groups are collapsed.
          #
          # NOTE(review): stem_type stays 'mathml' even when the content was
          # converted to AsciiMath — confirm this is intended.
          #
          # @param node the <math> node
          # @return [Coradoc::CoreModel::StemElement]
          def to_coradoc(node, _state = {})
            stem = node.to_s.tr("\n", ' ')

            if Coradoc::Html::Input.config.mathml2asciimath
              # Loaded lazily so plurimath is only needed when the option is on.
              require 'plurimath'
              stem = Plurimath::Math.parse(stem, :mathml).to_asciimath
            end

            stem = escape_and_collapse(stem) unless stem.nil?

            Coradoc::CoreModel::StemElement.new(
              content: stem,
              stem_type: 'mathml'
            )
          end

          private

          # Escapes "[" and "]" and repeatedly rewrites "((x))" to "(x)"
          # until the string no longer changes.
          def escape_and_collapse(stem)
            stem = stem.gsub('[', '\\[')
            stem = stem.gsub(']', '\\]')

            previous = nil
            until stem == previous
              previous = stem
              stem = stem.gsub(/\(\(([^)]{1,100})\)\)/, '(\\1)')
            end
            stem
          end
        end

        register :math, Math::INSTANCE
      end
    end
  end
end
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
  module Input
    module Html
      module Converters
        # Common converter for media elements. Subclasses must implement
        # #semantic_type and may override #build_attributes.
        class MediaBase < Base
          # @param node the media node
          # @return [Coradoc::CoreModel::Block] block whose content is the
          #   node's src attribute
          def to_coradoc(node, _state = {})
            Coradoc::CoreModel::Block.new(
              block_semantic_type: semantic_type,
              content: node['src'],
              title: extract_title(node),
              id: node['id'],
              element_attributes: build_attributes(node)
            )
          end

          # Title taken from the direct <track> child, or failing that the
          # first descendant <source>: its label, then its srclang, then ''.
          # Returns '' when neither element exists.
          def extract_title(node)
            track = node.at('./track') || node.at('.//source')
            track.nil? ? '' : (track['label'] || track['srclang'] || '')
          end

          private

          # Block semantic type for the concrete media kind.
          #
          # @raise [NotImplementedError] unless overridden by a subclass
          def semantic_type
            raise NotImplementedError
          end

          # Playback attributes present on the node; attributes that are
          # absent (nil) are omitted.
          def base_attributes(node)
            attributes = {
              autoplay: node['autoplay'],
              loop: node['loop'],
              controls: node['controls']
            }
            attributes.compact
          end

          # Attributes stored on the block; defaults to #base_attributes.
          def build_attributes(node)
            base_attributes(node)
          end
        end
      end
    end
  end
end
|