coradoc-html 1.1.18 → 1.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/coradoc/html/cleaner.rb +128 -0
- data/lib/coradoc/html/converters/a.rb +77 -0
- data/lib/coradoc/html/converters/aside.rb +20 -0
- data/lib/coradoc/html/converters/audio.rb +19 -0
- data/lib/coradoc/html/converters/base.rb +98 -0
- data/lib/coradoc/html/converters/blockquote.rb +25 -0
- data/lib/coradoc/html/converters/br.rb +17 -0
- data/lib/coradoc/html/converters/bypass.rb +82 -0
- data/lib/coradoc/html/converters/code.rb +25 -0
- data/lib/coradoc/html/converters/div.rb +23 -0
- data/lib/coradoc/html/converters/dl.rb +82 -0
- data/lib/coradoc/html/converters/drop.rb +26 -0
- data/lib/coradoc/html/converters/em.rb +23 -0
- data/lib/coradoc/html/converters/figure.rb +33 -0
- data/lib/coradoc/html/converters/h.rb +58 -0
- data/lib/coradoc/html/converters/head.rb +29 -0
- data/lib/coradoc/html/converters/hr.rb +17 -0
- data/lib/coradoc/html/converters/img.rb +103 -0
- data/lib/coradoc/html/converters/li.rb +35 -0
- data/lib/coradoc/html/converters/mark.rb +21 -0
- data/lib/coradoc/html/converters/markup.rb +93 -0
- data/lib/coradoc/html/converters/math.rb +37 -0
- data/lib/coradoc/html/converters/media_base.rb +48 -0
- data/lib/coradoc/html/converters/ol.rb +42 -0
- data/lib/coradoc/html/converters/p.rb +64 -0
- data/lib/coradoc/html/converters/pass_through.rb +15 -0
- data/lib/coradoc/html/converters/positional_formatting.rb +35 -0
- data/lib/coradoc/html/converters/pre.rb +57 -0
- data/lib/coradoc/html/converters/q.rb +25 -0
- data/lib/coradoc/html/converters/strong.rb +22 -0
- data/lib/coradoc/html/converters/sub.rb +20 -0
- data/lib/coradoc/html/converters/sup.rb +20 -0
- data/lib/coradoc/html/converters/table.rb +64 -0
- data/lib/coradoc/html/converters/td.rb +42 -0
- data/lib/coradoc/html/converters/text.rb +66 -0
- data/lib/coradoc/html/converters/tr.rb +27 -0
- data/lib/coradoc/html/converters/video.rb +27 -0
- data/lib/coradoc/html/converters.rb +104 -0
- data/lib/coradoc/html/drop/drop_factory.rb +14 -22
- data/lib/coradoc/html/drop/inline_element_drop.rb +3 -5
- data/lib/coradoc/html/drop/raw_inline_element_drop.rb +30 -0
- data/lib/coradoc/html/drop.rb +30 -8
- data/lib/coradoc/html/errors.rb +11 -0
- data/lib/coradoc/html/html_converter.rb +78 -0
- data/lib/coradoc/html/input_config.rb +66 -0
- data/lib/coradoc/html/plugin.rb +90 -0
- data/lib/coradoc/html/plugins/plateau.rb +212 -0
- data/lib/coradoc/html/postprocessor.rb +19 -0
- data/lib/coradoc/html/spa.rb +0 -2
- data/lib/coradoc/html/static.rb +0 -2
- data/lib/coradoc/html/tag_mapping.rb +3 -1
- data/lib/coradoc/html/transform/from_core_model.rb +2 -2
- data/lib/coradoc/html/transform/to_core_model.rb +3 -3
- data/lib/coradoc/html/version.rb +1 -1
- data/lib/coradoc/html.rb +30 -5
- metadata +46 -47
- data/lib/coradoc/html/input/cleaner.rb +0 -134
- data/lib/coradoc/html/input/config.rb +0 -80
- data/lib/coradoc/html/input/converters/a.rb +0 -79
- data/lib/coradoc/html/input/converters/aside.rb +0 -22
- data/lib/coradoc/html/input/converters/audio.rb +0 -21
- data/lib/coradoc/html/input/converters/base.rb +0 -118
- data/lib/coradoc/html/input/converters/blockquote.rb +0 -27
- data/lib/coradoc/html/input/converters/br.rb +0 -19
- data/lib/coradoc/html/input/converters/bypass.rb +0 -84
- data/lib/coradoc/html/input/converters/code.rb +0 -27
- data/lib/coradoc/html/input/converters/div.rb +0 -25
- data/lib/coradoc/html/input/converters/dl.rb +0 -84
- data/lib/coradoc/html/input/converters/drop.rb +0 -28
- data/lib/coradoc/html/input/converters/em.rb +0 -25
- data/lib/coradoc/html/input/converters/figure.rb +0 -35
- data/lib/coradoc/html/input/converters/h.rb +0 -74
- data/lib/coradoc/html/input/converters/head.rb +0 -31
- data/lib/coradoc/html/input/converters/hr.rb +0 -19
- data/lib/coradoc/html/input/converters/img.rb +0 -105
- data/lib/coradoc/html/input/converters/li.rb +0 -37
- data/lib/coradoc/html/input/converters/mark.rb +0 -23
- data/lib/coradoc/html/input/converters/markup.rb +0 -103
- data/lib/coradoc/html/input/converters/math.rb +0 -39
- data/lib/coradoc/html/input/converters/media_base.rb +0 -50
- data/lib/coradoc/html/input/converters/ol.rb +0 -44
- data/lib/coradoc/html/input/converters/p.rb +0 -90
- data/lib/coradoc/html/input/converters/pass_through.rb +0 -17
- data/lib/coradoc/html/input/converters/positional_formatting.rb +0 -37
- data/lib/coradoc/html/input/converters/pre.rb +0 -59
- data/lib/coradoc/html/input/converters/q.rb +0 -27
- data/lib/coradoc/html/input/converters/strong.rb +0 -24
- data/lib/coradoc/html/input/converters/sub.rb +0 -22
- data/lib/coradoc/html/input/converters/sup.rb +0 -22
- data/lib/coradoc/html/input/converters/table.rb +0 -66
- data/lib/coradoc/html/input/converters/td.rb +0 -44
- data/lib/coradoc/html/input/converters/text.rb +0 -68
- data/lib/coradoc/html/input/converters/tr.rb +0 -29
- data/lib/coradoc/html/input/converters/video.rb +0 -29
- data/lib/coradoc/html/input/converters.rb +0 -107
- data/lib/coradoc/html/input/errors.rb +0 -22
- data/lib/coradoc/html/input/html_converter.rb +0 -98
- data/lib/coradoc/html/input/plugin.rb +0 -120
- data/lib/coradoc/html/input/plugins/plateau.rb +0 -214
- data/lib/coradoc/html/input/postprocessor.rb +0 -25
- data/lib/coradoc/html/input.rb +0 -86
- data/lib/coradoc/html/output.rb +0 -89
|
@@ -1,134 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
# Whitespace and markup clean-up helpers applied to converted text, plus
# pre-processing helpers for Word-exported HTML.
class Cleaner
  # Pre-compiled regexes for performance
  INNER_WHITESPACE_REGEX_1 = /\n stem:\[/
  INNER_WHITESPACE_REGEX_2 = /(stem:\[([^\]]|\\\])*\])\n(?=\S)/
  NEWLINES_REGEX = /\n{3,}/
  LEADING_NEWLINE_REGEX = /\A\n+/
  WHITESPACE_REGEX = /[ \t\r\n]+/
  TRAILING_WHITESPACE_REGEX = /[ \t\r\n]+\z/

  # Runs the complete clean-up pipeline.
  #
  # @param string [String, Hash] text to clean; for a Hash every value is
  #   cleaned recursively and the keys are kept
  # @return [String, Hash] the cleaned text (or Hash of cleaned texts)
  def tidy(string)
    return string.transform_values { |value| tidy(value) } if string.is_a?(Hash)

    result = HtmlConverter.track_time('Removing inner whitespace') do
      remove_inner_whitespaces(String.new(string))
    end
    result = HtmlConverter.track_time('Removing newlines') do
      remove_newlines(result)
    end
    result = HtmlConverter.track_time('Removing leading newlines') do
      remove_leading_newlines(result)
    end
    result = HtmlConverter.track_time('Cleaning tag borders') do
      clean_tag_borders(result)
    end
    result = HtmlConverter.track_time('Cleaning punctuation characters') do
      clean_punctuation_characters(result)
    end
    result = remove_block_leading_newlines(result)
    remove_section_attribute_newlines(result)
  end

  # Drops the blank line that directly follows a "]\n****" block opener.
  #
  # @param string [String]
  # @return [String]
  def remove_block_leading_newlines(string)
    string.gsub("]\n****\n\n", "]\n****\n")
  end

  # Drops the blank line between a closing "]" and a following "==" line.
  #
  # @param string [String]
  # @return [String]
  def remove_section_attribute_newlines(string)
    string.gsub("]\n\n==", "]\n==")
  end

  # Collapses runs of three or more newlines into exactly two.
  #
  # @param string [String]
  # @return [String]
  def remove_newlines(string)
    string.gsub(NEWLINES_REGEX, "\n\n")
  end

  # Removes all newlines at the very start of the string.
  #
  # @param string [String]
  # @return [String]
  def remove_leading_newlines(string)
    string.gsub(LEADING_NEWLINE_REGEX, '')
  end

  # Normalises whitespace inside every line while keeping each line's
  # leading and trailing whitespace, and tidies whitespace around `stem:[`.
  #
  # The argument is never modified, so frozen strings are accepted.
  #
  # @param string [String, nil]
  # @return [String] a new string; empty when +string+ is nil
  def remove_inner_whitespaces(string)
    return +'' if string.nil?

    # The former INNER_WHITESPACE_REGEX_1 substitution is omitted on purpose:
    # once the literal replacement below has run, "\n stem:[" can no longer
    # occur, so that substitution could never match.
    text = string.gsub("\n stem:[", "\nstem:[")
                 .gsub(INNER_WHITESPACE_REGEX_2, '\\1')

    text.each_line.with_object(+'') do |line, cleaned|
      squeezed = preserve_border_whitespaces(line) do
        # Use ASCII-only strip to preserve CJK fullwidth spaces
        line.sub(/\A[ \t\r\n]+/, '')
            .sub(TRAILING_WHITESPACE_REGEX, '')
            .gsub(/[ \t]{2,}/, ' ')
      end
      cleaned << squeezed
    end
  end

  # Tightens whitespace just inside "~~…~~" spans and "[…]" spans while
  # keeping (or, for "~~" spans, defaulting) the whitespace around them.
  #
  # @param string [String]
  # @return [String]
  def clean_tag_borders(string)
    tildes_cleaned = string.gsub(/\s?~{2,}.*?~{2,}\s?/) do |match|
      preserve_border_whitespaces(
        match,
        default_border: Coradoc::Html::Input.config.tag_border
      ) do
        match.strip.sub('~~ ', '~~').sub(' ~~', '~~')
      end
    end

    tildes_cleaned.gsub(/\s?\[.*?\]\s?/) do |match|
      preserve_border_whitespaces(match) do
        match.strip.sub('[ ', '[').sub(' ]', ']')
      end
    end
  end

  # Removes a single whitespace character between a closing "**", "~~" or
  # "__" and a following punctuation character (. ! ? ' ").
  #
  # @param string [String]
  # @return [String]
  def clean_punctuation_characters(string)
    string.gsub(/(\*\*|~~|__)\s([.!?'"])/, '\\1\\2')
  end

  # Pre-processes Word-exported HTML on a copy of the input.
  #
  # @param string [String]
  # @return [String]
  def preprocess_word_html(string)
    clean_headings(scrub_whitespace(string.dup))
  end

  # Normalises non-breaking spaces and excess whitespace.
  #
  # @param string [String] must be mutable; the first substitution is in place
  # @return [String] the string returned by Coradoc.strip_unicode, further cleaned
  def scrub_whitespace(string)
    # NOTE(review): the pattern and replacement were reconstructed from an
    # entity-decoded rendering of this line; confirm that '&#xA0;' is the
    # intended canonical spelling of a non-breaking space.
    string.gsub!(/&nbsp;|&#xA0;|\u00a0/i, '&#xA0;')
    string = Coradoc.strip_unicode(string)
    string.gsub!(/( +)$/, ' ') # trailing spaces on each line become one space
    string.gsub!("\n\n\n\n", "\n\n")
    string
  end

  # Replaces empty headings with a space and rewrites headings styled as
  # superscript into <sup> elements. Modifies +string+ in place.
  #
  # @param string [String] must be mutable
  # @return [String] the same string object
  def clean_headings(string)
    string.gsub!(%r{<h([1-9])[^>]*></h\1>}, ' ')
    string.gsub!(
      %r{<h([1-9])[^>]* style="vertical-align: super;[^>]*>(.+?)</h\1>},
      '<sup>\\2</sup>'
    )
    string
  end

  private

  # Wraps the block's result in the leading and trailing whitespace of
  # +string+. Whitespace-only strings are returned unchanged and the block
  # is not called.
  #
  # @param string [String]
  # @param options [Hash] :default_border is used for a side that has no
  #   whitespace of its own
  # @yieldreturn [String] the cleaned inner text
  # @return [String]
  def preserve_border_whitespaces(string, options = {})
    return string if /\A\s*\Z/.match?(string)

    default_border = options.fetch(:default_border, '')
    # If the string contains part of a link so the characters [,],(,)
    # then don't add any extra spaces
    default_border = '' if /[\[(\])]/.match?(string)
    string_start = present_or_default(string[/\A\s*/], default_border)
    string_end = present_or_default(string[/\s*\Z/], default_border)
    string_start + yield + string_end
  end

  # @return [String] +string+, or +default+ when +string+ is nil or empty
  def present_or_default(string, default)
    return default if string.nil? || string.empty?

    string
  end
end
|
|
132
|
-
end
|
|
133
|
-
end
|
|
134
|
-
end
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'tmpdir'
|
|
4
|
-
|
|
5
|
-
module Coradoc
|
|
6
|
-
module Input
|
|
7
|
-
module Html
|
|
8
|
-
# Settings for the HTML input converter. Every option declared with
# +declare_option+ has a configured value that can be overridden for the
# duration of a +with+ block.
class Config
  def initialize
    @unknown_tags = :pass_through
    @input_format = :html
    @mathml2asciimath = false
    @external_images = false

    # Destination to save file and images
    @destination = nil

    # Source of HTML
    # @sourcedir = nil

    # Image counter, assuming there are max 999 images
    @image_counter = 1
    # pad with 0s
    @image_counter_pattern = '%03d'

    @em_delimiter = '_'
    @strong_delimiter = '*'
    @inline_options = {}
    @tag_border = ' '

    @split_sections = nil

    # Document width - used to compute table sizes.
    # This is an assumption for screen size in input document.
    # If column widths are specified in absolute values, then we
    # have to convert them to relative values for better portability
    # across output formats.
    @doc_width = 1000

    # Plugin system
    @plugins = []

    # Debugging options
    @track_time = false
  end

  # Runs the block with +options+ as the inline overrides, replacing (not
  # merging with) any overrides already active. The previous overrides are
  # restored afterwards, even when the block raises.
  #
  # @param options [Hash{Symbol => Object}] option name => temporary value
  # @return [Object] the block's return value
  def with(options = {})
    old_options = @inline_options
    @inline_options = options
    yield
  ensure
    @inline_options = old_options
  end

  # Declares a configuration option: an accessor pair whose reader prefers
  # the inline override set by +with+ over the configured value.
  #
  # An override of +nil+ counts as "not overridden". Any other value,
  # including +false+, takes precedence over the configured value.
  #
  # @param option [Symbol] option name
  def self.declare_option(option)
    attr_accessor option

    original_reader = instance_method(option)
    define_method(option) do
      inline_value = @inline_options[option]
      inline_value.nil? ? original_reader.bind_call(self) : inline_value
    end
  end

  declare_option :unknown_tags
  declare_option :tag_border
  declare_option :mathml2asciimath
  declare_option :external_images
  declare_option :destination
  declare_option :sourcedir
  declare_option :image_counter
  declare_option :image_counter_pattern
  declare_option :input_format
  declare_option :split_sections
  declare_option :doc_width
  declare_option :plugins
  declare_option :track_time
end
|
|
78
|
-
end
|
|
79
|
-
end
|
|
80
|
-
end
|
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'coradoc'
|
|
4
|
-
|
|
5
|
-
module Coradoc
|
|
6
|
-
module Input
|
|
7
|
-
module Html
|
|
8
|
-
module Converters
|
|
9
|
-
# Converter registered for <a> elements. Depending on the attributes it
# yields an inline anchor, a cross-reference, a link, or nothing.
class A < Base
  INSTANCE = new

  # @param node [#[]] element exposing 'href', 'id', 'name' and 'title'
  # @param state [Hash] conversion state handed on to the children
  # @return [Object, Array, nil] an anchor InlineElement when the element
  #   has an id/name, a CrossReferenceElement for "#…" hrefs, a LinkElement
  #   (preceded by a single-space TextElement when the previous text node
  #   ends in an ambiguous character) for other hrefs, or nil when there is
  #   nothing to emit
  def to_coradoc(node, state = {})
    # Children are converted first so CoreModel elements are available below.
    children = treat_children_coradoc(node, state)

    href = node['href']
    title = extract_title(node)

    anchor_id = (node['id'] || node['name'])&.gsub(/\s/, '')&.gsub(/__+/, '_')
    anchor_id = nil if anchor_id&.empty?

    # Ids of the form _Toc<digits> and _GoBack are dropped entirely.
    return nil if /^_Toc\d+$|^_GoBack$/.match?(anchor_id)

    # Inline anchor
    unless anchor_id.nil?
      return Coradoc::CoreModel::InlineElement.new(format_type: 'anchor', target: anchor_id)
    end

    # Cross-reference to a fragment
    if href.to_s.start_with?('#')
      ref_id = href.sub(/^#/, '').gsub(/\s/, '').gsub(/__+/, '_')
      label = extract_text_from_content(children).strip
      return Coradoc::CoreModel::CrossReferenceElement.new(
        target: ref_id,
        content: (label unless label.empty?)
      )
    end

    return nil if href.to_s.empty?

    # Plain link
    ambiguous = /[\w.?&#=%;\[\u{ff}-\u{10ffff}]/
    right_constrain = textnode_after_start_with?(node, ambiguous)
    link_text = extract_text_from_content(children)

    prefix = []
    if textnode_before_end_with?(node, ambiguous)
      # Separate the link from the text it would otherwise run into.
      prefix << Coradoc::CoreModel::TextElement.new(content: ' ')
    end

    metadata = {
      title: (title.strip unless title.to_s.strip.empty?),
      right_constrain: right_constrain
    }.compact
    link = Coradoc::CoreModel::LinkElement.new(
      target: href,
      content: link_text.strip,
      metadata: metadata
    )

    # A lone link is returned bare; otherwise [space, link].
    prefix.empty? ? link : [*prefix, link]
  end
end

register(:a, A::INSTANCE)
|
|
76
|
-
end
|
|
77
|
-
end
|
|
78
|
-
end
|
|
79
|
-
end
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter registered for <aside> elements.
class Aside < Base
  INSTANCE = new

  # Wraps the converted children of +node+ in a SidebarBlock.
  #
  # @param node [Object] the <aside> element
  # @param state [Hash] conversion state handed on to the children
  # @return [Coradoc::CoreModel::SidebarBlock]
  def to_coradoc(node, state = {})
    Coradoc::CoreModel::SidebarBlock.new(
      children: treat_children_coradoc(node, state)
    )
  end
end

register(:aside, Aside::INSTANCE)
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
end
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter registered for <audio> elements; all conversion behaviour is
# inherited from MediaBase.
class Audio < MediaBase
  INSTANCE = new

  private

  # Media kind reported by this converter.
  # NOTE(review): presumably read by MediaBase, which is not visible here.
  #
  # @return [Symbol] always :audio
  def semantic_type
    :audio
  end
end

register(:audio, Audio::INSTANCE)
|
|
18
|
-
end
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Shared helpers for the element converters.
class Base
  # Converts every child of +node+ and returns the flattened results with
  # nils and blank strings removed.
  #
  # @param node [#children]
  # @param state [Hash] conversion state handed on to each child
  # @return [Array]
  def treat_children_coradoc(node, state)
    converted = node.children.map { |child| treat_coradoc(child, state) }.flatten

    converted.reject do |item|
      item.nil? || (item.is_a?(String) && item.strip.empty?)
    end
  end

  # Converts a single node through the converter registry.
  def treat_coradoc(node, state)
    Converters.process_coradoc(node, state)
  end

  # @return [String] the node's 'title' attribute, or '' when absent
  def extract_title(node)
    node['title'].to_s
  end

  # Whether +node+ has an ancestor matching +name+.
  #
  # @param name [String, Array<String>] one selector or several alternatives
  # @return [Boolean, nil] nil when +name+ is neither a String nor an Array
  def node_has_ancestor?(node, name)
    case name
    when String
      node.ancestors(name).any?
    when Array
      name.any? { |candidate| node.ancestors(candidate).any? }
    end
  end

  # Whether the node directly before +node+ is a text node whose text ends
  # with +str+.
  #
  # @param str [String, Regexp] a String is matched literally
  # @return [Boolean] false for any other +str+ type or an empty String
  def textnode_before_end_with?(node, str)
    return false unless [String, Regexp].include?(str.class)
    return false if str.is_a?(String) && str.empty?

    str = /#{Regexp.escape(str)}/ if str.is_a?(String)
    ending = /(?:#{str})\z/

    previous = node.at_xpath('preceding-sibling::node()[1]')
    previous.is_a?(Nokogiri::XML::Text) && previous.text.match?(ending)
  end

  # Whether the node directly after +node+ is a text node whose text starts
  # with +str+.
  #
  # @param str [String, Regexp] a String is matched literally
  # @return [Boolean] false for any other +str+ type or an empty String
  def textnode_after_start_with?(node, str)
    return false unless [String, Regexp].include?(str.class)
    return false if str.is_a?(String) && str.empty?

    str = /#{Regexp.escape(str)}/ if str.is_a?(String)
    beginning = /\A(?:#{str})/

    following = node.at_xpath('following-sibling::node()[1]')
    following.is_a?(Nokogiri::XML::Text) && following.text.match?(beginning)
  end

  # Strips whitespace from the outer edges of +node+'s text and reports
  # which edges had any.
  #
  # The checks are anchored to the start and end of the whole text (\A, \z).
  # Line anchors would also fire on whitespace at an interior line boundary
  # of multi-line text, stripping the wrong text node and reporting an edge
  # space that is not there.
  #
  # @param node [#text, #at_xpath] the first/last direct text child is
  #   rewritten in place when the corresponding edge has whitespace
  # @return [Array(String, String)] [leading, trailing]; each is ' ' when
  #   that edge had whitespace and nil otherwise
  def extract_leading_trailing_whitespace(node)
    leading_whitespace = nil
    if node.text.match?(/\A\s+/)
      first_text = node.at_xpath('./text()[1]')
      first_text&.replace(first_text.text.lstrip)
      leading_whitespace = ' '
    end

    # node.text is read again so the check sees the stripped leading edge.
    trailing_whitespace = nil
    if node.text.match?(/\s+\z/)
      last_text = node.at_xpath('./text()[last()]')
      last_text&.replace(last_text.text.rstrip)
      trailing_whitespace = ' '
    end

    [leading_whitespace, trailing_whitespace]
  end

  # Whether the node preceding +node+ in document order has non-blank text
  # ending in a word character.
  #
  # @return [Boolean, nil] nil when there is no preceding node
  def unconstrained_before?(node)
    before = node.at_xpath('preceding::node()[1]')

    before &&
      !before.text.strip.empty? &&
      before.text[-1]&.match?(/\w/)
  end

  # Whether the node following +node+ in document order has non-blank text
  # starting with a word character, comma, semicolon or double quote.
  #
  # NOTE(review): the last alternative, \.\?!, needs three characters and so
  # can never match the single character tested here. It was probably meant
  # to be [.?!]; left unchanged because fixing it would alter emitted markup.
  #
  # @return [Boolean, nil] nil when there is no following node
  def unconstrained_after?(node)
    after = node.at_xpath('following::node()[1]')

    after && !after.text.strip.empty? &&
      after.text[0]&.match?(/\w|,|;|"|\.\?!/)
  end

  # Extracts plain text from converted content.
  #
  # @param content [String, Array, nil] a String is returned as is; array
  #   items contribute themselves (String), their +content+ (CoreModel
  #   elements, '' when it is unset) or their +to_s+ (anything else)
  # @return [String] '' for nil
  def extract_text_from_content(content)
    return content if content.is_a?(String)
    return '' if content.nil?

    content.map do |item|
      case item
      when String
        item
      when Coradoc::CoreModel::InlineElement
        item.content.to_s
      when Coradoc::CoreModel::Base
        item.content ? item.content.to_s : ''
      else
        item.to_s
      end
    end.join
  end
end
|
|
115
|
-
end
|
|
116
|
-
end
|
|
117
|
-
end
|
|
118
|
-
end
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter registered for <blockquote> elements.
class Blockquote < Base
  INSTANCE = new

  # Builds a QuoteBlock from the element: converted children become the
  # block's children, 'id' its id and 'cite' its attribution.
  #
  # @param node [#[]] the <blockquote> element
  # @param state [Hash] conversion state handed on to the children
  # @return [Coradoc::CoreModel::QuoteBlock]
  def to_coradoc(node, state = {})
    attributes = { id: node['id'], attribution: node['cite'] }
    children = treat_children_coradoc(node, state)

    Coradoc::CoreModel::QuoteBlock.new(children: children, **attributes)
  end
end

register(:blockquote, Blockquote::INSTANCE)
|
|
24
|
-
end
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
end
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter registered for <br> elements.
class Br < Base
  INSTANCE = new

  # Both arguments are ignored; every <br> maps to a fresh line break.
  #
  # @return [Coradoc::CoreModel::LineBreakElement]
  def to_coradoc(_node, _state = {})
    Coradoc::CoreModel::LineBreakElement.new
  end
end

register(:br, Br::INSTANCE)
|
|
16
|
-
end
|
|
17
|
-
end
|
|
18
|
-
end
|
|
19
|
-
end
|
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter for elements that contribute nothing themselves: the result is
# simply the converted children.
class Bypass < Base
  INSTANCE = new

  # @param node [#children]
  # @param state [Hash] conversion state handed on to the children
  # @return [Array] the converted children
  def to_coradoc(node, state = {})
    treat_children_coradoc(node, state)
  end
end

# Tags handled by Bypass, registered in this order.
%i[
  document html body span thead tbody tfoot abbr acronym address applet
  map area bdi bdo big button canvas data datalist del ins dfn dialog
  embed fieldset font footer form frame frameset header iframe input
  label legend main menu menulist meter nav noframes noscript object
  optgroup option output param picture progress ruby rt rp s select
  small strike details section summary svg template textarea track u wbr
].each do |tag|
  register tag, Bypass::INSTANCE
end
|
|
81
|
-
end
|
|
82
|
-
end
|
|
83
|
-
end
|
|
84
|
-
end
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Markup converter registered for the monospace-style tags
# (code, tt, kbd, samp, var).
class Code < Markup
  INSTANCE = new

  # @return [String] the format type used for these elements
  def coradoc_format_type
    'monospace'
  end

  # @return [Array<String>] tag names of the same markup family
  # NOTE(review): presumably used by Markup to detect nesting; Markup is not
  # visible here.
  def markup_ancestor_tag_names
    %w[code tt kbd samp var]
  end
end

%i[code tt kbd samp var].each do |tag|
  register tag, Code::INSTANCE
end
|
|
24
|
-
end
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
end
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Coradoc
|
|
4
|
-
module Input
|
|
5
|
-
module Html
|
|
6
|
-
module Converters
|
|
7
|
-
# Converter registered for <div> elements.
class Div < Base
  INSTANCE = new

  # Builds an OpenBlock carrying the element's 'id' and its converted
  # children.
  #
  # @param node [#[]] the <div> element
  # @param state [Hash] conversion state handed on to the children
  # @return [Coradoc::CoreModel::OpenBlock]
  def to_coradoc(node, state = {})
    block_id = node['id']
    children = treat_children_coradoc(node, state)

    Coradoc::CoreModel::OpenBlock.new(children: children, id: block_id)
  end
end

register(:div, Div::INSTANCE)
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
end
|