coradoc 0.3.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/exe/reverse_adoc +24 -3
- data/lib/coradoc/document.rb +1 -0
- data/lib/coradoc/element/admonition.rb +2 -2
- data/lib/coradoc/element/attribute.rb +2 -2
- data/lib/coradoc/element/attribute_list.rb +94 -15
- data/lib/coradoc/element/audio.rb +13 -2
- data/lib/coradoc/element/author.rb +4 -2
- data/lib/coradoc/element/base.rb +70 -7
- data/lib/coradoc/element/block/core.rb +8 -4
- data/lib/coradoc/element/block/quote.rb +1 -1
- data/lib/coradoc/element/block/side.rb +1 -1
- data/lib/coradoc/element/break.rb +1 -1
- data/lib/coradoc/element/document_attributes.rb +6 -6
- data/lib/coradoc/element/header.rb +4 -2
- data/lib/coradoc/element/image/block_image.rb +13 -2
- data/lib/coradoc/element/image/core.rb +37 -6
- data/lib/coradoc/element/image/inline_image.rb +2 -2
- data/lib/coradoc/element/inline/anchor.rb +4 -2
- data/lib/coradoc/element/inline/bold.rb +9 -4
- data/lib/coradoc/element/inline/cross_reference.rb +4 -2
- data/lib/coradoc/element/inline/hard_line_break.rb +1 -1
- data/lib/coradoc/element/inline/highlight.rb +11 -6
- data/lib/coradoc/element/inline/italic.rb +9 -4
- data/lib/coradoc/element/inline/link.rb +22 -6
- data/lib/coradoc/element/inline/monospace.rb +9 -4
- data/lib/coradoc/element/inline/quotation.rb +3 -1
- data/lib/coradoc/element/inline/subscript.rb +4 -2
- data/lib/coradoc/element/inline/superscript.rb +4 -2
- data/lib/coradoc/element/list/core.rb +15 -7
- data/lib/coradoc/element/list/definition.rb +22 -1
- data/lib/coradoc/element/list/ordered.rb +1 -1
- data/lib/coradoc/element/list/unordered.rb +1 -1
- data/lib/coradoc/element/list.rb +1 -0
- data/lib/coradoc/element/list_item.rb +16 -3
- data/lib/coradoc/element/list_item_definition.rb +32 -0
- data/lib/coradoc/element/paragraph.rb +6 -4
- data/lib/coradoc/element/revision.rb +4 -2
- data/lib/coradoc/element/section.rb +27 -4
- data/lib/coradoc/element/table.rb +32 -10
- data/lib/coradoc/element/text_element.rb +48 -8
- data/lib/coradoc/element/title.rb +27 -7
- data/lib/coradoc/element/video.rb +32 -5
- data/lib/coradoc/reverse_adoc/README.adoc +14 -8
- data/lib/coradoc/reverse_adoc/cleaner.rb +21 -10
- data/lib/coradoc/reverse_adoc/config.rb +35 -16
- data/lib/coradoc/reverse_adoc/converters/a.rb +17 -12
- data/lib/coradoc/reverse_adoc/converters/aside.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/audio.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/base.rb +48 -44
- data/lib/coradoc/reverse_adoc/converters/blockquote.rb +2 -11
- data/lib/coradoc/reverse_adoc/converters/br.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/bypass.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/code.rb +5 -42
- data/lib/coradoc/reverse_adoc/converters/div.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/dl.rb +55 -0
- data/lib/coradoc/reverse_adoc/converters/em.rb +5 -43
- data/lib/coradoc/reverse_adoc/converters/figure.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/h.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/head.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/hr.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/img.rb +30 -18
- data/lib/coradoc/reverse_adoc/converters/li.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/mark.rb +5 -11
- data/lib/coradoc/reverse_adoc/converters/markup.rb +27 -0
- data/lib/coradoc/reverse_adoc/converters/ol.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/p.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/pre.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/q.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/strong.rb +5 -41
- data/lib/coradoc/reverse_adoc/converters/sub.rb +6 -4
- data/lib/coradoc/reverse_adoc/converters/sup.rb +7 -5
- data/lib/coradoc/reverse_adoc/converters/table.rb +240 -4
- data/lib/coradoc/reverse_adoc/converters/td.rb +1 -7
- data/lib/coradoc/reverse_adoc/converters/text.rb +1 -38
- data/lib/coradoc/reverse_adoc/converters/tr.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters/video.rb +0 -4
- data/lib/coradoc/reverse_adoc/converters.rb +24 -1
- data/lib/coradoc/reverse_adoc/html_converter.rb +109 -20
- data/lib/coradoc/reverse_adoc/plugin.rb +131 -0
- data/lib/coradoc/reverse_adoc/plugins/plateau.rb +206 -0
- data/lib/coradoc/reverse_adoc/postprocessor.rb +152 -0
- data/lib/coradoc/reverse_adoc.rb +3 -0
- data/lib/coradoc/util.rb +10 -0
- data/lib/coradoc/version.rb +1 -1
- data/lib/coradoc.rb +1 -0
- data/lib/reverse_adoc.rb +1 -1
- metadata +9 -3
- data/lib/coradoc/element/inline/image.rb +0 -26
@@ -1,23 +1,50 @@
|
|
1
1
|
module Coradoc
|
2
2
|
module Element
|
3
|
-
class Video
|
4
|
-
|
3
|
+
class Video < Base
|
4
|
+
attr_accessor :id, :title, :src, :options
|
5
|
+
|
6
|
+
declare_children :id, :anchor, :attributes
|
5
7
|
|
6
8
|
def initialize(title, options = {})
|
7
9
|
@title = title
|
8
10
|
@id = options.fetch(:id, nil)
|
9
11
|
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
10
12
|
@src = options.fetch(:src, "")
|
11
|
-
@attributes = options.fetch(:attributes,
|
12
|
-
|
13
|
+
@attributes = options.fetch(:attributes, AttributeList.new)
|
14
|
+
if @attributes.any?
|
15
|
+
@attributes.validate_positional(VALIDATORS_POSITIONAL)
|
16
|
+
@attributes.validate_named(VALIDATORS_NAMED)
|
17
|
+
end
|
13
18
|
end
|
14
19
|
|
15
20
|
def to_adoc
|
16
21
|
anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
|
17
22
|
title = ".#{@title}\n" unless @title.empty?
|
18
|
-
attrs = @attributes.
|
23
|
+
attrs = @attributes.to_adoc
|
19
24
|
[anchor, title, "video::", @src, attrs].join("")
|
20
25
|
end
|
26
|
+
|
27
|
+
extend AttributeList::Matchers
|
28
|
+
VALIDATORS_POSITIONAL = [
|
29
|
+
[:alt, String],
|
30
|
+
[:width, Integer],
|
31
|
+
[:height, Integer],
|
32
|
+
]
|
33
|
+
|
34
|
+
VALIDATORS_NAMED = {
|
35
|
+
title: String,
|
36
|
+
poster: String,
|
37
|
+
width: Integer,
|
38
|
+
height: Integer,
|
39
|
+
start: Integer,
|
40
|
+
end: Integer,
|
41
|
+
theme: one("dark", "light"),
|
42
|
+
lang: /[a-z]{2,3}(?:-[A-Z]{2})?/,
|
43
|
+
list: String,
|
44
|
+
playlist: String,
|
45
|
+
options: many("autoplay", "loop", "modest",
|
46
|
+
"nocontrols", "nofullscreen", "muted"),
|
47
|
+
}
|
21
48
|
end
|
22
49
|
end
|
23
50
|
end
|
@@ -1,17 +1,13 @@
|
|
1
1
|
= AsciiDoc from HTML and Microsoft Word: reverse_adoc
|
2
2
|
|
3
|
-
https://github.com/metanorma/reverse_adoc[reverse_adoc] image:https://img.shields.io/gem/v/reverse_adoc.svg["Gem Version", link="https://rubygems.org/gems/reverse_adoc"]::
|
4
|
-
image:https://github.com/metanorma/reverse_adoc/workflows/rake/badge.svg["Build Status", link="https://github.com/metanorma/reverse_adoc/actions?workflow=rake"]
|
5
|
-
image:https://codeclimate.com/github/metanorma/reverse_adoc/badges/gpa.svg["Code Climate", link="https://codeclimate.com/github/metanorma/reverse_adoc"]
|
6
|
-
image:https://img.shields.io/github/issues-pr-raw/metanorma/reverse_adoc.svg["Pull Requests", link="https://github.com/metanorma/reverse_adoc/pulls"]
|
7
|
-
image:https://img.shields.io/github/commits-since/metanorma/reverse_adoc/latest.svg["Commits since latest",link="https://github.com/metanorma/reverse_adoc/releases"]
|
8
|
-
|
9
3
|
== Purpose
|
10
4
|
|
11
5
|
Transforms HTML and Microsoft Word into AsciiDoc.
|
12
6
|
|
13
7
|
Based on https://github.com/xijo/reverse_markdown
|
14
8
|
|
9
|
+
reverse_adoc used to be a separate Gem, but now it's part of Coradoc.
|
10
|
+
|
15
11
|
|
16
12
|
== Installation
|
17
13
|
|
@@ -19,14 +15,14 @@ Install the gem:
|
|
19
15
|
|
20
16
|
[source,console]
|
21
17
|
----
|
22
|
-
[sudo] gem install
|
18
|
+
[sudo] gem install coradoc
|
23
19
|
----
|
24
20
|
|
25
21
|
or add it to your `Gemfile`:
|
26
22
|
|
27
23
|
[source,ruby]
|
28
24
|
----
|
29
|
-
gem '
|
25
|
+
gem 'coradoc'
|
30
26
|
----
|
31
27
|
|
32
28
|
|
@@ -290,6 +286,16 @@ Coradoc::ReverseAdoc.config do |config|
|
|
290
286
|
end
|
291
287
|
----
|
292
288
|
|
289
|
+
=== Convert HTML to a Coradoc AST
|
290
|
+
|
291
|
+
[source,ruby]
|
292
|
+
----
|
293
|
+
require 'coradoc/reverse_adoc'
|
294
|
+
|
295
|
+
# Options can be supplied as keyword arguments
|
296
|
+
Coradoc::ReverseAdoc::HtmlConverter.to_coradoc("<b><i>Some input</i></b>")
|
297
|
+
----
|
298
|
+
|
293
299
|
|
294
300
|
== Related stuff
|
295
301
|
|
@@ -1,11 +1,21 @@
|
|
1
1
|
module Coradoc::ReverseAdoc
|
2
2
|
class Cleaner
|
3
3
|
def tidy(string)
|
4
|
-
result =
|
5
|
-
|
6
|
-
|
7
|
-
result =
|
8
|
-
|
4
|
+
result = HtmlConverter.track_time "Removing inner whitespace" do
|
5
|
+
remove_inner_whitespaces(String.new(string))
|
6
|
+
end
|
7
|
+
result = HtmlConverter.track_time "Removing newlines" do
|
8
|
+
remove_newlines(result)
|
9
|
+
end
|
10
|
+
result = HtmlConverter.track_time "Removing leading newlines" do
|
11
|
+
remove_leading_newlines(result)
|
12
|
+
end
|
13
|
+
result = HtmlConverter.track_time "Cleaning tag borders" do
|
14
|
+
clean_tag_borders(result)
|
15
|
+
end
|
16
|
+
result = HtmlConverter.track_time "Cleaning punctuation characters" do
|
17
|
+
clean_punctuation_characters(result)
|
18
|
+
end
|
9
19
|
end
|
10
20
|
|
11
21
|
def remove_newlines(string)
|
@@ -22,11 +32,13 @@ module Coradoc::ReverseAdoc
|
|
22
32
|
string.gsub!(/(stem:\[([^\]]|\\\])*\])\n(?=\S)/, "\\1 ")
|
23
33
|
string.gsub!(/(stem:\[([^\]]|\\\])*\])\s+(?=[\^-])/, "\\1")
|
24
34
|
end
|
25
|
-
|
26
|
-
|
35
|
+
result = +""
|
36
|
+
string.each_line do |line|
|
37
|
+
result << preserve_border_whitespaces(line) do
|
27
38
|
line.strip.gsub(/[ \t]{2,}/, " ")
|
28
39
|
end
|
29
40
|
end
|
41
|
+
result
|
30
42
|
end
|
31
43
|
|
32
44
|
# Find non-asterisk content that is enclosed by two or
|
@@ -61,7 +73,7 @@ module Coradoc::ReverseAdoc
|
|
61
73
|
end
|
62
74
|
|
63
75
|
def clean_punctuation_characters(string)
|
64
|
-
string.gsub(/(\*\*|~~|__)\s([.!?'"])/, "
|
76
|
+
string.gsub(/(\*\*|~~|__)\s([.!?'"])/, "\\1\\2")
|
65
77
|
end
|
66
78
|
|
67
79
|
# preprocesses HTML, rather than postprocessing it
|
@@ -71,8 +83,7 @@ module Coradoc::ReverseAdoc
|
|
71
83
|
|
72
84
|
def scrub_whitespace(string)
|
73
85
|
string.gsub!(/ | |\u00a0/i, " ") # HTML encoded spaces
|
74
|
-
string.
|
75
|
-
string.sub!(/[[:space:]]+\z$/m, "") # document trailing whitespace
|
86
|
+
string = Coradoc.strip_unicode(string) # Strip document-level leading and trailing whitespace
|
76
87
|
string.gsub!(/( +)$/, " ") # line trailing whitespace
|
77
88
|
string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
|
78
89
|
# string.delete!('?| ') # Unicode non-breaking spaces, injected as tabs
|
@@ -2,9 +2,6 @@ require "tmpdir"
|
|
2
2
|
|
3
3
|
module Coradoc::ReverseAdoc
|
4
4
|
class Config
|
5
|
-
attr_accessor :unknown_tags, :tag_border, :mathml2asciimath, :external_images,
|
6
|
-
:destination, :sourcedir, :image_counter, :image_counter_pattern, :input_format
|
7
|
-
|
8
5
|
def initialize
|
9
6
|
@unknown_tags = :pass_through
|
10
7
|
@input_format = :html
|
@@ -26,29 +23,51 @@ module Coradoc::ReverseAdoc
|
|
26
23
|
@strong_delimiter = "*".freeze
|
27
24
|
@inline_options = {}
|
28
25
|
@tag_border = " ".freeze
|
26
|
+
|
27
|
+
@split_sections = nil
|
28
|
+
|
29
|
+
# Document width - used to compute table sizes.
|
30
|
+
# This is the assumed screen width of the input document.
|
31
|
+
# If column widths are specified in absolute values, then we
|
32
|
+
# have to convert them to relative values, as AsciiDoc only
|
33
|
+
# supports those.
|
34
|
+
@doc_width = 1000
|
35
|
+
|
36
|
+
# Plugin system
|
37
|
+
@plugins = []
|
38
|
+
|
39
|
+
# Debugging options
|
40
|
+
@track_time = false
|
29
41
|
end
|
30
42
|
|
31
43
|
def with(options = {})
|
44
|
+
old_options = @inline_options
|
32
45
|
@inline_options = options
|
33
46
|
result = yield
|
34
|
-
@inline_options =
|
47
|
+
@inline_options = old_options
|
35
48
|
result
|
36
49
|
end
|
37
50
|
|
38
|
-
def
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
def mathml2asciimath
|
43
|
-
@inline_options[:mathml2asciimath] || @mathml2asciimath
|
44
|
-
end
|
51
|
+
def self.declare_option(option)
|
52
|
+
define_method(option) do
|
53
|
+
@inline_options[option] || instance_variable_get(:"@#{option}")
|
54
|
+
end
|
45
55
|
|
46
|
-
|
47
|
-
@inline_options[:external_images] || @external_images
|
56
|
+
attr_writer option
|
48
57
|
end
|
49
58
|
|
50
|
-
|
51
|
-
|
52
|
-
|
59
|
+
declare_option :unknown_tags
|
60
|
+
declare_option :tag_border
|
61
|
+
declare_option :mathml2asciimath
|
62
|
+
declare_option :external_images
|
63
|
+
declare_option :destination
|
64
|
+
declare_option :sourcedir
|
65
|
+
declare_option :image_counter
|
66
|
+
declare_option :image_counter_pattern
|
67
|
+
declare_option :input_format
|
68
|
+
declare_option :split_sections
|
69
|
+
declare_option :doc_width
|
70
|
+
declare_option :plugins
|
71
|
+
declare_option :track_time
|
53
72
|
end
|
54
73
|
end
|
@@ -11,29 +11,34 @@ module Coradoc::ReverseAdoc
|
|
11
11
|
id = node["id"] || node["name"]
|
12
12
|
|
13
13
|
id = id&.gsub(/\s/, "")&.gsub(/__+/, "_")
|
14
|
+
id = nil if id&.empty?
|
14
15
|
|
15
16
|
return "" if /^_Toc\d+$|^_GoBack$/.match?(id)
|
16
17
|
|
17
|
-
|
18
|
-
return Coradoc::Element::Inline::Anchor.new(id)
|
19
|
-
end
|
18
|
+
return Coradoc::Element::Inline::Anchor.new(id) if id
|
20
19
|
|
21
20
|
if href.to_s.start_with?("#")
|
22
21
|
href = href.sub(/^#/, "").gsub(/\s/, "").gsub(/__+/, "_")
|
23
22
|
return Coradoc::Element::Inline::CrossReference.new(href, name)
|
24
23
|
end
|
25
24
|
|
26
|
-
if href.to_s.empty?
|
27
|
-
return name
|
28
|
-
end
|
25
|
+
return name if href.to_s.empty?
|
29
26
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
27
|
+
ambigous_characters = /[\w.?&#=%;\[\u{ff}-\u{10ffff}]/
|
28
|
+
if name&.strip == href
|
29
|
+
name = ""
|
30
|
+
right_constrain = textnode_after_start_with?(node, ambigous_characters)
|
31
|
+
end
|
34
32
|
|
35
|
-
|
36
|
-
|
33
|
+
out = []
|
34
|
+
out << " " if textnode_before_end_with?(node, ambigous_characters)
|
35
|
+
out << Coradoc::Element::Inline::Link.new(
|
36
|
+
path: href,
|
37
|
+
name: name.strip,
|
38
|
+
title: title.strip,
|
39
|
+
right_constrain: right_constrain,
|
40
|
+
)
|
41
|
+
out
|
37
42
|
end
|
38
43
|
end
|
39
44
|
|
@@ -5,10 +5,6 @@ module Coradoc::ReverseAdoc
|
|
5
5
|
content = treat_children(node, state)
|
6
6
|
Coradoc::Element::Block::Side.new(lines: content.lines)
|
7
7
|
end
|
8
|
-
|
9
|
-
def convert(node, state = {})
|
10
|
-
Coradoc::Generator.gen_adoc(to_coradoc(node, state))
|
11
|
-
end
|
12
8
|
end
|
13
9
|
|
14
10
|
register :aside, Aside.new
|
@@ -1,6 +1,14 @@
|
|
1
1
|
module Coradoc::ReverseAdoc
|
2
2
|
module Converters
|
3
3
|
class Base
|
4
|
+
# Default implementation to convert a given Nokogiri node
|
5
|
+
# to an AsciiDoc script.
|
6
|
+
# Can be overridden by subclasses.
|
7
|
+
def convert(node, state = {})
|
8
|
+
Coradoc::Generator.gen_adoc(to_coradoc(node, state))
|
9
|
+
end
|
10
|
+
|
11
|
+
# Note: treat_children won't run plugin hooks
|
4
12
|
def treat_children(node, state)
|
5
13
|
node.children.inject("") do |memo, child|
|
6
14
|
memo << treat(child, state)
|
@@ -8,7 +16,7 @@ module Coradoc::ReverseAdoc
|
|
8
16
|
end
|
9
17
|
|
10
18
|
def treat(node, state)
|
11
|
-
|
19
|
+
Converters.process(node, state)
|
12
20
|
end
|
13
21
|
|
14
22
|
def treat_children_coradoc(node, state)
|
@@ -18,21 +26,13 @@ module Coradoc::ReverseAdoc
|
|
18
26
|
end
|
19
27
|
|
20
28
|
def treat_coradoc(node, state)
|
21
|
-
|
22
|
-
end
|
23
|
-
|
24
|
-
def escape_keychars(string)
|
25
|
-
subs = { "*" => '\*', "_" => '\_' }
|
26
|
-
string
|
27
|
-
.gsub(/((?<=\s)[\*_]+)|[\*_]+(?=\s)/) do |n|
|
28
|
-
n.chars.map do |char|
|
29
|
-
subs[char]
|
30
|
-
end.join
|
31
|
-
end
|
29
|
+
Converters.process_coradoc(node, state)
|
32
30
|
end
|
33
31
|
|
34
32
|
def extract_title(node)
|
35
|
-
title = escape_keychars(
|
33
|
+
title = Coradoc::Element::TextElement.escape_keychars(
|
34
|
+
node["title"].to_s,
|
35
|
+
)
|
36
36
|
title.empty? ? "" : %[ #{title}]
|
37
37
|
end
|
38
38
|
|
@@ -46,10 +46,43 @@ module Coradoc::ReverseAdoc
|
|
46
46
|
end
|
47
47
|
|
48
48
|
def textnode_before_end_with?(node, str)
|
49
|
-
return nil
|
49
|
+
return nil unless [String, Regexp].include?(str.class)
|
50
|
+
return nil if str.is_a?(String) && str.empty?
|
51
|
+
|
52
|
+
str = /#{Regexp.escape(str)}/ if str.is_a?(String)
|
53
|
+
str = /(?:#{str})\z/
|
50
54
|
|
51
55
|
node2 = node.at_xpath("preceding-sibling::node()[1]")
|
52
|
-
node2.respond_to?(:text) && node2.text.
|
56
|
+
node2.respond_to?(:text) && node2.text.match?(str)
|
57
|
+
end
|
58
|
+
|
59
|
+
def textnode_after_start_with?(node, str)
|
60
|
+
return nil unless [String, Regexp].include?(str.class)
|
61
|
+
return nil if str.is_a?(String) && str.empty?
|
62
|
+
|
63
|
+
str = /#{Regexp.escape(str)}/ if str.is_a?(String)
|
64
|
+
str = /\A(?:#{str})/
|
65
|
+
|
66
|
+
node2 = node.at_xpath("following-sibling::node()[1]")
|
67
|
+
node2.respond_to?(:text) && node2.text.match?(str)
|
68
|
+
end
|
69
|
+
|
70
|
+
def extract_leading_trailing_whitespace(node)
|
71
|
+
node.text =~ /^(\s+)/
|
72
|
+
leading_whitespace = $1
|
73
|
+
if !leading_whitespace.nil?
|
74
|
+
first_text = node.at_xpath("./text()[1]")
|
75
|
+
first_text.replace(first_text.text.lstrip)
|
76
|
+
leading_whitespace = " "
|
77
|
+
end
|
78
|
+
node.text =~ /(\s+)$/
|
79
|
+
trailing_whitespace = $1
|
80
|
+
if !trailing_whitespace.nil?
|
81
|
+
last_text = node.at_xpath("./text()[last()]")
|
82
|
+
last_text.replace(last_text.text.rstrip)
|
83
|
+
trailing_whitespace = " "
|
84
|
+
end
|
85
|
+
[leading_whitespace, trailing_whitespace]
|
53
86
|
end
|
54
87
|
|
55
88
|
def unconstrained_before?(node)
|
@@ -60,41 +93,12 @@ module Coradoc::ReverseAdoc
|
|
60
93
|
before.text[-1]&.match?(/\w/)
|
61
94
|
end
|
62
95
|
|
63
|
-
# TODO: This logic ought to be cleaned up.
|
64
96
|
def unconstrained_after?(node)
|
65
97
|
after = node.at_xpath("following::node()[1]")
|
66
98
|
|
67
99
|
after && !after.text.strip.empty? &&
|
68
100
|
after.text[0]&.match?(/\w|,|;|"|\.\?!/)
|
69
101
|
end
|
70
|
-
|
71
|
-
# def trailing_whitespace?(node)
|
72
|
-
|
73
|
-
# TODO: This logic ought to be cleaned up.
|
74
|
-
def constrained?(node)
|
75
|
-
before = node.at_xpath("preceding::node()[1]").to_s[-1]
|
76
|
-
before = if before
|
77
|
-
before&.match?(/\s/) ? true : false
|
78
|
-
else
|
79
|
-
true
|
80
|
-
end
|
81
|
-
|
82
|
-
if !before && (node.to_s[0] =~ /\s/)
|
83
|
-
before = true
|
84
|
-
end
|
85
|
-
|
86
|
-
after = node.at_xpath("following::node()[1]").to_s[0]
|
87
|
-
after = if after
|
88
|
-
after&.match?(/\s|,|;|"|\.\?!/) ? true : false
|
89
|
-
else
|
90
|
-
true
|
91
|
-
end
|
92
|
-
if !after && (node.to_s[-1] =~ /\s/)
|
93
|
-
after = true
|
94
|
-
end
|
95
|
-
|
96
|
-
before && after
|
97
|
-
end
|
98
102
|
end
|
99
103
|
end
|
100
104
|
end
|
@@ -4,22 +4,13 @@ module Coradoc::ReverseAdoc
|
|
4
4
|
def to_coradoc(node, state = {})
|
5
5
|
node["id"]
|
6
6
|
cite = node["cite"]
|
7
|
-
attributes =
|
8
|
-
|
9
|
-
else
|
10
|
-
Coradoc::Element::AttributeList.new(
|
11
|
-
"quote", cite
|
12
|
-
)
|
13
|
-
end
|
7
|
+
attributes = Coradoc::Element::AttributeList.new
|
8
|
+
attributes.add_positional("quote", cite) if !cite.nil?
|
14
9
|
content = treat_children(node, state).strip
|
15
10
|
content = Coradoc::ReverseAdoc.cleaner.remove_newlines(content)
|
16
11
|
Coradoc::Element::Block::Quote.new(nil, lines: content,
|
17
12
|
attributes: attributes)
|
18
13
|
end
|
19
|
-
|
20
|
-
def convert(node, state = {})
|
21
|
-
Coradoc::Generator.gen_adoc(to_coradoc(node, state))
|
22
|
-
end
|
23
14
|
end
|
24
15
|
|
25
16
|
register :blockquote, Blockquote.new
|
@@ -1,49 +1,12 @@
|
|
1
1
|
module Coradoc::ReverseAdoc
|
2
2
|
module Converters
|
3
|
-
class Code <
|
4
|
-
def
|
5
|
-
|
6
|
-
|
7
|
-
if Coradoc::Generator.gen_adoc(content).strip.empty?
|
8
|
-
return ""
|
9
|
-
end
|
10
|
-
|
11
|
-
if node_has_ancestor?(node, ["code", "tt", "kbd", "samp", "var"])
|
12
|
-
return content
|
13
|
-
end
|
14
|
-
|
15
|
-
node.text =~ /^(\s+)/
|
16
|
-
leading_whitespace = $1
|
17
|
-
has_leading_whitespace = !leading_whitespace.nil?
|
18
|
-
|
19
|
-
if has_leading_whitespace
|
20
|
-
first_text = node.at_xpath("./text()[1]")
|
21
|
-
first_text.replace(first_text.text.lstrip)
|
22
|
-
leading_whitespace = " "
|
23
|
-
end
|
24
|
-
|
25
|
-
node.text =~ /(\s+)$/
|
26
|
-
trailing_whitespace = $1
|
27
|
-
has_trailing_whitespace = !trailing_whitespace.nil?
|
28
|
-
|
29
|
-
if has_trailing_whitespace
|
30
|
-
last_text = node.at_xpath("./text()[last()]")
|
31
|
-
last_text.replace(last_text.text.rstrip)
|
32
|
-
trailing_whitespace = " "
|
33
|
-
end
|
34
|
-
|
35
|
-
u_before = unconstrained_before?(node)
|
36
|
-
u_after = unconstrained_after?(node)
|
37
|
-
u = !((!u_before || has_leading_whitespace) && (!u_after || has_trailing_whitespace))
|
38
|
-
e = Coradoc::Element::Inline::Monospace.new(
|
39
|
-
Coradoc::Element::TextElement.new(content), u
|
40
|
-
)
|
41
|
-
|
42
|
-
[leading_whitespace, e, trailing_whitespace]
|
3
|
+
class Code < Markup
|
4
|
+
def coradoc_class
|
5
|
+
Coradoc::Element::Inline::Monospace
|
43
6
|
end
|
44
7
|
|
45
|
-
def
|
46
|
-
|
8
|
+
def markup_ancestor_tag_names
|
9
|
+
%w[code tt kbd samp var]
|
47
10
|
end
|
48
11
|
end
|
49
12
|
|
@@ -6,10 +6,6 @@ module Coradoc::ReverseAdoc
|
|
6
6
|
contents = treat_children_coradoc(node, state)
|
7
7
|
Coradoc::Element::Section.new(nil, id: id, contents: contents)
|
8
8
|
end
|
9
|
-
|
10
|
-
def convert(node, state = {})
|
11
|
-
Coradoc::Generator.gen_adoc(to_coradoc(node, state))
|
12
|
-
end
|
13
9
|
end
|
14
10
|
|
15
11
|
register :div, Div.new
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Coradoc::ReverseAdoc
|
2
|
+
module Converters
|
3
|
+
class Dl < Base
|
4
|
+
def to_coradoc(node, state = {})
|
5
|
+
items = process_dl(node, state)
|
6
|
+
items2 = items.map do |item|
|
7
|
+
Coradoc::Element::ListItemDefinition.new(item[:name], item[:value])
|
8
|
+
end
|
9
|
+
Coradoc::Element::List::Definition.new(items2, delimiter: "::")
|
10
|
+
end
|
11
|
+
|
12
|
+
def process_dl(node, state = {})
|
13
|
+
groups = []
|
14
|
+
current = {name: [], value: []}
|
15
|
+
|
16
|
+
seen_dd = false
|
17
|
+
child = node.at_xpath("*[1]")
|
18
|
+
grandchild = nil
|
19
|
+
while !child.nil?
|
20
|
+
if child.name == "div"
|
21
|
+
grandchild = child.at_xpath("*[1]")
|
22
|
+
while !grandchild.nil?
|
23
|
+
groups, current, seen_dd = process_dt_or_dd(groups, current, seen_dd, grandchild, state)
|
24
|
+
grandchild = grandchild.at_xpath("following-sibling::*[1]")
|
25
|
+
end
|
26
|
+
elsif ["dt", "dd"].include?(child.name)
|
27
|
+
groups, current, seen_dd = process_dt_or_dd(groups, current, seen_dd, child, state)
|
28
|
+
end
|
29
|
+
child = child.at_xpath("following-sibling::*[1]")
|
30
|
+
if current[:name].any? && current[:value].any?
|
31
|
+
groups << current
|
32
|
+
end
|
33
|
+
end
|
34
|
+
groups
|
35
|
+
end
|
36
|
+
|
37
|
+
def process_dt_or_dd(groups, current, seen_dd, subnode, state = {})
|
38
|
+
if subnode.name == "dt"
|
39
|
+
if seen_dd
|
40
|
+
# groups << current
|
41
|
+
current = {name: [], value: []}
|
42
|
+
seen_dd = false
|
43
|
+
end
|
44
|
+
current[:name] += treat_children_coradoc(subnode, state)
|
45
|
+
elsif subnode.name == "dd"
|
46
|
+
current[:value] += treat_children_coradoc(subnode, state)
|
47
|
+
seen_dd = true
|
48
|
+
end
|
49
|
+
[groups, current, seen_dd]
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
register :dl, Dl.new
|
54
|
+
end
|
55
|
+
end
|
@@ -1,50 +1,12 @@
|
|
1
1
|
module Coradoc::ReverseAdoc
|
2
2
|
module Converters
|
3
|
-
class Em <
|
4
|
-
def
|
5
|
-
|
6
|
-
state.merge(already_italic: true))
|
7
|
-
|
8
|
-
if Coradoc::Generator.gen_adoc(content).strip.empty?
|
9
|
-
return ""
|
10
|
-
end
|
11
|
-
|
12
|
-
if node_has_ancestor?(node, ["em", "i", "cite"])
|
13
|
-
return content
|
14
|
-
end
|
15
|
-
|
16
|
-
node.text =~ /^(\s+)/
|
17
|
-
leading_whitespace = $1
|
18
|
-
has_leading_whitespace = !leading_whitespace.nil?
|
19
|
-
|
20
|
-
if has_leading_whitespace
|
21
|
-
first_text = node.at_xpath("./text()[1]")
|
22
|
-
first_text.replace(first_text.text.lstrip)
|
23
|
-
leading_whitespace = " "
|
24
|
-
end
|
25
|
-
|
26
|
-
node.text =~ /(\s+)$/
|
27
|
-
trailing_whitespace = $1
|
28
|
-
has_trailing_whitespace = !trailing_whitespace.nil?
|
29
|
-
|
30
|
-
if has_trailing_whitespace
|
31
|
-
last_text = node.at_xpath("./text()[last()]")
|
32
|
-
last_text.replace(last_text.text.rstrip)
|
33
|
-
trailing_whitespace = " "
|
34
|
-
end
|
35
|
-
|
36
|
-
u_before = unconstrained_before?(node)
|
37
|
-
u_after = unconstrained_after?(node)
|
38
|
-
u = !((!u_before || has_leading_whitespace) && (!u_after || has_trailing_whitespace))
|
39
|
-
e = Coradoc::Element::Inline::Italic.new(
|
40
|
-
Coradoc::Element::TextElement.new(content), u
|
41
|
-
)
|
42
|
-
|
43
|
-
[leading_whitespace, e, trailing_whitespace]
|
3
|
+
class Em < Markup
|
4
|
+
def coradoc_class
|
5
|
+
Coradoc::Element::Inline::Italic
|
44
6
|
end
|
45
7
|
|
46
|
-
def
|
47
|
-
|
8
|
+
def markup_ancestor_tag_names
|
9
|
+
%w[em i cite]
|
48
10
|
end
|
49
11
|
end
|
50
12
|
|