coradoc 1.1.2 → 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/coradoc/element/attribute_list.rb +13 -1
- data/lib/coradoc/element/base.rb +2 -0
- data/lib/coradoc/element/block/core.rb +4 -3
- data/lib/coradoc/element/block/example.rb +1 -1
- data/lib/coradoc/element/block/listing.rb +21 -0
- data/lib/coradoc/element/block/literal.rb +4 -2
- data/lib/coradoc/element/block/open.rb +22 -0
- data/lib/coradoc/element/block.rb +3 -1
- data/lib/coradoc/element/list/core.rb +2 -2
- data/lib/coradoc/element/list/ordered.rb +1 -0
- data/lib/coradoc/element/list/unordered.rb +1 -0
- data/lib/coradoc/element/list_item.rb +13 -5
- data/lib/coradoc/element/section.rb +2 -2
- data/lib/coradoc/element/text_element.rb +9 -0
- data/lib/coradoc/input/html/converters/base.rb +2 -2
- data/lib/coradoc/input/html/converters/div.rb +1 -0
- data/lib/coradoc/input/html/converters/table.rb +7 -1
- data/lib/coradoc/input/html/postprocessor.rb +77 -15
- data/lib/coradoc/parser/asciidoc/attribute_list.rb +7 -1
- data/lib/coradoc/parser/asciidoc/base.rb +52 -134
- data/lib/coradoc/parser/asciidoc/block.rb +51 -38
- data/lib/coradoc/parser/asciidoc/content.rb +13 -3
- data/lib/coradoc/parser/asciidoc/list.rb +56 -22
- data/lib/coradoc/parser/asciidoc/paragraph.rb +16 -4
- data/lib/coradoc/parser/asciidoc/section.rb +3 -1
- data/lib/coradoc/parser/asciidoc/term.rb +2 -0
- data/lib/coradoc/parser/asciidoc/text.rb +161 -0
- data/lib/coradoc/parser/base.rb +4 -28
- data/lib/coradoc/transformer.rb +23 -39
- data/lib/coradoc/version.rb +1 -1
- data/utils/round_trip.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e528645f0bdb38f707239ed1862dc74944dc7eba4149cf9f1243458e98591edb
|
4
|
+
data.tar.gz: 268acd80823a507d3a86e83bfeaef55a8145819822aac9903f0bc96cee29d55c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 624f718300d0877f0d610a3fbf953b324d5bfd6a73c68de30db69ddc75b45e1b396eeb5bdfdcfc7d0ecac2d50c8301d7011f118a2d23d4ac02d11c2e54e92e6a
|
7
|
+
data.tar.gz: 849b5851b2ca0b37313d8f7c743defbc168f698284c694c0a467536dbffd9cc10f5ec263fd2c28e1f287a926c6acbe3756c8753f6f3ae303dc61fc9678981ff1
|
@@ -12,6 +12,16 @@ module Coradoc
|
|
12
12
|
@rejected_named = []
|
13
13
|
end
|
14
14
|
|
15
|
+
def inspect
|
16
|
+
"AttributeList: " +
|
17
|
+
[
|
18
|
+
@positional.map(&:inspect).join(", "),
|
19
|
+
@named.map { |k, v| "#{k}: #{v.inspect}" }.join(", "),
|
20
|
+
(@rejected_positional.empty? or "rejected: #{@rejected_positional.inspect}"),
|
21
|
+
(@rejected_positional.empty? or "rejected: #{@rejected_named.inspect}"),
|
22
|
+
].reject { |i| i == true || i.empty? }.join(", ")
|
23
|
+
end
|
24
|
+
|
15
25
|
def add_positional(*attr)
|
16
26
|
@positional += attr
|
17
27
|
end
|
@@ -65,7 +75,9 @@ module Coradoc
|
|
65
75
|
|
66
76
|
adoc = +""
|
67
77
|
if !@positional.empty?
|
68
|
-
adoc << @positional.map
|
78
|
+
adoc << @positional.map do |p|
|
79
|
+
[nil, ""].include?(p) ? '""' : p
|
80
|
+
end.join(",")
|
69
81
|
end
|
70
82
|
adoc << "," if @positional.any? && @named.any?
|
71
83
|
adoc << @named.map do |k, v|
|
data/lib/coradoc/element/base.rb
CHANGED
@@ -61,12 +61,13 @@ module Coradoc
|
|
61
61
|
|
62
62
|
def type_hash
|
63
63
|
@type_hash ||= {
|
64
|
-
"____" => :quote,
|
65
|
-
"****" => :side,
|
66
|
-
"----" => :source,
|
67
64
|
"====" => :example,
|
68
65
|
"...." => :literal,
|
66
|
+
"--" => :open,
|
69
67
|
"++++" => :pass,
|
68
|
+
"____" => :quote,
|
69
|
+
"****" => :side,
|
70
|
+
"----" => :source,
|
70
71
|
}
|
71
72
|
end
|
72
73
|
end
|
@@ -15,7 +15,7 @@ module Coradoc
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def to_adoc
|
18
|
-
"\n\n#{gen_anchor}#{gen_title}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
|
18
|
+
"\n\n#{gen_anchor}#{gen_title}#{gen_attributes}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
|
19
19
|
end
|
20
20
|
end
|
21
21
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Coradoc
|
2
|
+
module Element
|
3
|
+
module Block
|
4
|
+
class Listing < Core
|
5
|
+
def initialize(_title, options = {})
|
6
|
+
@id = options.fetch(:id, nil)
|
7
|
+
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
8
|
+
@lang = options.fetch(:lang, "")
|
9
|
+
@attributes = options.fetch(:attributes, AttributeList.new)
|
10
|
+
@lines = options.fetch(:lines, [])
|
11
|
+
@delimiter_char = "-"
|
12
|
+
@delimiter_len = options.fetch(:delimiter_len, 4)
|
13
|
+
end
|
14
|
+
|
15
|
+
def to_adoc
|
16
|
+
"\n\n#{gen_anchor}#{gen_attributes}\n#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -2,16 +2,18 @@ module Coradoc
|
|
2
2
|
module Element
|
3
3
|
module Block
|
4
4
|
class Literal < Core
|
5
|
-
def initialize(
|
5
|
+
def initialize(title, options = {})
|
6
|
+
@title = title
|
6
7
|
@id = options.fetch(:id, nil)
|
7
8
|
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
9
|
+
@attributes = options.fetch(:attributes, AttributeList.new)
|
8
10
|
@lines = options.fetch(:lines, [])
|
9
11
|
@delimiter_char = "."
|
10
12
|
@delimiter_len = options.fetch(:delimiter_len, 4)
|
11
13
|
end
|
12
14
|
|
13
15
|
def to_adoc
|
14
|
-
"\n\n#{gen_anchor}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
|
16
|
+
"\n\n#{gen_anchor}#{gen_title}#{gen_attributes}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
|
15
17
|
end
|
16
18
|
end
|
17
19
|
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Coradoc
|
2
|
+
module Element
|
3
|
+
module Block
|
4
|
+
class Open < Core
|
5
|
+
def initialize(title, options = {})
|
6
|
+
@title = title
|
7
|
+
@id = options.fetch(:id, nil)
|
8
|
+
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
9
|
+
@lang = options.fetch(:lang, "")
|
10
|
+
@attributes = options.fetch(:attributes, AttributeList.new)
|
11
|
+
@lines = options.fetch(:lines, [])
|
12
|
+
@delimiter_char = "-"
|
13
|
+
@delimiter_len = options.fetch(:delimiter_len, 2)
|
14
|
+
end
|
15
|
+
|
16
|
+
def to_adoc
|
17
|
+
"\n\n#{gen_anchor}#{gen_attributes}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -8,8 +8,10 @@ end
|
|
8
8
|
require_relative "block/core"
|
9
9
|
require_relative "block/example"
|
10
10
|
require_relative "block/literal"
|
11
|
-
require_relative "block/
|
11
|
+
require_relative "block/listing"
|
12
|
+
require_relative "block/open"
|
12
13
|
require_relative "block/pass"
|
14
|
+
require_relative "block/quote"
|
13
15
|
require_relative "block/side"
|
14
16
|
require_relative "block/sourcecode"
|
15
17
|
require_relative "block/reviewer_comment"
|
@@ -19,8 +19,8 @@ module Coradoc
|
|
19
19
|
m = @items.select do |i|
|
20
20
|
i.is_a?(Coradoc::Element::ListItem) &&
|
21
21
|
!i.marker.nil?
|
22
|
-
end.first&.marker
|
23
|
-
@ol_count = m.size
|
22
|
+
end.first&.marker.to_s
|
23
|
+
@ol_count = m.size
|
24
24
|
end
|
25
25
|
@ol_count = 1 if @ol_count.nil?
|
26
26
|
@attrs = options.fetch(:attrs, AttributeList.new)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Coradoc
|
2
2
|
module Element
|
3
3
|
class ListItem < Base
|
4
|
-
attr_accessor :marker, :id, :anchor, :content, :line_break
|
4
|
+
attr_accessor :marker, :id, :anchor, :content, :subitem, :line_break
|
5
5
|
|
6
6
|
declare_children :content, :id, :anchor
|
7
7
|
|
@@ -10,11 +10,14 @@ module Coradoc
|
|
10
10
|
@id = options.fetch(:id, nil)
|
11
11
|
@anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
|
12
12
|
@content = content
|
13
|
+
@attached = options.fetch(:attached, [])
|
14
|
+
@nested = options.fetch(:nested, nil)
|
13
15
|
@line_break = options.fetch(:line_break, "\n")
|
14
16
|
end
|
15
17
|
|
16
18
|
def to_adoc
|
17
|
-
anchor = @anchor.nil? ? "" : @anchor.to_adoc.to_s
|
19
|
+
anchor = @anchor.nil? ? "" : " #{@anchor.to_adoc.to_s} "
|
20
|
+
# text = Coradoc::Generator.gen_adoc(@content)
|
18
21
|
content = Array(@content).map do |subitem|
|
19
22
|
next if subitem.is_a? Inline::HardLineBreak
|
20
23
|
|
@@ -24,10 +27,15 @@ module Coradoc
|
|
24
27
|
if Coradoc.a_single?(subitem, Coradoc::Element::TextElement)
|
25
28
|
subcontent = Coradoc.strip_unicode(subcontent)
|
26
29
|
end
|
27
|
-
subcontent
|
30
|
+
subcontent
|
28
31
|
end.compact.join("\n+\n")
|
29
|
-
|
30
|
-
|
32
|
+
# attach = Coradoc::Generator.gen_adoc(@attached)
|
33
|
+
attach = @attached.map do |elem|
|
34
|
+
"+\n" + Coradoc::Generator.gen_adoc(elem)
|
35
|
+
end.join
|
36
|
+
nest = Coradoc::Generator.gen_adoc(@nested)
|
37
|
+
out = " #{anchor}#{content}#{@line_break}"
|
38
|
+
out + attach + nest
|
31
39
|
end
|
32
40
|
end
|
33
41
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Coradoc
|
2
2
|
module Element
|
3
3
|
class Section < Base
|
4
|
-
attr_accessor :id, :title, :attrs, :contents, :sections
|
4
|
+
attr_accessor :id, :title, :attrs, :contents, :sections, :anchor
|
5
5
|
|
6
6
|
declare_children :id, :title, :contents, :sections
|
7
7
|
|
@@ -49,7 +49,7 @@ module Coradoc
|
|
49
49
|
# HTML element and if it happens inside some other block element, can be
|
50
50
|
# safely collapsed.
|
51
51
|
def safe_to_collapse?
|
52
|
-
@title.nil? && @
|
52
|
+
@title.nil? && @sections.empty?
|
53
53
|
end
|
54
54
|
|
55
55
|
private
|
@@ -15,6 +15,15 @@ module Coradoc
|
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
+
def inspect
|
19
|
+
str = "TextElement"
|
20
|
+
str += "(#{@id})" if @id
|
21
|
+
str += ": "
|
22
|
+
str += @content.inspect
|
23
|
+
str += " + #{@line_break.inspect}" unless line_break.to_s.empty?
|
24
|
+
str
|
25
|
+
end
|
26
|
+
|
18
27
|
def to_adoc
|
19
28
|
Coradoc::Generator.gen_adoc(@content) + @line_break
|
20
29
|
end
|
@@ -72,14 +72,14 @@ module Coradoc::Input::HTML
|
|
72
72
|
leading_whitespace = $1
|
73
73
|
if !leading_whitespace.nil?
|
74
74
|
first_text = node.at_xpath("./text()[1]")
|
75
|
-
first_text.replace(first_text.text.lstrip)
|
75
|
+
first_text.replace(first_text.text.lstrip) if first_text
|
76
76
|
leading_whitespace = " "
|
77
77
|
end
|
78
78
|
node.text =~ /(\s+)$/
|
79
79
|
trailing_whitespace = $1
|
80
80
|
if !trailing_whitespace.nil?
|
81
81
|
last_text = node.at_xpath("./text()[last()]")
|
82
|
-
last_text.replace(last_text.text.rstrip)
|
82
|
+
last_text.replace(last_text.text.rstrip) if last_text
|
83
83
|
trailing_whitespace = " "
|
84
84
|
end
|
85
85
|
[leading_whitespace, trailing_whitespace]
|
@@ -114,6 +114,12 @@ module Coradoc::Input::HTML
|
|
114
114
|
columns = row.xpath("./td | ./th")
|
115
115
|
column_id = 0
|
116
116
|
|
117
|
+
cell_references[i] ||= []
|
118
|
+
cell_matrix[i] ||= []
|
119
|
+
|
120
|
+
# Empty row support: pass row object via an instance variable
|
121
|
+
cell_references[i].instance_variable_set(:@row_obj, row)
|
122
|
+
|
117
123
|
columns.each do |cell|
|
118
124
|
colspan = cell["colspan"]&.to_i || 1
|
119
125
|
rowspan = cell["rowspan"]&.to_i || 1
|
@@ -179,7 +185,7 @@ module Coradoc::Input::HTML
|
|
179
185
|
min_rows.each do |row|
|
180
186
|
break if row.length != cpr_min
|
181
187
|
|
182
|
-
row_obj = row.last
|
188
|
+
row_obj = row.last&.first&.parent || row.instance_variable_get(:@row_obj)
|
183
189
|
doc = row_obj.document
|
184
190
|
added_node = Nokogiri::XML::Node.new("td", doc)
|
185
191
|
added_node["x-added"] = "x-added"
|
@@ -4,6 +4,8 @@ module Coradoc::Input::HTML
|
|
4
4
|
# is compatible with what we would get out of Coradoc, if
|
5
5
|
# it parsed it directly.
|
6
6
|
class Postprocessor
|
7
|
+
Element = Coradoc::Element
|
8
|
+
|
7
9
|
def self.process(coradoc)
|
8
10
|
new(coradoc).process
|
9
11
|
end
|
@@ -12,17 +14,74 @@ module Coradoc::Input::HTML
|
|
12
14
|
@tree = coradoc
|
13
15
|
end
|
14
16
|
|
17
|
+
# Extracts titles from lists. This happens in HTML files
|
18
|
+
# generated from DOCX documents by LibreOffice.
|
19
|
+
#
|
20
|
+
# We are interested in a particular tree:
|
21
|
+
# Element::List::Ordered items:
|
22
|
+
# Element::List::Ordered items: (any depth)
|
23
|
+
# Element::ListItem content:
|
24
|
+
# Element::Title
|
25
|
+
# (any number of other titles of the same scheme)
|
26
|
+
#
|
27
|
+
# This tree is flattened into:
|
28
|
+
# Element::Title
|
29
|
+
# Element::Title (any number of titles)
|
30
|
+
def extract_titles_from_lists
|
31
|
+
@tree = Element::Base.visit(@tree) do |elem, dir|
|
32
|
+
next elem unless dir == :pre
|
33
|
+
next elem unless elem.is_a?(Element::List::Ordered)
|
34
|
+
next elem if elem.items.length != 1
|
35
|
+
|
36
|
+
anchors = []
|
37
|
+
anchors << elem.anchor if elem.anchor
|
38
|
+
|
39
|
+
# Extract ListItem from any depth of List::Ordered
|
40
|
+
processed = elem
|
41
|
+
while processed.is_a?(Element::List::Ordered)
|
42
|
+
if processed.items.length != 1
|
43
|
+
backtrack = true
|
44
|
+
break
|
45
|
+
end
|
46
|
+
anchors << processed.anchor if processed.anchor
|
47
|
+
processed = processed.items.first
|
48
|
+
end
|
49
|
+
|
50
|
+
# Something went wrong? Anything not matching on the way?
|
51
|
+
next elem if backtrack
|
52
|
+
next elem unless processed.is_a?(Element::ListItem)
|
53
|
+
|
54
|
+
anchors << processed.anchor if processed.anchor
|
55
|
+
|
56
|
+
# Now we must have a title (or titles).
|
57
|
+
titles = processed.content.flatten
|
58
|
+
|
59
|
+
# Don't bother if there's no title in there.
|
60
|
+
next elem unless titles.any? { |i| i.is_a? Element::Title }
|
61
|
+
|
62
|
+
# Ordered is another iteration for our cleanup.
|
63
|
+
next elem unless titles.all? do |i|
|
64
|
+
i.is_a?(Element::Title) || i.is_a?(Element::List::Ordered)
|
65
|
+
end
|
66
|
+
|
67
|
+
# We are done now.
|
68
|
+
titles + anchors
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
15
72
|
# Collapse DIVs that only have a title, or nest another DIV.
|
16
73
|
def collapse_meaningless_sections
|
17
|
-
@tree =
|
18
|
-
if elem.is_a?(
|
74
|
+
@tree = Element::Base.visit(@tree) do |elem, _dir|
|
75
|
+
if elem.is_a?(Element::Section) && elem.safe_to_collapse?
|
19
76
|
children_classes = Array(elem.contents).map(&:class)
|
20
77
|
count = children_classes.length
|
21
|
-
safe_classes = [
|
78
|
+
safe_classes = [Element::Section, Element::Title]
|
22
79
|
|
23
80
|
# Count > 0 because some documents use <div> as a <br>.
|
24
81
|
if count > 0 && children_classes.all? { |i| safe_classes.include?(i) }
|
25
|
-
|
82
|
+
contents = elem.contents.dup
|
83
|
+
contents.prepend(elem.anchor) if elem.anchor
|
84
|
+
next contents
|
26
85
|
end
|
27
86
|
end
|
28
87
|
elem
|
@@ -32,12 +91,14 @@ module Coradoc::Input::HTML
|
|
32
91
|
# tree should now be more cleaned up, so we can progress with
|
33
92
|
# creating meaningful sections
|
34
93
|
def generate_meaningful_sections
|
35
|
-
@tree =
|
94
|
+
@tree = Element::Base.visit(@tree) do |elem, dir|
|
36
95
|
# We are searching for an array, that has a title. This
|
37
96
|
# will be a candidate for our section array.
|
38
97
|
if dir == :post &&
|
39
98
|
elem.is_a?(Array) &&
|
40
|
-
!elem.grep(
|
99
|
+
!elem.flatten.grep(Element::Title).empty?
|
100
|
+
|
101
|
+
elem = elem.flatten
|
41
102
|
|
42
103
|
new_array = []
|
43
104
|
content_array = new_array
|
@@ -47,12 +108,12 @@ module Coradoc::Input::HTML
|
|
47
108
|
# all descendant sections into those sections. Otherwise, we push
|
48
109
|
# an element as content of current section.
|
49
110
|
elem.each do |e|
|
50
|
-
if e.is_a?
|
111
|
+
if e.is_a? Element::Title
|
51
112
|
title = e
|
52
113
|
content_array = []
|
53
114
|
section_array = []
|
54
115
|
level = title.level_int
|
55
|
-
section =
|
116
|
+
section = Element::Section.new(
|
56
117
|
title, contents: content_array, sections: section_array
|
57
118
|
)
|
58
119
|
# Some documents may not be consistent and eg. follow H4 after
|
@@ -82,11 +143,11 @@ module Coradoc::Input::HTML
|
|
82
143
|
previous_sections = {}
|
83
144
|
|
84
145
|
determine_section_id = ->(elem) do
|
85
|
-
if elem.title.style == "appendix"
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
146
|
+
level = if elem.title.style == "appendix"
|
147
|
+
"A"
|
148
|
+
else
|
149
|
+
1
|
150
|
+
end
|
90
151
|
|
91
152
|
section = previous_sections[elem]
|
92
153
|
while section
|
@@ -102,8 +163,8 @@ module Coradoc::Input::HTML
|
|
102
163
|
style
|
103
164
|
end
|
104
165
|
|
105
|
-
@tree =
|
106
|
-
title = elem.title if elem.is_a?(
|
166
|
+
@tree = Element::Base.visit(@tree) do |elem, dir|
|
167
|
+
title = elem.title if elem.is_a?(Element::Section)
|
107
168
|
|
108
169
|
if title && title.level_int <= max_level
|
109
170
|
if dir == :pre
|
@@ -137,6 +198,7 @@ module Coradoc::Input::HTML
|
|
137
198
|
end
|
138
199
|
|
139
200
|
def process
|
201
|
+
extract_titles_from_lists
|
140
202
|
collapse_meaningless_sections
|
141
203
|
generate_meaningful_sections
|
142
204
|
# Do it again to simplify the document further.
|
@@ -11,8 +11,13 @@ module Coradoc
|
|
11
11
|
match('[^\],]').repeat(1)
|
12
12
|
end
|
13
13
|
|
14
|
+
def named_key
|
15
|
+
(str('reviewer') |
|
16
|
+
match('[a-zA-Z0-9_-]').repeat(1)).as(:named_key)
|
17
|
+
end
|
18
|
+
|
14
19
|
def named_attribute
|
15
|
-
(
|
20
|
+
( named_key >>
|
16
21
|
str(' ').maybe >> str("=") >> str(' ').maybe >>
|
17
22
|
match['a-zA-Z0-9_\- \"'].repeat(1).as(:named_value) >>
|
18
23
|
str(' ').maybe
|
@@ -51,6 +56,7 @@ module Coradoc
|
|
51
56
|
end
|
52
57
|
|
53
58
|
def attribute_list(name = :attribute_list)
|
59
|
+
str('[').present? >>
|
54
60
|
str('[') >> str("[").absent? >>
|
55
61
|
( named_many |
|
56
62
|
positional_one_named_many |
|