coradoc 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/lib/coradoc/element/attribute_list.rb +13 -1
  3. data/lib/coradoc/element/base.rb +2 -0
  4. data/lib/coradoc/element/block/core.rb +4 -3
  5. data/lib/coradoc/element/block/example.rb +1 -1
  6. data/lib/coradoc/element/block/listing.rb +21 -0
  7. data/lib/coradoc/element/block/literal.rb +4 -2
  8. data/lib/coradoc/element/block/open.rb +22 -0
  9. data/lib/coradoc/element/block.rb +3 -1
  10. data/lib/coradoc/element/list/core.rb +2 -2
  11. data/lib/coradoc/element/list/ordered.rb +1 -0
  12. data/lib/coradoc/element/list/unordered.rb +1 -0
  13. data/lib/coradoc/element/list_item.rb +13 -5
  14. data/lib/coradoc/element/section.rb +2 -2
  15. data/lib/coradoc/element/text_element.rb +9 -0
  16. data/lib/coradoc/input/html/converters/base.rb +2 -2
  17. data/lib/coradoc/input/html/converters/div.rb +1 -0
  18. data/lib/coradoc/input/html/converters/table.rb +7 -1
  19. data/lib/coradoc/input/html/postprocessor.rb +77 -15
  20. data/lib/coradoc/parser/asciidoc/attribute_list.rb +7 -1
  21. data/lib/coradoc/parser/asciidoc/base.rb +52 -134
  22. data/lib/coradoc/parser/asciidoc/block.rb +51 -38
  23. data/lib/coradoc/parser/asciidoc/content.rb +13 -3
  24. data/lib/coradoc/parser/asciidoc/list.rb +56 -22
  25. data/lib/coradoc/parser/asciidoc/paragraph.rb +16 -4
  26. data/lib/coradoc/parser/asciidoc/section.rb +3 -1
  27. data/lib/coradoc/parser/asciidoc/term.rb +2 -0
  28. data/lib/coradoc/parser/asciidoc/text.rb +161 -0
  29. data/lib/coradoc/parser/base.rb +4 -28
  30. data/lib/coradoc/transformer.rb +23 -39
  31. data/lib/coradoc/version.rb +1 -1
  32. data/utils/round_trip.rb +1 -1
  33. metadata +5 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b6e77fc4eb6d79071cb1d309530adbd49a4364265b446d139c18eac2da3bce54
4
- data.tar.gz: 07a84837196bde12cc9d91ccd5084b8a173a4da5c237eeaea06a6d786c2f876c
3
+ metadata.gz: e528645f0bdb38f707239ed1862dc74944dc7eba4149cf9f1243458e98591edb
4
+ data.tar.gz: 268acd80823a507d3a86e83bfeaef55a8145819822aac9903f0bc96cee29d55c
5
5
  SHA512:
6
- metadata.gz: 69d2d12389a4e254b5ee8f4d67d3ddd406abeabebe1874b343acb4cd7c560c35284c8a58a4d2ee58cc788cfafd68989e88e783aa79fa3c32dc0f9dd389a276ac
7
- data.tar.gz: 7f034ef9a649446198793d8d8221a1bba5e22069f2feec85c5d30529170f8a87315540f481bc8b1219c03aaba78e42de67bbf94e25fd3e4ab93b6f221a356ca4
6
+ metadata.gz: 624f718300d0877f0d610a3fbf953b324d5bfd6a73c68de30db69ddc75b45e1b396eeb5bdfdcfc7d0ecac2d50c8301d7011f118a2d23d4ac02d11c2e54e92e6a
7
+ data.tar.gz: 849b5851b2ca0b37313d8f7c743defbc168f698284c694c0a467536dbffd9cc10f5ec263fd2c28e1f287a926c6acbe3756c8753f6f3ae303dc61fc9678981ff1
@@ -12,6 +12,16 @@ module Coradoc
12
12
  @rejected_named = []
13
13
  end
14
14
 
15
+ def inspect
16
+ "AttributeList: " +
17
+ [
18
+ @positional.map(&:inspect).join(", "),
19
+ @named.map { |k, v| "#{k}: #{v.inspect}" }.join(", "),
20
+ (@rejected_positional.empty? or "rejected: #{@rejected_positional.inspect}"),
21
+ (@rejected_positional.empty? or "rejected: #{@rejected_named.inspect}"),
22
+ ].reject { |i| i == true || i.empty? }.join(", ")
23
+ end
24
+
15
25
  def add_positional(*attr)
16
26
  @positional += attr
17
27
  end
@@ -65,7 +75,9 @@ module Coradoc
65
75
 
66
76
  adoc = +""
67
77
  if !@positional.empty?
68
- adoc << @positional.map { |p| [nil, ""].include?(p) ? '""' : p }.join(",")
78
+ adoc << @positional.map do |p|
79
+ [nil, ""].include?(p) ? '""' : p
80
+ end.join(",")
69
81
  end
70
82
  adoc << "," if @positional.any? && @named.any?
71
83
  adoc << @named.map do |k, v|
@@ -14,6 +14,8 @@ module Coradoc
14
14
  when Coradoc::Element::Section
15
15
  return content unless i.safe_to_collapse?
16
16
 
17
+ collected_content << i.anchor if i.anchor
18
+
17
19
  simplified = simplify_block_content(i.contents)
18
20
 
19
21
  if simplified && !simplified.empty?
@@ -61,12 +61,13 @@ module Coradoc
61
61
 
62
62
  def type_hash
63
63
  @type_hash ||= {
64
- "____" => :quote,
65
- "****" => :side,
66
- "----" => :source,
67
64
  "====" => :example,
68
65
  "...." => :literal,
66
+ "--" => :open,
69
67
  "++++" => :pass,
68
+ "____" => :quote,
69
+ "****" => :side,
70
+ "----" => :source,
70
71
  }
71
72
  end
72
73
  end
@@ -15,7 +15,7 @@ module Coradoc
15
15
  end
16
16
 
17
17
  def to_adoc
18
- "\n\n#{gen_anchor}#{gen_title}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
18
+ "\n\n#{gen_anchor}#{gen_title}#{gen_attributes}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
19
19
  end
20
20
  end
21
21
  end
@@ -0,0 +1,21 @@
1
+ module Coradoc
2
+ module Element
3
+ module Block
4
+ class Listing < Core
5
+ def initialize(_title, options = {})
6
+ @id = options.fetch(:id, nil)
7
+ @anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
8
+ @lang = options.fetch(:lang, "")
9
+ @attributes = options.fetch(:attributes, AttributeList.new)
10
+ @lines = options.fetch(:lines, [])
11
+ @delimiter_char = "-"
12
+ @delimiter_len = options.fetch(:delimiter_len, 4)
13
+ end
14
+
15
+ def to_adoc
16
+ "\n\n#{gen_anchor}#{gen_attributes}\n#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -2,16 +2,18 @@ module Coradoc
2
2
  module Element
3
3
  module Block
4
4
  class Literal < Core
5
- def initialize(_title, options = {})
5
+ def initialize(title, options = {})
6
+ @title = title
6
7
  @id = options.fetch(:id, nil)
7
8
  @anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
9
+ @attributes = options.fetch(:attributes, AttributeList.new)
8
10
  @lines = options.fetch(:lines, [])
9
11
  @delimiter_char = "."
10
12
  @delimiter_len = options.fetch(:delimiter_len, 4)
11
13
  end
12
14
 
13
15
  def to_adoc
14
- "\n\n#{gen_anchor}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
16
+ "\n\n#{gen_anchor}#{gen_title}#{gen_attributes}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
15
17
  end
16
18
  end
17
19
  end
@@ -0,0 +1,22 @@
1
+ module Coradoc
2
+ module Element
3
+ module Block
4
+ class Open < Core
5
+ def initialize(title, options = {})
6
+ @title = title
7
+ @id = options.fetch(:id, nil)
8
+ @anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
9
+ @lang = options.fetch(:lang, "")
10
+ @attributes = options.fetch(:attributes, AttributeList.new)
11
+ @lines = options.fetch(:lines, [])
12
+ @delimiter_char = "-"
13
+ @delimiter_len = options.fetch(:delimiter_len, 2)
14
+ end
15
+
16
+ def to_adoc
17
+ "\n\n#{gen_anchor}#{gen_attributes}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -8,8 +8,10 @@ end
8
8
  require_relative "block/core"
9
9
  require_relative "block/example"
10
10
  require_relative "block/literal"
11
- require_relative "block/quote"
11
+ require_relative "block/listing"
12
+ require_relative "block/open"
12
13
  require_relative "block/pass"
14
+ require_relative "block/quote"
13
15
  require_relative "block/side"
14
16
  require_relative "block/sourcecode"
15
17
  require_relative "block/reviewer_comment"
@@ -19,8 +19,8 @@ module Coradoc
19
19
  m = @items.select do |i|
20
20
  i.is_a?(Coradoc::Element::ListItem) &&
21
21
  !i.marker.nil?
22
- end.first&.marker
23
- @ol_count = m.size if m.is_a?(String)
22
+ end.first&.marker.to_s
23
+ @ol_count = m.size
24
24
  end
25
25
  @ol_count = 1 if @ol_count.nil?
26
26
  @attrs = options.fetch(:attrs, AttributeList.new)
@@ -7,6 +7,7 @@ module Coradoc
7
7
  end
8
8
 
9
9
  def prefix
10
+ return @marker if @marker
10
11
  "." * [@ol_count, 1].max
11
12
  end
12
13
  end
@@ -7,6 +7,7 @@ module Coradoc
7
7
  end
8
8
 
9
9
  def prefix
10
+ return @marker if @marker
10
11
  "*" * [@ol_count, 1].max
11
12
  end
12
13
  end
@@ -1,7 +1,7 @@
1
1
  module Coradoc
2
2
  module Element
3
3
  class ListItem < Base
4
- attr_accessor :marker, :id, :anchor, :content, :line_break
4
+ attr_accessor :marker, :id, :anchor, :content, :subitem, :line_break
5
5
 
6
6
  declare_children :content, :id, :anchor
7
7
 
@@ -10,11 +10,14 @@ module Coradoc
10
10
  @id = options.fetch(:id, nil)
11
11
  @anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
12
12
  @content = content
13
+ @attached = options.fetch(:attached, [])
14
+ @nested = options.fetch(:nested, nil)
13
15
  @line_break = options.fetch(:line_break, "\n")
14
16
  end
15
17
 
16
18
  def to_adoc
17
- anchor = @anchor.nil? ? "" : @anchor.to_adoc.to_s
19
+ anchor = @anchor.nil? ? "" : " #{@anchor.to_adoc.to_s} "
20
+ # text = Coradoc::Generator.gen_adoc(@content)
18
21
  content = Array(@content).map do |subitem|
19
22
  next if subitem.is_a? Inline::HardLineBreak
20
23
 
@@ -24,10 +27,15 @@ module Coradoc
24
27
  if Coradoc.a_single?(subitem, Coradoc::Element::TextElement)
25
28
  subcontent = Coradoc.strip_unicode(subcontent)
26
29
  end
27
- subcontent.chomp
30
+ subcontent
28
31
  end.compact.join("\n+\n")
29
-
30
- " #{anchor}#{content.chomp}#{@line_break}"
32
+ # attach = Coradoc::Generator.gen_adoc(@attached)
33
+ attach = @attached.map do |elem|
34
+ "+\n" + Coradoc::Generator.gen_adoc(elem)
35
+ end.join
36
+ nest = Coradoc::Generator.gen_adoc(@nested)
37
+ out = " #{anchor}#{content}#{@line_break}"
38
+ out + attach + nest
31
39
  end
32
40
  end
33
41
  end
@@ -1,7 +1,7 @@
1
1
  module Coradoc
2
2
  module Element
3
3
  class Section < Base
4
- attr_accessor :id, :title, :attrs, :contents, :sections
4
+ attr_accessor :id, :title, :attrs, :contents, :sections, :anchor
5
5
 
6
6
  declare_children :id, :title, :contents, :sections
7
7
 
@@ -49,7 +49,7 @@ module Coradoc
49
49
  # HTML element and if it happens inside some other block element, can be
50
50
  # safely collapsed.
51
51
  def safe_to_collapse?
52
- @title.nil? && @id.nil? && @sections.empty?
52
+ @title.nil? && @sections.empty?
53
53
  end
54
54
 
55
55
  private
@@ -15,6 +15,15 @@ module Coradoc
15
15
  end
16
16
  end
17
17
 
18
+ def inspect
19
+ str = "TextElement"
20
+ str += "(#{@id})" if @id
21
+ str += ": "
22
+ str += @content.inspect
23
+ str += " + #{@line_break.inspect}" unless line_break.to_s.empty?
24
+ str
25
+ end
26
+
18
27
  def to_adoc
19
28
  Coradoc::Generator.gen_adoc(@content) + @line_break
20
29
  end
@@ -72,14 +72,14 @@ module Coradoc::Input::HTML
72
72
  leading_whitespace = $1
73
73
  if !leading_whitespace.nil?
74
74
  first_text = node.at_xpath("./text()[1]")
75
- first_text.replace(first_text.text.lstrip)
75
+ first_text.replace(first_text.text.lstrip) if first_text
76
76
  leading_whitespace = " "
77
77
  end
78
78
  node.text =~ /(\s+)$/
79
79
  trailing_whitespace = $1
80
80
  if !trailing_whitespace.nil?
81
81
  last_text = node.at_xpath("./text()[last()]")
82
- last_text.replace(last_text.text.rstrip)
82
+ last_text.replace(last_text.text.rstrip) if last_text
83
83
  trailing_whitespace = " "
84
84
  end
85
85
  [leading_whitespace, trailing_whitespace]
@@ -10,5 +10,6 @@ module Coradoc::Input::HTML
10
10
 
11
11
  register :div, Div.new
12
12
  register :article, Div.new
13
+ register :center, Div.new
13
14
  end
14
15
  end
@@ -114,6 +114,12 @@ module Coradoc::Input::HTML
114
114
  columns = row.xpath("./td | ./th")
115
115
  column_id = 0
116
116
 
117
+ cell_references[i] ||= []
118
+ cell_matrix[i] ||= []
119
+
120
+ # Empty row support: pass row object via an instance variable
121
+ cell_references[i].instance_variable_set(:@row_obj, row)
122
+
117
123
  columns.each do |cell|
118
124
  colspan = cell["colspan"]&.to_i || 1
119
125
  rowspan = cell["rowspan"]&.to_i || 1
@@ -179,7 +185,7 @@ module Coradoc::Input::HTML
179
185
  min_rows.each do |row|
180
186
  break if row.length != cpr_min
181
187
 
182
- row_obj = row.last.first.parent
188
+ row_obj = row.last&.first&.parent || row.instance_variable_get(:@row_obj)
183
189
  doc = row_obj.document
184
190
  added_node = Nokogiri::XML::Node.new("td", doc)
185
191
  added_node["x-added"] = "x-added"
@@ -4,6 +4,8 @@ module Coradoc::Input::HTML
4
4
  # is compatible with what we would get out of Coradoc, if
5
5
  # it parsed it directly.
6
6
  class Postprocessor
7
+ Element = Coradoc::Element
8
+
7
9
  def self.process(coradoc)
8
10
  new(coradoc).process
9
11
  end
@@ -12,17 +14,74 @@ module Coradoc::Input::HTML
12
14
  @tree = coradoc
13
15
  end
14
16
 
17
+ # Extracts titles from lists. This happens in HTML files
18
+ # generated from DOCX documents by LibreOffice.
19
+ #
20
+ # We are interested in a particular tree:
21
+ # Element::List::Ordered items:
22
+ # Element::List::Ordered items: (any depth)
23
+ # Element::ListItem content:
24
+ # Element::Title
25
+ # (any number of other titles of the same scheme)
26
+ #
27
+ # This tree is flattened into:
28
+ # Element::Title
29
+ # Element::Title (any number of titles)
30
+ def extract_titles_from_lists
31
+ @tree = Element::Base.visit(@tree) do |elem, dir|
32
+ next elem unless dir == :pre
33
+ next elem unless elem.is_a?(Element::List::Ordered)
34
+ next elem if elem.items.length != 1
35
+
36
+ anchors = []
37
+ anchors << elem.anchor if elem.anchor
38
+
39
+ # Extract ListItem from any depth of List::Ordered
40
+ processed = elem
41
+ while processed.is_a?(Element::List::Ordered)
42
+ if processed.items.length != 1
43
+ backtrack = true
44
+ break
45
+ end
46
+ anchors << processed.anchor if processed.anchor
47
+ processed = processed.items.first
48
+ end
49
+
50
+ # Something went wrong? Anything not matching on the way?
51
+ next elem if backtrack
52
+ next elem unless processed.is_a?(Element::ListItem)
53
+
54
+ anchors << processed.anchor if processed.anchor
55
+
56
+ # Now we must have a title (or titles).
57
+ titles = processed.content.flatten
58
+
59
+ # Don't bother if there's no title in there.
60
+ next elem unless titles.any? { |i| i.is_a? Element::Title }
61
+
62
+ # Ordered is another iteration for our cleanup.
63
+ next elem unless titles.all? do |i|
64
+ i.is_a?(Element::Title) || i.is_a?(Element::List::Ordered)
65
+ end
66
+
67
+ # We are done now.
68
+ titles + anchors
69
+ end
70
+ end
71
+
15
72
  # Collapse DIVs that only have a title, or nest another DIV.
16
73
  def collapse_meaningless_sections
17
- @tree = Coradoc::Element::Base.visit(@tree) do |elem, _dir|
18
- if elem.is_a?(Coradoc::Element::Section) && elem.safe_to_collapse?
74
+ @tree = Element::Base.visit(@tree) do |elem, _dir|
75
+ if elem.is_a?(Element::Section) && elem.safe_to_collapse?
19
76
  children_classes = Array(elem.contents).map(&:class)
20
77
  count = children_classes.length
21
- safe_classes = [Coradoc::Element::Section, Coradoc::Element::Title]
78
+ safe_classes = [Element::Section, Element::Title]
22
79
 
23
80
  # Count > 0 because some documents use <div> as a <br>.
24
81
  if count > 0 && children_classes.all? { |i| safe_classes.include?(i) }
25
- next elem.contents
82
+ contents = elem.contents.dup
83
+ contents.prepend(elem.anchor) if elem.anchor
84
+ next contents
26
85
  end
27
86
  end
28
87
  elem
@@ -32,12 +91,14 @@ module Coradoc::Input::HTML
32
91
  # tree should now be more cleaned up, so we can progress with
33
92
  # creating meaningful sections
34
93
  def generate_meaningful_sections
35
- @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
94
+ @tree = Element::Base.visit(@tree) do |elem, dir|
36
95
  # We are searching for an array, that has a title. This
37
96
  # will be a candidate for our section array.
38
97
  if dir == :post &&
39
98
  elem.is_a?(Array) &&
40
- !elem.grep(Coradoc::Element::Title).empty?
99
+ !elem.flatten.grep(Element::Title).empty?
100
+
101
+ elem = elem.flatten
41
102
 
42
103
  new_array = []
43
104
  content_array = new_array
@@ -47,12 +108,12 @@ module Coradoc::Input::HTML
47
108
  # all descendant sections into those sections. Otherwise, we push
48
109
  # an element as content of current section.
49
110
  elem.each do |e|
50
- if e.is_a? Coradoc::Element::Title
111
+ if e.is_a? Element::Title
51
112
  title = e
52
113
  content_array = []
53
114
  section_array = []
54
115
  level = title.level_int
55
- section = Coradoc::Element::Section.new(
116
+ section = Element::Section.new(
56
117
  title, contents: content_array, sections: section_array
57
118
  )
58
119
  # Some documents may not be consistent and eg. follow H4 after
@@ -82,11 +143,11 @@ module Coradoc::Input::HTML
82
143
  previous_sections = {}
83
144
 
84
145
  determine_section_id = ->(elem) do
85
- if elem.title.style == "appendix"
86
- level = "A"
87
- else
88
- level = 1
89
- end
146
+ level = if elem.title.style == "appendix"
147
+ "A"
148
+ else
149
+ 1
150
+ end
90
151
 
91
152
  section = previous_sections[elem]
92
153
  while section
@@ -102,8 +163,8 @@ module Coradoc::Input::HTML
102
163
  style
103
164
  end
104
165
 
105
- @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
106
- title = elem.title if elem.is_a?(Coradoc::Element::Section)
166
+ @tree = Element::Base.visit(@tree) do |elem, dir|
167
+ title = elem.title if elem.is_a?(Element::Section)
107
168
 
108
169
  if title && title.level_int <= max_level
109
170
  if dir == :pre
@@ -137,6 +198,7 @@ module Coradoc::Input::HTML
137
198
  end
138
199
 
139
200
  def process
201
+ extract_titles_from_lists
140
202
  collapse_meaningless_sections
141
203
  generate_meaningful_sections
142
204
  # Do it again to simplify the document further.
@@ -11,8 +11,13 @@ module Coradoc
11
11
  match('[^\],]').repeat(1)
12
12
  end
13
13
 
14
+ def named_key
15
+ (str('reviewer') |
16
+ match('[a-zA-Z0-9_-]').repeat(1)).as(:named_key)
17
+ end
18
+
14
19
  def named_attribute
15
- (match('[a-zA-Z0-9_-]').repeat(1).as(:named_key) >>
20
+ ( named_key >>
16
21
  str(' ').maybe >> str("=") >> str(' ').maybe >>
17
22
  match['a-zA-Z0-9_\- \"'].repeat(1).as(:named_value) >>
18
23
  str(' ').maybe
@@ -51,6 +56,7 @@ module Coradoc
51
56
  end
52
57
 
53
58
  def attribute_list(name = :attribute_list)
59
+ str('[').present? >>
54
60
  str('[') >> str("[").absent? >>
55
61
  ( named_many |
56
62
  positional_one_named_many |