coradoc 1.1.2 → 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/lib/coradoc/element/attribute_list.rb +13 -1
  3. data/lib/coradoc/element/base.rb +2 -0
  4. data/lib/coradoc/element/block/core.rb +4 -3
  5. data/lib/coradoc/element/block/example.rb +1 -1
  6. data/lib/coradoc/element/block/listing.rb +21 -0
  7. data/lib/coradoc/element/block/literal.rb +4 -2
  8. data/lib/coradoc/element/block/open.rb +22 -0
  9. data/lib/coradoc/element/block.rb +3 -1
  10. data/lib/coradoc/element/list/core.rb +2 -2
  11. data/lib/coradoc/element/list/ordered.rb +1 -0
  12. data/lib/coradoc/element/list/unordered.rb +1 -0
  13. data/lib/coradoc/element/list_item.rb +13 -5
  14. data/lib/coradoc/element/section.rb +2 -2
  15. data/lib/coradoc/element/text_element.rb +9 -0
  16. data/lib/coradoc/input/html/converters/base.rb +2 -2
  17. data/lib/coradoc/input/html/converters/div.rb +1 -0
  18. data/lib/coradoc/input/html/converters/table.rb +7 -1
  19. data/lib/coradoc/input/html/postprocessor.rb +77 -15
  20. data/lib/coradoc/parser/asciidoc/attribute_list.rb +7 -1
  21. data/lib/coradoc/parser/asciidoc/base.rb +52 -134
  22. data/lib/coradoc/parser/asciidoc/block.rb +51 -38
  23. data/lib/coradoc/parser/asciidoc/content.rb +13 -3
  24. data/lib/coradoc/parser/asciidoc/list.rb +56 -22
  25. data/lib/coradoc/parser/asciidoc/paragraph.rb +16 -4
  26. data/lib/coradoc/parser/asciidoc/section.rb +3 -1
  27. data/lib/coradoc/parser/asciidoc/term.rb +2 -0
  28. data/lib/coradoc/parser/asciidoc/text.rb +161 -0
  29. data/lib/coradoc/parser/base.rb +4 -28
  30. data/lib/coradoc/transformer.rb +23 -39
  31. data/lib/coradoc/version.rb +1 -1
  32. data/utils/round_trip.rb +1 -1
  33. metadata +5 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b6e77fc4eb6d79071cb1d309530adbd49a4364265b446d139c18eac2da3bce54
4
- data.tar.gz: 07a84837196bde12cc9d91ccd5084b8a173a4da5c237eeaea06a6d786c2f876c
3
+ metadata.gz: e528645f0bdb38f707239ed1862dc74944dc7eba4149cf9f1243458e98591edb
4
+ data.tar.gz: 268acd80823a507d3a86e83bfeaef55a8145819822aac9903f0bc96cee29d55c
5
5
  SHA512:
6
- metadata.gz: 69d2d12389a4e254b5ee8f4d67d3ddd406abeabebe1874b343acb4cd7c560c35284c8a58a4d2ee58cc788cfafd68989e88e783aa79fa3c32dc0f9dd389a276ac
7
- data.tar.gz: 7f034ef9a649446198793d8d8221a1bba5e22069f2feec85c5d30529170f8a87315540f481bc8b1219c03aaba78e42de67bbf94e25fd3e4ab93b6f221a356ca4
6
+ metadata.gz: 624f718300d0877f0d610a3fbf953b324d5bfd6a73c68de30db69ddc75b45e1b396eeb5bdfdcfc7d0ecac2d50c8301d7011f118a2d23d4ac02d11c2e54e92e6a
7
+ data.tar.gz: 849b5851b2ca0b37313d8f7c743defbc168f698284c694c0a467536dbffd9cc10f5ec263fd2c28e1f287a926c6acbe3756c8753f6f3ae303dc61fc9678981ff1
@@ -12,6 +12,16 @@ module Coradoc
12
12
  @rejected_named = []
13
13
  end
14
14
 
15
+ def inspect
16
+ "AttributeList: " +
17
+ [
18
+ @positional.map(&:inspect).join(", "),
19
+ @named.map { |k, v| "#{k}: #{v.inspect}" }.join(", "),
20
+ (@rejected_positional.empty? or "rejected: #{@rejected_positional.inspect}"),
21
+ (@rejected_positional.empty? or "rejected: #{@rejected_named.inspect}"),
22
+ ].reject { |i| i == true || i.empty? }.join(", ")
23
+ end
24
+
15
25
  def add_positional(*attr)
16
26
  @positional += attr
17
27
  end
@@ -65,7 +75,9 @@ module Coradoc
65
75
 
66
76
  adoc = +""
67
77
  if !@positional.empty?
68
- adoc << @positional.map { |p| [nil, ""].include?(p) ? '""' : p }.join(",")
78
+ adoc << @positional.map do |p|
79
+ [nil, ""].include?(p) ? '""' : p
80
+ end.join(",")
69
81
  end
70
82
  adoc << "," if @positional.any? && @named.any?
71
83
  adoc << @named.map do |k, v|
@@ -14,6 +14,8 @@ module Coradoc
14
14
  when Coradoc::Element::Section
15
15
  return content unless i.safe_to_collapse?
16
16
 
17
+ collected_content << i.anchor if i.anchor
18
+
17
19
  simplified = simplify_block_content(i.contents)
18
20
 
19
21
  if simplified && !simplified.empty?
@@ -61,12 +61,13 @@ module Coradoc
61
61
 
62
62
  def type_hash
63
63
  @type_hash ||= {
64
- "____" => :quote,
65
- "****" => :side,
66
- "----" => :source,
67
64
  "====" => :example,
68
65
  "...." => :literal,
66
+ "--" => :open,
69
67
  "++++" => :pass,
68
+ "____" => :quote,
69
+ "****" => :side,
70
+ "----" => :source,
70
71
  }
71
72
  end
72
73
  end
@@ -15,7 +15,7 @@ module Coradoc
15
15
  end
16
16
 
17
17
  def to_adoc
18
- "\n\n#{gen_anchor}#{gen_title}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
18
+ "\n\n#{gen_anchor}#{gen_title}#{gen_attributes}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
19
19
  end
20
20
  end
21
21
  end
@@ -0,0 +1,21 @@
1
+ module Coradoc
2
+ module Element
3
+ module Block
4
+ class Listing < Core
5
+ def initialize(_title, options = {})
6
+ @id = options.fetch(:id, nil)
7
+ @anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
8
+ @lang = options.fetch(:lang, "")
9
+ @attributes = options.fetch(:attributes, AttributeList.new)
10
+ @lines = options.fetch(:lines, [])
11
+ @delimiter_char = "-"
12
+ @delimiter_len = options.fetch(:delimiter_len, 4)
13
+ end
14
+
15
+ def to_adoc
16
+ "\n\n#{gen_anchor}#{gen_attributes}\n#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -2,16 +2,18 @@ module Coradoc
2
2
  module Element
3
3
  module Block
4
4
  class Literal < Core
5
- def initialize(_title, options = {})
5
+ def initialize(title, options = {})
6
+ @title = title
6
7
  @id = options.fetch(:id, nil)
7
8
  @anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
9
+ @attributes = options.fetch(:attributes, AttributeList.new)
8
10
  @lines = options.fetch(:lines, [])
9
11
  @delimiter_char = "."
10
12
  @delimiter_len = options.fetch(:delimiter_len, 4)
11
13
  end
12
14
 
13
15
  def to_adoc
14
- "\n\n#{gen_anchor}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
16
+ "\n\n#{gen_anchor}#{gen_title}#{gen_attributes}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
15
17
  end
16
18
  end
17
19
  end
@@ -0,0 +1,22 @@
1
+ module Coradoc
2
+ module Element
3
+ module Block
4
+ class Open < Core
5
+ def initialize(title, options = {})
6
+ @title = title
7
+ @id = options.fetch(:id, nil)
8
+ @anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
9
+ @lang = options.fetch(:lang, "")
10
+ @attributes = options.fetch(:attributes, AttributeList.new)
11
+ @lines = options.fetch(:lines, [])
12
+ @delimiter_char = "-"
13
+ @delimiter_len = options.fetch(:delimiter_len, 2)
14
+ end
15
+
16
+ def to_adoc
17
+ "\n\n#{gen_anchor}#{gen_attributes}#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -8,8 +8,10 @@ end
8
8
  require_relative "block/core"
9
9
  require_relative "block/example"
10
10
  require_relative "block/literal"
11
- require_relative "block/quote"
11
+ require_relative "block/listing"
12
+ require_relative "block/open"
12
13
  require_relative "block/pass"
14
+ require_relative "block/quote"
13
15
  require_relative "block/side"
14
16
  require_relative "block/sourcecode"
15
17
  require_relative "block/reviewer_comment"
@@ -19,8 +19,8 @@ module Coradoc
19
19
  m = @items.select do |i|
20
20
  i.is_a?(Coradoc::Element::ListItem) &&
21
21
  !i.marker.nil?
22
- end.first&.marker
23
- @ol_count = m.size if m.is_a?(String)
22
+ end.first&.marker.to_s
23
+ @ol_count = m.size
24
24
  end
25
25
  @ol_count = 1 if @ol_count.nil?
26
26
  @attrs = options.fetch(:attrs, AttributeList.new)
@@ -7,6 +7,7 @@ module Coradoc
7
7
  end
8
8
 
9
9
  def prefix
10
+ return @marker if @marker
10
11
  "." * [@ol_count, 1].max
11
12
  end
12
13
  end
@@ -7,6 +7,7 @@ module Coradoc
7
7
  end
8
8
 
9
9
  def prefix
10
+ return @marker if @marker
10
11
  "*" * [@ol_count, 1].max
11
12
  end
12
13
  end
@@ -1,7 +1,7 @@
1
1
  module Coradoc
2
2
  module Element
3
3
  class ListItem < Base
4
- attr_accessor :marker, :id, :anchor, :content, :line_break
4
+ attr_accessor :marker, :id, :anchor, :content, :subitem, :line_break
5
5
 
6
6
  declare_children :content, :id, :anchor
7
7
 
@@ -10,11 +10,14 @@ module Coradoc
10
10
  @id = options.fetch(:id, nil)
11
11
  @anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
12
12
  @content = content
13
+ @attached = options.fetch(:attached, [])
14
+ @nested = options.fetch(:nested, nil)
13
15
  @line_break = options.fetch(:line_break, "\n")
14
16
  end
15
17
 
16
18
  def to_adoc
17
- anchor = @anchor.nil? ? "" : @anchor.to_adoc.to_s
19
+ anchor = @anchor.nil? ? "" : " #{@anchor.to_adoc.to_s} "
20
+ # text = Coradoc::Generator.gen_adoc(@content)
18
21
  content = Array(@content).map do |subitem|
19
22
  next if subitem.is_a? Inline::HardLineBreak
20
23
 
@@ -24,10 +27,15 @@ module Coradoc
24
27
  if Coradoc.a_single?(subitem, Coradoc::Element::TextElement)
25
28
  subcontent = Coradoc.strip_unicode(subcontent)
26
29
  end
27
- subcontent.chomp
30
+ subcontent
28
31
  end.compact.join("\n+\n")
29
-
30
- " #{anchor}#{content.chomp}#{@line_break}"
32
+ # attach = Coradoc::Generator.gen_adoc(@attached)
33
+ attach = @attached.map do |elem|
34
+ "+\n" + Coradoc::Generator.gen_adoc(elem)
35
+ end.join
36
+ nest = Coradoc::Generator.gen_adoc(@nested)
37
+ out = " #{anchor}#{content}#{@line_break}"
38
+ out + attach + nest
31
39
  end
32
40
  end
33
41
  end
@@ -1,7 +1,7 @@
1
1
  module Coradoc
2
2
  module Element
3
3
  class Section < Base
4
- attr_accessor :id, :title, :attrs, :contents, :sections
4
+ attr_accessor :id, :title, :attrs, :contents, :sections, :anchor
5
5
 
6
6
  declare_children :id, :title, :contents, :sections
7
7
 
@@ -49,7 +49,7 @@ module Coradoc
49
49
  # HTML element and if it happens inside some other block element, can be
50
50
  # safely collapsed.
51
51
  def safe_to_collapse?
52
- @title.nil? && @id.nil? && @sections.empty?
52
+ @title.nil? && @sections.empty?
53
53
  end
54
54
 
55
55
  private
@@ -15,6 +15,15 @@ module Coradoc
15
15
  end
16
16
  end
17
17
 
18
+ def inspect
19
+ str = "TextElement"
20
+ str += "(#{@id})" if @id
21
+ str += ": "
22
+ str += @content.inspect
23
+ str += " + #{@line_break.inspect}" unless line_break.to_s.empty?
24
+ str
25
+ end
26
+
18
27
  def to_adoc
19
28
  Coradoc::Generator.gen_adoc(@content) + @line_break
20
29
  end
@@ -72,14 +72,14 @@ module Coradoc::Input::HTML
72
72
  leading_whitespace = $1
73
73
  if !leading_whitespace.nil?
74
74
  first_text = node.at_xpath("./text()[1]")
75
- first_text.replace(first_text.text.lstrip)
75
+ first_text.replace(first_text.text.lstrip) if first_text
76
76
  leading_whitespace = " "
77
77
  end
78
78
  node.text =~ /(\s+)$/
79
79
  trailing_whitespace = $1
80
80
  if !trailing_whitespace.nil?
81
81
  last_text = node.at_xpath("./text()[last()]")
82
- last_text.replace(last_text.text.rstrip)
82
+ last_text.replace(last_text.text.rstrip) if last_text
83
83
  trailing_whitespace = " "
84
84
  end
85
85
  [leading_whitespace, trailing_whitespace]
@@ -10,5 +10,6 @@ module Coradoc::Input::HTML
10
10
 
11
11
  register :div, Div.new
12
12
  register :article, Div.new
13
+ register :center, Div.new
13
14
  end
14
15
  end
@@ -114,6 +114,12 @@ module Coradoc::Input::HTML
114
114
  columns = row.xpath("./td | ./th")
115
115
  column_id = 0
116
116
 
117
+ cell_references[i] ||= []
118
+ cell_matrix[i] ||= []
119
+
120
+ # Empty row support: pass row object via an instance variable
121
+ cell_references[i].instance_variable_set(:@row_obj, row)
122
+
117
123
  columns.each do |cell|
118
124
  colspan = cell["colspan"]&.to_i || 1
119
125
  rowspan = cell["rowspan"]&.to_i || 1
@@ -179,7 +185,7 @@ module Coradoc::Input::HTML
179
185
  min_rows.each do |row|
180
186
  break if row.length != cpr_min
181
187
 
182
- row_obj = row.last.first.parent
188
+ row_obj = row.last&.first&.parent || row.instance_variable_get(:@row_obj)
183
189
  doc = row_obj.document
184
190
  added_node = Nokogiri::XML::Node.new("td", doc)
185
191
  added_node["x-added"] = "x-added"
@@ -4,6 +4,8 @@ module Coradoc::Input::HTML
4
4
  # is compatible with what we would get out of Coradoc, if
5
5
  # it parsed it directly.
6
6
  class Postprocessor
7
+ Element = Coradoc::Element
8
+
7
9
  def self.process(coradoc)
8
10
  new(coradoc).process
9
11
  end
@@ -12,17 +14,74 @@ module Coradoc::Input::HTML
12
14
  @tree = coradoc
13
15
  end
14
16
 
17
+ # Extracts titles from lists. This happens in HTML files
18
+ # generated from DOCX documents by LibreOffice.
19
+ #
20
+ # We are interested in a particular tree:
21
+ # Element::List::Ordered items:
22
+ # Element::List::Ordered items: (any depth)
23
+ # Element::ListItem content:
24
+ # Element::Title
25
+ # (any number of other titles of the same scheme)
26
+ #
27
+ # This tree is flattened into:
28
+ # Element::Title
29
+ # Element::Title (any number of titles)
30
+ def extract_titles_from_lists
31
+ @tree = Element::Base.visit(@tree) do |elem, dir|
32
+ next elem unless dir == :pre
33
+ next elem unless elem.is_a?(Element::List::Ordered)
34
+ next elem if elem.items.length != 1
35
+
36
+ anchors = []
37
+ anchors << elem.anchor if elem.anchor
38
+
39
+ # Extract ListItem from any depth of List::Ordered
40
+ processed = elem
41
+ while processed.is_a?(Element::List::Ordered)
42
+ if processed.items.length != 1
43
+ backtrack = true
44
+ break
45
+ end
46
+ anchors << processed.anchor if processed.anchor
47
+ processed = processed.items.first
48
+ end
49
+
50
+ # Something went wrong? Anything not matching on the way?
51
+ next elem if backtrack
52
+ next elem unless processed.is_a?(Element::ListItem)
53
+
54
+ anchors << processed.anchor if processed.anchor
55
+
56
+ # Now we must have a title (or titles).
57
+ titles = processed.content.flatten
58
+
59
+ # Don't bother if there's no title in there.
60
+ next elem unless titles.any? { |i| i.is_a? Element::Title }
61
+
62
+ # Ordered is another iteration for our cleanup.
63
+ next elem unless titles.all? do |i|
64
+ i.is_a?(Element::Title) || i.is_a?(Element::List::Ordered)
65
+ end
66
+
67
+ # We are done now.
68
+ titles + anchors
69
+ end
70
+ end
71
+
15
72
  # Collapse DIVs that only have a title, or nest another DIV.
16
73
  def collapse_meaningless_sections
17
- @tree = Coradoc::Element::Base.visit(@tree) do |elem, _dir|
18
- if elem.is_a?(Coradoc::Element::Section) && elem.safe_to_collapse?
74
+ @tree = Element::Base.visit(@tree) do |elem, _dir|
75
+ if elem.is_a?(Element::Section) && elem.safe_to_collapse?
19
76
  children_classes = Array(elem.contents).map(&:class)
20
77
  count = children_classes.length
21
- safe_classes = [Coradoc::Element::Section, Coradoc::Element::Title]
78
+ safe_classes = [Element::Section, Element::Title]
22
79
 
23
80
  # Count > 0 because some documents use <div> as a <br>.
24
81
  if count > 0 && children_classes.all? { |i| safe_classes.include?(i) }
25
- next elem.contents
82
+ contents = elem.contents.dup
83
+ contents.prepend(elem.anchor) if elem.anchor
84
+ next contents
26
85
  end
27
86
  end
28
87
  elem
@@ -32,12 +91,14 @@ module Coradoc::Input::HTML
32
91
  # tree should now be more cleaned up, so we can progress with
33
92
  # creating meaningful sections
34
93
  def generate_meaningful_sections
35
- @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
94
+ @tree = Element::Base.visit(@tree) do |elem, dir|
36
95
  # We are searching for an array, that has a title. This
37
96
  # will be a candidate for our section array.
38
97
  if dir == :post &&
39
98
  elem.is_a?(Array) &&
40
- !elem.grep(Coradoc::Element::Title).empty?
99
+ !elem.flatten.grep(Element::Title).empty?
100
+
101
+ elem = elem.flatten
41
102
 
42
103
  new_array = []
43
104
  content_array = new_array
@@ -47,12 +108,12 @@ module Coradoc::Input::HTML
47
108
  # all descendant sections into those sections. Otherwise, we push
48
109
  # an element as content of current section.
49
110
  elem.each do |e|
50
- if e.is_a? Coradoc::Element::Title
111
+ if e.is_a? Element::Title
51
112
  title = e
52
113
  content_array = []
53
114
  section_array = []
54
115
  level = title.level_int
55
- section = Coradoc::Element::Section.new(
116
+ section = Element::Section.new(
56
117
  title, contents: content_array, sections: section_array
57
118
  )
58
119
  # Some documents may not be consistent and eg. follow H4 after
@@ -82,11 +143,11 @@ module Coradoc::Input::HTML
82
143
  previous_sections = {}
83
144
 
84
145
  determine_section_id = ->(elem) do
85
- if elem.title.style == "appendix"
86
- level = "A"
87
- else
88
- level = 1
89
- end
146
+ level = if elem.title.style == "appendix"
147
+ "A"
148
+ else
149
+ 1
150
+ end
90
151
 
91
152
  section = previous_sections[elem]
92
153
  while section
@@ -102,8 +163,8 @@ module Coradoc::Input::HTML
102
163
  style
103
164
  end
104
165
 
105
- @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
106
- title = elem.title if elem.is_a?(Coradoc::Element::Section)
166
+ @tree = Element::Base.visit(@tree) do |elem, dir|
167
+ title = elem.title if elem.is_a?(Element::Section)
107
168
 
108
169
  if title && title.level_int <= max_level
109
170
  if dir == :pre
@@ -137,6 +198,7 @@ module Coradoc::Input::HTML
137
198
  end
138
199
 
139
200
  def process
201
+ extract_titles_from_lists
140
202
  collapse_meaningless_sections
141
203
  generate_meaningful_sections
142
204
  # Do it again to simplify the document further.
@@ -11,8 +11,13 @@ module Coradoc
11
11
  match('[^\],]').repeat(1)
12
12
  end
13
13
 
14
+ def named_key
15
+ (str('reviewer') |
16
+ match('[a-zA-Z0-9_-]').repeat(1)).as(:named_key)
17
+ end
18
+
14
19
  def named_attribute
15
- (match('[a-zA-Z0-9_-]').repeat(1).as(:named_key) >>
20
+ ( named_key >>
16
21
  str(' ').maybe >> str("=") >> str(' ').maybe >>
17
22
  match['a-zA-Z0-9_\- \"'].repeat(1).as(:named_value) >>
18
23
  str(' ').maybe
@@ -51,6 +56,7 @@ module Coradoc
51
56
  end
52
57
 
53
58
  def attribute_list(name = :attribute_list)
59
+ str('[').present? >>
54
60
  str('[') >> str("[").absent? >>
55
61
  ( named_many |
56
62
  positional_one_named_many |