coradoc 1.1.1 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0187fda1ca8860383e973840ca92de0d24b7de0f4b548eb92364b83dc76773c2'
4
- data.tar.gz: bcccecebdf83ef11138182fc66ad27f5e24bb01d0b2824fa8b470d3c32ba413a
3
+ metadata.gz: 4f515fbc05baa87f58f84a59737c9818603c6e9f0fc8835cdeb9bd6be9eb39e4
4
+ data.tar.gz: ce51ff395a3dfb4bf77c37f6b54fe745a14a7262911df50479088d43392d4927
5
5
  SHA512:
6
- metadata.gz: 37f09a7b2ba8fc861c4cf88f8c5638e6c654cc19a65b1b6c68314c6d9ff1b45f046559d573a849ee972b4dfa0422384271c1951ce5baa02491dc1c03000c2e77
7
- data.tar.gz: c6844f3ccad625662b838cba656fa6e1f773675548d4fe664278f93569f7cf0ae86edcd70e26de2e60cb487090d3873720bc0c6e0f6892f8559e77a78129af4d
6
+ metadata.gz: 3269512745aea59b9780e5df2d872af7c3f19851666868335bc4876a534e50c64df410c5257a335324e9a9e0ad573dacf00058752ee00fd4b830f2f150cadeb2
7
+ data.tar.gz: 00e4122ec5e234e8e7e54d3e5769e03df15ad0f9851059576dadde7de58c0e2dd87f3c18ab14283574347bda6c95536cf31ee3ce6c202ea8b4c3f0da96f3f399
data/coradoc.gemspec CHANGED
@@ -36,7 +36,7 @@ Gem::Specification.new do |spec|
36
36
  spec.add_dependency "premailer", "~> 1.11.0"
37
37
  spec.add_dependency "word-to-markdown"
38
38
  spec.add_dependency "base64"
39
- spec.add_dependency "thor"
39
+ spec.add_dependency "thor", ">= 1.3.0"
40
40
  spec.add_development_dependency "codeclimate-test-reporter"
41
41
  spec.add_development_dependency "pry"
42
42
  spec.add_development_dependency "rake"
@@ -12,6 +12,16 @@ module Coradoc
12
12
  @rejected_named = []
13
13
  end
14
14
 
15
# Builds a compact, single-line summary of this attribute list for
# debugging output.
#
# The summary lists the positional attributes, then the named attributes
# as "key: value" pairs, and finally any rejected positional and rejected
# named attributes. Empty groups are omitted entirely.
#
# @return [String] the summary, always prefixed with "AttributeList: "
def inspect
  parts = [
    @positional.map(&:inspect).join(", "),
    @named.map { |k, v| "#{k}: #{v.inspect}" }.join(", "),
  ]

  # Each rejected group is guarded by its own collection, so rejected named
  # attributes are reported even when no positional ones were rejected.
  parts << "rejected: #{@rejected_positional.inspect}" unless @rejected_positional.empty?
  parts << "rejected: #{@rejected_named.inspect}" unless @rejected_named.empty?

  "AttributeList: " + parts.reject(&:empty?).join(", ")
end
24
+
15
25
  def add_positional(*attr)
16
26
  @positional += attr
17
27
  end
@@ -65,7 +75,9 @@ module Coradoc
65
75
 
66
76
  adoc = +""
67
77
  if !@positional.empty?
68
- adoc << @positional.map { |p| [nil, ""].include?(p) ? '""' : p }.join(",")
78
+ adoc << @positional.map do |p|
79
+ [nil, ""].include?(p) ? '""' : p
80
+ end.join(",")
69
81
  end
70
82
  adoc << "," if @positional.any? && @named.any?
71
83
  adoc << @named.map do |k, v|
@@ -14,6 +14,8 @@ module Coradoc
14
14
  when Coradoc::Element::Section
15
15
  return content unless i.safe_to_collapse?
16
16
 
17
+ collected_content << i.anchor if i.anchor
18
+
17
19
  simplified = simplify_block_content(i.contents)
18
20
 
19
21
  if simplified && !simplified.empty?
@@ -1,7 +1,7 @@
1
1
  module Coradoc
2
2
  module Element
3
3
  class Section < Base
4
- attr_accessor :id, :title, :attrs, :contents, :sections
4
+ attr_accessor :id, :title, :attrs, :contents, :sections, :anchor
5
5
 
6
6
  declare_children :id, :title, :contents, :sections
7
7
 
@@ -49,7 +49,7 @@ module Coradoc
49
49
  # HTML element and if it happens inside some other block element, can be
50
50
  # safely collapsed.
51
51
  def safe_to_collapse?
52
- @title.nil? && @id.nil? && @sections.empty?
52
+ @title.nil? && @sections.empty?
53
53
  end
54
54
 
55
55
  private
@@ -15,6 +15,15 @@ module Coradoc
15
15
  end
16
16
  end
17
17
 
18
# Builds a short debugging representation of this text element.
#
# Format: "TextElement(<id>): <content.inspect> + <line_break.inspect>",
# where the "(<id>)" part appears only when an id is set and the
# " + <line_break>" part appears only when the line break is non-empty.
#
# @return [String] the debugging representation
def inspect
  # Unary plus yields an unfrozen buffer, so the appends below mutate one
  # string instead of allocating a new one per step.
  summary = +"TextElement"
  summary << "(#{@id})" if @id
  summary << ": " << @content.inspect
  # Read the instance variable for both the check and the output.
  summary << " + #{@line_break.inspect}" unless @line_break.empty?
  summary
end
26
+
18
27
  def to_adoc
19
28
  Coradoc::Generator.gen_adoc(@content) + @line_break
20
29
  end
@@ -10,9 +10,9 @@ module Coradoc::Input::HTML
10
10
 
11
11
  # Note: treat_children won't run plugin hooks
12
12
  def treat_children(node, state)
13
- node.children.inject("") do |memo, child|
14
- memo << treat(child, state)
15
- end
13
+ node.children.map do |child|
14
+ treat(child, state)
15
+ end.join
16
16
  end
17
17
 
18
18
  def treat(node, state)
@@ -20,9 +20,9 @@ module Coradoc::Input::HTML
20
20
  end
21
21
 
22
22
  def treat_children_coradoc(node, state)
23
- node.children.inject([]) do |memo, child|
24
- memo << treat_coradoc(child, state)
25
- end.flatten.reject { |x| x == "" || x.nil? }
23
+ node.children.map do |child|
24
+ treat_coradoc(child, state)
25
+ end.flatten.reject { |x| x.to_s.empty? }
26
26
  end
27
27
 
28
28
  def treat_coradoc(node, state)
@@ -72,14 +72,14 @@ module Coradoc::Input::HTML
72
72
  leading_whitespace = $1
73
73
  if !leading_whitespace.nil?
74
74
  first_text = node.at_xpath("./text()[1]")
75
- first_text.replace(first_text.text.lstrip)
75
+ first_text.replace(first_text.text.lstrip) if first_text
76
76
  leading_whitespace = " "
77
77
  end
78
78
  node.text =~ /(\s+)$/
79
79
  trailing_whitespace = $1
80
80
  if !trailing_whitespace.nil?
81
81
  last_text = node.at_xpath("./text()[last()]")
82
- last_text.replace(last_text.text.rstrip)
82
+ last_text.replace(last_text.text.rstrip) if last_text
83
83
  trailing_whitespace = " "
84
84
  end
85
85
  [leading_whitespace, trailing_whitespace]
@@ -10,5 +10,6 @@ module Coradoc::Input::HTML
10
10
 
11
11
  register :div, Div.new
12
12
  register :article, Div.new
13
+ register :center, Div.new
13
14
  end
14
15
  end
@@ -6,7 +6,9 @@ module Coradoc::Input::HTML
6
6
  internal_anchor = treat_children_anchors(node, state)
7
7
 
8
8
  if id.to_s.empty? && internal_anchor.size.positive?
9
- id = internal_anchor.first.id
9
+ if internal_anchor.first.respond_to? :id
10
+ id = internal_anchor.first.id
11
+ end
10
12
  end
11
13
 
12
14
  level = node.name[/\d/].to_i
@@ -16,14 +18,14 @@ module Coradoc::Input::HTML
16
18
  end
17
19
 
18
20
  def treat_children_no_anchors(node, state)
19
- node.children.reject { |a| a.name == "a" }.inject([]) do |memo, child|
20
- memo << treat_coradoc(child, state)
21
+ node.children.reject { |a| a.name == "a" }.map do |child|
22
+ treat_coradoc(child, state)
21
23
  end
22
24
  end
23
25
 
24
26
  def treat_children_anchors(node, state)
25
- node.children.select { |a| a.name == "a" }.inject([]) do |memo, child|
26
- memo << treat_coradoc(child, state)
27
+ node.children.select { |a| a.name == "a" }.map do |child|
28
+ treat_coradoc(child, state)
27
29
  end
28
30
  end
29
31
  end
@@ -4,6 +4,8 @@ module Coradoc::Input::HTML
4
4
  # is compatible with what we would get out of Coradoc, if
5
5
  # it parsed it directly.
6
6
  class Postprocessor
7
+ Element = Coradoc::Element
8
+
7
9
  def self.process(coradoc)
8
10
  new(coradoc).process
9
11
  end
@@ -12,17 +14,74 @@ module Coradoc::Input::HTML
12
14
  @tree = coradoc
13
15
  end
14
16
 
17
# Extracts titles from lists. This happens in HTML files
# generated from DOCX documents by LibreOffice.
#
# We are interested in a particular tree:
#   Element::List::Ordered items:
#     Element::List::Ordered items: (any depth)
#       Element::ListItem content:
#         Element::Title
#         (any number of other titles of the same scheme)
#
# This tree is flattened into:
#   Element::Title
#   Element::Title (any number of titles)
#
# Anchors found on the unwrapped lists and on the list item are kept and
# appended after the titles; each anchor is collected exactly once.
def extract_titles_from_lists
  @tree = Element::Base.visit(@tree) do |elem, dir|
    next elem unless dir == :pre
    next elem unless elem.is_a?(Element::List::Ordered)

    anchors = []

    # Descend through single-item ordered lists of any depth, starting
    # with elem itself. The loop collects elem's anchor on its first
    # iteration, so it must not be pushed separately beforehand.
    processed = elem
    single_chain = true
    while processed.is_a?(Element::List::Ordered)
      if processed.items.length != 1
        single_chain = false
        break
      end
      anchors << processed.anchor if processed.anchor
      processed = processed.items.first
    end

    # Leave the element untouched when any list on the way had more (or
    # fewer) than one item, or when the chain does not end in a ListItem.
    next elem unless single_chain
    next elem unless processed.is_a?(Element::ListItem)

    anchors << processed.anchor if processed.anchor

    # Now we must have a title (or titles).
    titles = processed.content.flatten

    # Don't bother if there's no title in there.
    next elem unless titles.any? { |i| i.is_a?(Element::Title) }

    # A nested ordered list is acceptable here: it is handled when the
    # visitor reaches it. Anything else means this is a genuine list.
    only_titles_or_lists = titles.all? do |i|
      i.is_a?(Element::Title) || i.is_a?(Element::List::Ordered)
    end
    next elem unless only_titles_or_lists

    # We are done now.
    titles + anchors
  end
end
71
+
15
72
  # Collapse DIVs that only have a title, or nest another DIV.
16
73
  def collapse_meaningless_sections
17
- @tree = Coradoc::Element::Base.visit(@tree) do |elem, _dir|
18
- if elem.is_a?(Coradoc::Element::Section) && elem.safe_to_collapse?
74
+ @tree = Element::Base.visit(@tree) do |elem, _dir|
75
+ if elem.is_a?(Element::Section) && elem.safe_to_collapse?
19
76
  children_classes = Array(elem.contents).map(&:class)
20
77
  count = children_classes.length
21
- safe_classes = [Coradoc::Element::Section, Coradoc::Element::Title]
78
+ safe_classes = [Element::Section, Element::Title]
22
79
 
23
80
  # Count > 0 because some documents use <div> as a <br>.
24
81
  if count > 0 && children_classes.all? { |i| safe_classes.include?(i) }
25
- next elem.contents
82
+ contents = elem.contents.dup
83
+ contents.prepend(elem.anchor) if elem.anchor
84
+ next contents
26
85
  end
27
86
  end
28
87
  elem
@@ -32,12 +91,14 @@ module Coradoc::Input::HTML
32
91
  # tree should now be more cleaned up, so we can progress with
33
92
  # creating meaningful sections
34
93
  def generate_meaningful_sections
35
- @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
94
+ @tree = Element::Base.visit(@tree) do |elem, dir|
36
95
  # We are searching for an array, that has a title. This
37
96
  # will be a candidate for our section array.
38
97
  if dir == :post &&
39
98
  elem.is_a?(Array) &&
40
- !elem.grep(Coradoc::Element::Title).empty?
99
+ !elem.flatten.grep(Element::Title).empty?
100
+
101
+ elem = elem.flatten
41
102
 
42
103
  new_array = []
43
104
  content_array = new_array
@@ -47,12 +108,12 @@ module Coradoc::Input::HTML
47
108
  # all descendant sections into those sections. Otherwise, we push
48
109
  # an element as content of current section.
49
110
  elem.each do |e|
50
- if e.is_a? Coradoc::Element::Title
111
+ if e.is_a? Element::Title
51
112
  title = e
52
113
  content_array = []
53
114
  section_array = []
54
115
  level = title.level_int
55
- section = Coradoc::Element::Section.new(
116
+ section = Element::Section.new(
56
117
  title, contents: content_array, sections: section_array
57
118
  )
58
119
  # Some documents may not be consistent and eg. follow H4 after
@@ -82,11 +143,11 @@ module Coradoc::Input::HTML
82
143
  previous_sections = {}
83
144
 
84
145
  determine_section_id = ->(elem) do
85
- if elem.title.style == "appendix"
86
- level = "A"
87
- else
88
- level = 1
89
- end
146
+ level = if elem.title.style == "appendix"
147
+ "A"
148
+ else
149
+ 1
150
+ end
90
151
 
91
152
  section = previous_sections[elem]
92
153
  while section
@@ -102,8 +163,8 @@ module Coradoc::Input::HTML
102
163
  style
103
164
  end
104
165
 
105
- @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
106
- title = elem.title if elem.is_a?(Coradoc::Element::Section)
166
+ @tree = Element::Base.visit(@tree) do |elem, dir|
167
+ title = elem.title if elem.is_a?(Element::Section)
107
168
 
108
169
  if title && title.level_int <= max_level
109
170
  if dir == :pre
@@ -137,6 +198,7 @@ module Coradoc::Input::HTML
137
198
  end
138
199
 
139
200
  def process
201
+ extract_titles_from_lists
140
202
  collapse_meaningless_sections
141
203
  generate_meaningful_sections
142
204
  # Do it again to simplify the document further.
@@ -0,0 +1,18 @@
1
+ warn <<~WARN
2
+ Deprecated: coradoc/reverse_adoc has been renamed to coradoc/input/html.
3
+ | Please update your references from:
4
+ | require 'coradoc/reverse_adoc'
5
+ | To:
6
+ | require 'coradoc/input/html'
7
+ |
8
+ | You are referencing an old require here:
9
+ | #{caller.join("\n| ")}
10
+ |
11
+ | Please also ensure that you replace all references to Coradoc::ReverseAdoc
12
+ | in your code with Coradoc::Input::HTML.
13
+ WARN
14
+
15
+ require 'coradoc'
16
+ require 'coradoc/input/html'
17
+
18
+ Coradoc::ReverseAdoc = Coradoc::Input::HTML
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coradoc
4
- VERSION = "1.1.1"
4
+ VERSION = "1.1.3"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coradoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2024-09-17 00:00:00.000000000 Z
12
+ date: 2024-11-13 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: marcel
@@ -129,14 +129,14 @@ dependencies:
129
129
  requirements:
130
130
  - - ">="
131
131
  - !ruby/object:Gem::Version
132
- version: '0'
132
+ version: 1.3.0
133
133
  type: :runtime
134
134
  prerelease: false
135
135
  version_requirements: !ruby/object:Gem::Requirement
136
136
  requirements:
137
137
  - - ">="
138
138
  - !ruby/object:Gem::Version
139
- version: '0'
139
+ version: 1.3.0
140
140
  - !ruby/object:Gem::Dependency
141
141
  name: codeclimate-test-reporter
142
142
  requirement: !ruby/object:Gem::Requirement
@@ -411,6 +411,7 @@ files:
411
411
  - lib/coradoc/parser/asciidoc/table.rb
412
412
  - lib/coradoc/parser/asciidoc/term.rb
413
413
  - lib/coradoc/parser/base.rb
414
+ - lib/coradoc/reverse_adoc.rb
414
415
  - lib/coradoc/transformer.rb
415
416
  - lib/coradoc/util.rb
416
417
  - lib/coradoc/version.rb