coradoc 0.3.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/exe/reverse_adoc +24 -3
  4. data/lib/coradoc/document.rb +1 -0
  5. data/lib/coradoc/element/admonition.rb +2 -2
  6. data/lib/coradoc/element/attribute.rb +2 -2
  7. data/lib/coradoc/element/attribute_list.rb +94 -15
  8. data/lib/coradoc/element/audio.rb +13 -2
  9. data/lib/coradoc/element/author.rb +4 -2
  10. data/lib/coradoc/element/base.rb +70 -7
  11. data/lib/coradoc/element/block/core.rb +8 -4
  12. data/lib/coradoc/element/block/quote.rb +1 -1
  13. data/lib/coradoc/element/block/side.rb +1 -1
  14. data/lib/coradoc/element/break.rb +1 -1
  15. data/lib/coradoc/element/document_attributes.rb +6 -6
  16. data/lib/coradoc/element/header.rb +4 -2
  17. data/lib/coradoc/element/image/block_image.rb +13 -2
  18. data/lib/coradoc/element/image/core.rb +37 -6
  19. data/lib/coradoc/element/image/inline_image.rb +2 -2
  20. data/lib/coradoc/element/inline/anchor.rb +4 -2
  21. data/lib/coradoc/element/inline/bold.rb +9 -4
  22. data/lib/coradoc/element/inline/cross_reference.rb +4 -2
  23. data/lib/coradoc/element/inline/hard_line_break.rb +1 -1
  24. data/lib/coradoc/element/inline/highlight.rb +11 -6
  25. data/lib/coradoc/element/inline/italic.rb +9 -4
  26. data/lib/coradoc/element/inline/link.rb +22 -6
  27. data/lib/coradoc/element/inline/monospace.rb +9 -4
  28. data/lib/coradoc/element/inline/quotation.rb +3 -1
  29. data/lib/coradoc/element/inline/subscript.rb +4 -2
  30. data/lib/coradoc/element/inline/superscript.rb +4 -2
  31. data/lib/coradoc/element/list/core.rb +15 -7
  32. data/lib/coradoc/element/list/definition.rb +22 -1
  33. data/lib/coradoc/element/list/ordered.rb +1 -1
  34. data/lib/coradoc/element/list/unordered.rb +1 -1
  35. data/lib/coradoc/element/list.rb +1 -0
  36. data/lib/coradoc/element/list_item.rb +16 -3
  37. data/lib/coradoc/element/list_item_definition.rb +32 -0
  38. data/lib/coradoc/element/paragraph.rb +6 -4
  39. data/lib/coradoc/element/revision.rb +4 -2
  40. data/lib/coradoc/element/section.rb +27 -4
  41. data/lib/coradoc/element/table.rb +32 -10
  42. data/lib/coradoc/element/text_element.rb +48 -8
  43. data/lib/coradoc/element/title.rb +27 -7
  44. data/lib/coradoc/element/video.rb +32 -5
  45. data/lib/coradoc/reverse_adoc/README.adoc +14 -8
  46. data/lib/coradoc/reverse_adoc/cleaner.rb +21 -10
  47. data/lib/coradoc/reverse_adoc/config.rb +35 -16
  48. data/lib/coradoc/reverse_adoc/converters/a.rb +17 -12
  49. data/lib/coradoc/reverse_adoc/converters/aside.rb +0 -4
  50. data/lib/coradoc/reverse_adoc/converters/audio.rb +0 -4
  51. data/lib/coradoc/reverse_adoc/converters/base.rb +48 -44
  52. data/lib/coradoc/reverse_adoc/converters/blockquote.rb +2 -11
  53. data/lib/coradoc/reverse_adoc/converters/br.rb +0 -4
  54. data/lib/coradoc/reverse_adoc/converters/bypass.rb +0 -4
  55. data/lib/coradoc/reverse_adoc/converters/code.rb +5 -42
  56. data/lib/coradoc/reverse_adoc/converters/div.rb +0 -4
  57. data/lib/coradoc/reverse_adoc/converters/dl.rb +55 -0
  58. data/lib/coradoc/reverse_adoc/converters/em.rb +5 -43
  59. data/lib/coradoc/reverse_adoc/converters/figure.rb +0 -4
  60. data/lib/coradoc/reverse_adoc/converters/h.rb +0 -4
  61. data/lib/coradoc/reverse_adoc/converters/head.rb +0 -4
  62. data/lib/coradoc/reverse_adoc/converters/hr.rb +0 -4
  63. data/lib/coradoc/reverse_adoc/converters/img.rb +30 -18
  64. data/lib/coradoc/reverse_adoc/converters/li.rb +0 -4
  65. data/lib/coradoc/reverse_adoc/converters/mark.rb +5 -11
  66. data/lib/coradoc/reverse_adoc/converters/markup.rb +27 -0
  67. data/lib/coradoc/reverse_adoc/converters/ol.rb +0 -4
  68. data/lib/coradoc/reverse_adoc/converters/p.rb +0 -4
  69. data/lib/coradoc/reverse_adoc/converters/pre.rb +0 -4
  70. data/lib/coradoc/reverse_adoc/converters/q.rb +0 -4
  71. data/lib/coradoc/reverse_adoc/converters/strong.rb +5 -41
  72. data/lib/coradoc/reverse_adoc/converters/sub.rb +6 -4
  73. data/lib/coradoc/reverse_adoc/converters/sup.rb +7 -5
  74. data/lib/coradoc/reverse_adoc/converters/table.rb +240 -4
  75. data/lib/coradoc/reverse_adoc/converters/td.rb +1 -7
  76. data/lib/coradoc/reverse_adoc/converters/text.rb +1 -38
  77. data/lib/coradoc/reverse_adoc/converters/tr.rb +0 -4
  78. data/lib/coradoc/reverse_adoc/converters/video.rb +0 -4
  79. data/lib/coradoc/reverse_adoc/converters.rb +24 -1
  80. data/lib/coradoc/reverse_adoc/html_converter.rb +109 -20
  81. data/lib/coradoc/reverse_adoc/plugin.rb +131 -0
  82. data/lib/coradoc/reverse_adoc/plugins/plateau.rb +206 -0
  83. data/lib/coradoc/reverse_adoc/postprocessor.rb +152 -0
  84. data/lib/coradoc/reverse_adoc.rb +3 -0
  85. data/lib/coradoc/util.rb +10 -0
  86. data/lib/coradoc/version.rb +1 -1
  87. data/lib/coradoc.rb +1 -0
  88. data/lib/reverse_adoc.rb +1 -1
  89. metadata +9 -3
  90. data/lib/coradoc/element/inline/image.rb +0 -26
@@ -0,0 +1,152 @@
1
+ module Coradoc::ReverseAdoc
2
+ # Postprocessor's aim is to convert a Coradoc tree from
3
+ # a mess that has been created from HTML into a tree that
4
+ # is compatible with what we would get out of Coradoc, if
5
+ # it parsed it directly.
6
+ class Postprocessor
7
+ def self.process(coradoc)
8
+ new(coradoc).process
9
+ end
10
+
11
+ def initialize(coradoc)
12
+ @tree = coradoc
13
+ end
14
+
15
+ # Collapse DIVs that only have a title, or nest another DIV.
16
+ def collapse_meaningless_sections
17
+ @tree = Coradoc::Element::Base.visit(@tree) do |elem, _dir|
18
+ if elem.is_a?(Coradoc::Element::Section) && elem.safe_to_collapse?
19
+ children_classes = Array(elem.contents).map(&:class)
20
+ count = children_classes.length
21
+ safe_classes = [Coradoc::Element::Section, Coradoc::Element::Title]
22
+
23
+ # Count > 0 because some documents use <div> as a <br>.
24
+ if count > 0 && children_classes.all? { |i| safe_classes.include?(i) }
25
+ next elem.contents
26
+ end
27
+ end
28
+ elem
29
+ end
30
+ end
31
+
32
+ # The tree should now be cleaner, so we can proceed with
33
+ # creating meaningful sections.
34
+ def generate_meaningful_sections
35
+ @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
36
+ # We are searching for an array that has a title. This
37
+ # will be a candidate for our section array.
38
+ if dir == :post &&
39
+ elem.is_a?(Array) &&
40
+ !elem.grep(Coradoc::Element::Title).empty?
41
+
42
+ new_array = []
43
+ content_array = new_array
44
+ section_arrays_by_level = [new_array] * 8
45
+
46
+ # For each title element, we create a new section. Then we push
47
+ # all descendant sections into those sections. Otherwise, we push
48
+ # an element as content of the current section.
49
+ elem.each do |e|
50
+ if e.is_a? Coradoc::Element::Title
51
+ title = e
52
+ content_array = []
53
+ section_array = []
54
+ level = title.level_int
55
+ section = Coradoc::Element::Section.new(
56
+ title, contents: content_array, sections: section_array
57
+ )
58
+ # Some documents may not be consistent and, e.g., have an H4 follow
59
+ # an H2. Let's ensure that subsequent sections will land in the
60
+ # correct place.
61
+ (8 - level).times do |j|
62
+ section_arrays_by_level[level + j] = section_array
63
+ end
64
+ section_arrays_by_level[level - 1] << section
65
+ else
66
+ content_array << e
67
+ end
68
+ end
69
+ next new_array
70
+ end
71
+ elem
72
+ end
73
+ end
74
+
75
+ def split_sections
76
+ max_level = Coradoc::ReverseAdoc.config.split_sections
77
+
78
+ return unless max_level
79
+
80
+ sections = {}
81
+ parent_sections = []
82
+ previous_sections = {}
83
+
84
+ determine_section_id = ->(elem) do
85
+ if elem.title.style == "appendix"
86
+ level = "A"
87
+ else
88
+ level = 1
89
+ end
90
+
91
+ section = previous_sections[elem]
92
+ while section
93
+ level = level.succ if elem.title.style == section.title.style
94
+ section = previous_sections[section]
95
+ end
96
+ level.is_a?(Integer) ? "%02d" % level : level
97
+ end
98
+
99
+ determine_style = ->(elem) do
100
+ style = elem.title.style || "section"
101
+ style += "-"
102
+ style
103
+ end
104
+
105
+ @tree = Coradoc::Element::Base.visit(@tree) do |elem, dir|
106
+ title = elem.title if elem.is_a?(Coradoc::Element::Section)
107
+
108
+ if title && title.level_int <= max_level
109
+ if dir == :pre
110
+ # In the PRE pass, we build a tree of sections, so that
111
+ # we can compute numbers
112
+ previous_sections[elem] = parent_sections[title.level_int]
113
+ parent_sections[title.level_int] = elem
114
+ parent_sections[(title.level_int+1)..nil] = nil
115
+
116
+ elem
117
+ else
118
+ # In the POST pass, we replace the sections with their
119
+ # include tag.
120
+ section_file = "sections/"
121
+ section_file += parent_sections[1..title.level_int].map do |parent|
122
+ determine_style.(parent) + determine_section_id.(parent)
123
+ end.join("/")
124
+ section_file += ".adoc"
125
+
126
+ sections[section_file] = elem
127
+ up = "../" * (title.level_int - 1)
128
+ "\ninclude::#{up}#{section_file}[]\n"
129
+ end
130
+ else
131
+ elem
132
+ end
133
+ end
134
+
135
+ sections[nil] = @tree
136
+ @tree = sections
137
+ end
138
+
139
+ def process
140
+ collapse_meaningless_sections
141
+ generate_meaningful_sections
142
+ # Do it again to simplify the document further.
143
+ # Since the structure is changed, we may have new meaningful
144
+ # sections as only children of some meaningless sections.
145
+ collapse_meaningless_sections
146
+
147
+ split_sections
148
+
149
+ @tree
150
+ end
151
+ end
152
+ end
@@ -9,6 +9,9 @@ require_relative "reverse_adoc/config"
9
9
  require_relative "reverse_adoc/converters"
10
10
  require_relative "reverse_adoc/converters/base"
11
11
  require_relative "reverse_adoc/html_converter"
12
+ require_relative "reverse_adoc/plugin"
13
+ require_relative "reverse_adoc/postprocessor"
14
+
12
15
 
13
16
  module Coradoc::ReverseAdoc
14
17
  def self.convert(input, options = {})
@@ -0,0 +1,10 @@
1
+ module Coradoc
2
+ def self.strip_unicode(str)
3
+ str.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
4
+ end
5
+
6
+ def self.is_a_single?(obj, klass)
7
+ obj.is_a?(klass) ||
8
+ (obj.is_a?(Array) && obj.length == 1 && obj.first.is_a?(klass))
9
+ end
10
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coradoc
4
- VERSION = "0.3.0"
4
+ VERSION = "1.1.0"
5
5
  end
data/lib/coradoc.rb CHANGED
@@ -4,6 +4,7 @@ require "pathname"
4
4
 
5
5
  require "parslet"
6
6
  require_relative "coradoc/version"
7
+ require_relative "coradoc/util"
7
8
  require_relative "coradoc/parser"
8
9
  require_relative "coradoc/transformer"
9
10
  require_relative "coradoc/generator"
data/lib/reverse_adoc.rb CHANGED
@@ -8,7 +8,7 @@ warn <<~END
8
8
  | You are referencing an old require here:
9
9
  | #{caller.join("\n| ")}
10
10
  |
11
- | You should also replace 'coradoc' with 'reverse_adoc' in your gem dependencies.
11
+ | You should also replace 'reverse_adoc' with 'coradoc' in your gem dependencies.
12
12
  | reverse_adoc 2.0.0 will be kept with 'coradoc' as the only dependency.
13
13
  |
14
14
  | Please also ensure that you replace all references to ReverseAdoc in your code
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coradoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2024-05-21 00:00:00.000000000 Z
12
+ date: 2024-06-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: marcel
@@ -278,7 +278,6 @@ files:
278
278
  - lib/coradoc/element/inline/cross_reference.rb
279
279
  - lib/coradoc/element/inline/hard_line_break.rb
280
280
  - lib/coradoc/element/inline/highlight.rb
281
- - lib/coradoc/element/inline/image.rb
282
281
  - lib/coradoc/element/inline/italic.rb
283
282
  - lib/coradoc/element/inline/link.rb
284
283
  - lib/coradoc/element/inline/monospace.rb
@@ -291,6 +290,7 @@ files:
291
290
  - lib/coradoc/element/list/ordered.rb
292
291
  - lib/coradoc/element/list/unordered.rb
293
292
  - lib/coradoc/element/list_item.rb
293
+ - lib/coradoc/element/list_item_definition.rb
294
294
  - lib/coradoc/element/paragraph.rb
295
295
  - lib/coradoc/element/revision.rb
296
296
  - lib/coradoc/element/section.rb
@@ -323,6 +323,7 @@ files:
323
323
  - lib/coradoc/reverse_adoc/converters/bypass.rb
324
324
  - lib/coradoc/reverse_adoc/converters/code.rb
325
325
  - lib/coradoc/reverse_adoc/converters/div.rb
326
+ - lib/coradoc/reverse_adoc/converters/dl.rb
326
327
  - lib/coradoc/reverse_adoc/converters/drop.rb
327
328
  - lib/coradoc/reverse_adoc/converters/em.rb
328
329
  - lib/coradoc/reverse_adoc/converters/figure.rb
@@ -333,6 +334,7 @@ files:
333
334
  - lib/coradoc/reverse_adoc/converters/img.rb
334
335
  - lib/coradoc/reverse_adoc/converters/li.rb
335
336
  - lib/coradoc/reverse_adoc/converters/mark.rb
337
+ - lib/coradoc/reverse_adoc/converters/markup.rb
336
338
  - lib/coradoc/reverse_adoc/converters/math.rb
337
339
  - lib/coradoc/reverse_adoc/converters/ol.rb
338
340
  - lib/coradoc/reverse_adoc/converters/p.rb
@@ -350,7 +352,11 @@ files:
350
352
  - lib/coradoc/reverse_adoc/converters/video.rb
351
353
  - lib/coradoc/reverse_adoc/errors.rb
352
354
  - lib/coradoc/reverse_adoc/html_converter.rb
355
+ - lib/coradoc/reverse_adoc/plugin.rb
356
+ - lib/coradoc/reverse_adoc/plugins/plateau.rb
357
+ - lib/coradoc/reverse_adoc/postprocessor.rb
353
358
  - lib/coradoc/transformer.rb
359
+ - lib/coradoc/util.rb
354
360
  - lib/coradoc/version.rb
355
361
  - lib/reverse_adoc.rb
356
362
  - todo.md
@@ -1,26 +0,0 @@
1
- require_relative "../image/"
2
- module Coradoc
3
- module Element
4
- module Inline
5
- class Image
6
- attr_reader :title, :id, :src, :attributes
7
-
8
- def initialize(title, id, src, options = ())
9
- @title = title
10
- @id = id
11
- @anchor = @id.nil? ? nil : Inline::Anchor.new(@id)
12
- @src = src
13
- @attributes = options.fetch(:attributes, [])
14
- @title = options.fetch(:title, nil)
15
- end
16
-
17
- def to_adoc
18
- anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
19
- title = ".#{@title}\n" unless @title.to_s.empty?
20
- attrs = @attributes.empty? ? "\[\]" : @attributes.to_adoc
21
- [anchor, title, "image::", @src, attrs].join("")
22
- end
23
- end
24
- end
25
- end
26
- end