coradoc 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 71bfd2ac723d58ca75b591bceabcdf834abd9cbbf32e54b96352da21b8f15095
4
- data.tar.gz: 1a7ea6bf80d64ab4c1349c948aa2aee9ed704dc36ddd07fb7cce78293dca86cd
3
+ metadata.gz: 5e44c5e565e224487496ecdd7be8c5c88e5f05fd2a42a4f2cbf31746a908aa25
4
+ data.tar.gz: a1e8fb651b29b516071e91c5c6f7bf72a68c34fd4ae9512a928d3ea3185bc0c6
5
5
  SHA512:
6
- metadata.gz: 555800621a06ffafc07e03a5c2cdca1210de6369d524a606de51df78b1ee9b19aaba30d32d8a448347237e488362a59677781a02502a1d8891fb89aa1cc267f5
7
- data.tar.gz: 22707b18c6ed99fb4c6127885d871875ed1fed2906d2d10dcdd5f4ed9bdd1b1bfe3f19fd092768ccc4d488bdb7e705aba9ba7ab948ceace9cc679f1ee733eddc
6
+ metadata.gz: 4df0a3edcebe6990006c7ca951b59f717f05d9b25eb6481394fe276dbaa318f6016e7a915d268abc19d61cec0605ab7d44466662d499cfa08a0ffa2f98bd8dcc
7
+ data.tar.gz: 9a44116b58e7e99a14ae856b1b06e23c26cbe2913f970a6849615282c3f8e33b05c538c39c72a827fa65aef30f56c9eb494d2f48574ea01f86bca858d9692e16
@@ -9,7 +9,7 @@ module Coradoc
9
9
  end
10
10
 
11
11
  def to_adoc
12
- "\n\n#{gen_delimiter}" << gen_lines << "\n#{gen_delimiter}\n\n"
12
+ "\n\n#{gen_delimiter}\n" << gen_lines << "\n#{gen_delimiter}\n\n"
13
13
  end
14
14
  end
15
15
  end
@@ -12,6 +12,7 @@ module Coradoc
12
12
  @anchor = @id.nil? ? nil : Coradoc::Element::Inline::Anchor.new(@id)
13
13
  @src = src
14
14
  @attributes = options.fetch(:attributes, AttributeList.new)
15
+ @annotate_missing = options.fetch(:annotate_missing)
15
16
  @title = options.fetch(:title, nil)
16
17
  if @attributes.any?
17
18
  @attributes.validate_positional(VALIDATORS_POSITIONAL)
@@ -20,10 +21,11 @@ module Coradoc
20
21
  end
21
22
 
22
23
  def to_adoc
24
+ missing = "// FIXME: Missing image: #{@annotate_missing}\n" if @annotate_missing
23
25
  anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
24
26
  title = ".#{@title}\n" unless @title.to_s.empty?
25
27
  attrs = @attributes.to_adoc
26
- [anchor, title, "image", @colons, @src, attrs].join("")
28
+ [missing, anchor, title, "image", @colons, @src, attrs].join("")
27
29
  end
28
30
 
29
31
  extend AttributeList::Matchers
@@ -25,8 +25,13 @@ module Coradoc
25
25
  @items.each do |item|
26
26
  c = Coradoc::Generator.gen_adoc(item)
27
27
  if !c.empty?
28
- content << prefix.to_s
29
- content << " " if c[0]!=" "
28
+ # If there's a list inside a list directly, we want to
29
+ # skip adding an empty list item.
30
+ # See: https://github.com/metanorma/coradoc/issues/96
31
+ unless item.is_a? List::Core
32
+ content << prefix.to_s
33
+ content << " " if c[0]!=" "
34
+ end
30
35
  content << c
31
36
  end
32
37
  end
@@ -1,9 +1,11 @@
1
1
  module Coradoc
2
2
  module Element
3
3
  module List
4
- class Definition < Core
4
+ class Definition < Base
5
5
  attr_accessor :items, :delimiter
6
6
 
7
+ declare_children :items
8
+
7
9
  def initialize(items, options = {})
8
10
  @items = items
9
11
  @delimiter = options.fetch(:delimiter, "::")
@@ -1,7 +1,7 @@
1
1
  module Coradoc
2
2
  module Element
3
3
  class ListItem < Base
4
- attr_accessor :id
4
+ attr_accessor :id, :content, :anchor
5
5
 
6
6
  declare_children :content, :id, :anchor
7
7
 
@@ -14,8 +14,16 @@ module Coradoc
14
14
  def to_adoc
15
15
  anchor = @anchor.nil? ? "" : @anchor.to_adoc.to_s
16
16
  content = Array(@content).map do |subitem|
17
- Coradoc::Generator.gen_adoc(subitem).chomp
18
- end.join("\n+\n")
17
+ next if subitem.is_a? Coradoc::Element::Inline::HardLineBreak
18
+
19
+ subcontent = Coradoc::Generator.gen_adoc(subitem)
20
+ # Only try to postprocess elements that are text,
21
+ # otherwise we could strip markup.
22
+ if Coradoc.is_a_single?(subitem, Coradoc::Element::TextElement)
23
+ subcontent = Coradoc.strip_unicode(subcontent)
24
+ end
25
+ subcontent.chomp
26
+ end.compact.join("\n+\n")
19
27
 
20
28
  " #{anchor}#{content.chomp}\n"
21
29
  end
@@ -24,9 +24,9 @@ module Coradoc
24
24
  def to_adoc
25
25
  anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
26
26
  if @tdsinglepara
27
- anchor.to_s << Coradoc::Generator.gen_adoc(@content).strip
27
+ anchor.to_s << Coradoc.strip_unicode(Coradoc::Generator.gen_adoc(@content))
28
28
  else
29
- "\n\n#{anchor}" << Coradoc::Generator.gen_adoc(@content).strip << "\n\n"
29
+ "\n\n#{anchor}" << Coradoc.strip_unicode(Coradoc::Generator.gen_adoc(@content)) << "\n\n"
30
30
  end
31
31
  end
32
32
  end
@@ -34,6 +34,12 @@ module Coradoc
34
34
  # with something.
35
35
  content = "&nbsp;#{content}" if content.start_with?(" +\n")
36
36
 
37
+ # Only try to postprocess elements that are text,
38
+ # otherwise we could strip markup.
39
+ if Coradoc.is_a_single?(@contents, Coradoc::Element::TextElement)
40
+ content = Coradoc.strip_unicode(content)
41
+ end
42
+
37
43
  "\n#{anchor}" << title << content << sections << "\n"
38
44
  end
39
45
 
@@ -76,6 +76,11 @@ module Coradoc
76
76
  anchor = @anchor.nil? ? "" : @anchor.to_adoc.to_s
77
77
  content = simplify_block_content(@content)
78
78
  content = Coradoc::Generator.gen_adoc(content)
79
+ # Only try to postprocess elements that are text,
80
+ # otherwise we could strip markup.
81
+ if Coradoc.is_a_single?(@content, Coradoc::Element::TextElement)
82
+ content = Coradoc.strip_unicode(content)
83
+ end
79
84
  "#{@colrowattr}#{@alignattr}#{@style}| #{anchor}#{content}"
80
85
  end
81
86
  end
@@ -21,7 +21,7 @@ module Coradoc
21
21
 
22
22
  def to_adoc
23
23
  anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
24
- content = Coradoc::Generator.gen_adoc(@content)
24
+ content = Coradoc.strip_unicode(Coradoc::Generator.gen_adoc(@content))
25
25
  <<~HERE
26
26
 
27
27
  #{anchor}#{style_str}#{level_str} #{content}
@@ -83,8 +83,7 @@ module Coradoc::ReverseAdoc
83
83
 
84
84
  def scrub_whitespace(string)
85
85
  string.gsub!(/&nbsp;|&#xA0;|\u00a0/i, "&#xA0;") # HTML encoded spaces
86
- string.sub!(/^\A[[:space:]]+/m, "") # document leading whitespace
87
- string.sub!(/[[:space:]]+\z$/m, "") # document trailing whitespace
86
+ string = Coradoc.strip_unicode(string) # Strip document-level leading and trailing whitespace
88
87
  string.gsub!(/( +)$/, " ") # line trailing whitespace
89
88
  string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
90
89
  # string.delete!('?| ') # Unicode non-breaking spaces, injected as tabs
@@ -33,7 +33,13 @@ module Coradoc::ReverseAdoc
33
33
  # puts "image_dest_path: #{image_dest_path.to_s}"
34
34
  # puts "image_src_path: #{image_src_path.to_s}"
35
35
 
36
- FileUtils.cp(image_src_path, image_dest_path)
36
+ if File.exist?(image_src_path)
37
+ FileUtils.cp(image_src_path, image_dest_path)
38
+ else
39
+ @annotate_missing = image_src_path
40
+ Kernel.warn "Image #{image_src_path} does not exist"
41
+ end
42
+
37
43
  image_number_increment
38
44
 
39
45
  image_dest_path.relative_path_from(dest_dir)
@@ -88,11 +94,12 @@ module Coradoc::ReverseAdoc
88
94
 
89
95
  if src
90
96
  Coradoc::Element::Image::BlockImage.new(title, id, src,
91
- attributes: attributes)
97
+ attributes: attributes,
98
+ annotate_missing: @annotate_missing)
92
99
  end
93
100
  end
94
101
  end
95
102
 
96
- register :img, Img.new
103
+ register :img, Img
97
104
  end
98
105
  end
@@ -53,13 +53,17 @@ module Coradoc::ReverseAdoc
53
53
  rules_attr = rules(node)
54
54
  attrs.add_named("rules", rules_attr) if rules_attr
55
55
 
56
- cols = ensure_row_column_integrity_and_get_column_sizes(node)
57
- attrs.add_named("cols", cols)
58
-
59
  # Header first rows can't span multiple rows - drop header if they do.
60
- header = node.at_xpath(".//tr")
61
- unless header.xpath("./td | ./th").all? { |i| [nil, "1", ""].include? i["rowspan"] }
62
- attrs.add_named("options", ["noheader"])
56
+ # We can't, and shouldn't, do those calculations if the table we are
57
+ # processing is empty.
58
+ unless empty?(node)
59
+ cols = ensure_row_column_integrity_and_get_column_sizes(node)
60
+ attrs.add_named("cols", cols)
61
+
62
+ # Header first rows can't span multiple rows - drop header if they do.
63
+ header = node.at_xpath(".//tr")
64
+ unless header.xpath("./td | ./th").all? { |i| [nil, "1", ""].include? i["rowspan"] }
65
+ attrs.add_named("options", ["noheader"])
66
+ end
63
67
  end
64
68
 
65
69
  # This line should be removed.
@@ -68,6 +72,10 @@ module Coradoc::ReverseAdoc
68
72
  attrs
69
73
  end
70
74
 
75
+ def empty?(node)
76
+ !node.at_xpath(".//td | .//th")
77
+ end
78
+
71
79
  def ensure_row_column_integrity_and_get_column_sizes(node)
72
80
  rows = node.xpath(".//tr")
73
81
  num_rows = rows.length
@@ -173,7 +181,9 @@ module Coradoc::ReverseAdoc
173
181
 
174
182
  row_obj = row.last.first.parent
175
183
  doc = row_obj.document
176
- row_obj.add_child(Nokogiri::XML::Node.new("td", doc))
184
+ added_node = Nokogiri::XML::Node.new("td", doc)
185
+ added_node["x-added"] = "x-added"
186
+ row_obj.add_child(added_node)
177
187
 
178
188
  modified = true
179
189
  end
@@ -194,6 +204,21 @@ module Coradoc::ReverseAdoc
194
204
  end
195
205
 
196
206
  unless cell_matrix_correct
207
+ # It may be a special case that we need to add virtual cells at the
208
+ # beginning not the end of a row.
209
+ needs_recompute = false
210
+ cell_matrix.each do |row|
211
+ if row.compact.length != row.length
212
+ last_cell = row.last
213
+ if last_cell["x-added"]
214
+ last_cell.parent.prepend_child(last_cell)
215
+ needs_recompute = true
216
+ end
217
+ end
218
+ end
219
+ recompute.() if needs_recompute
220
+
221
+ # But otherwise... we've got a really nasty table.
197
222
  warn <<~WARNING.gsub("\n", " ")
198
223
  **** Couldn't construct a valid image of a table on line
199
224
  #{node.line}. We need that to reliably compute column
@@ -10,7 +10,9 @@ module Coradoc::ReverseAdoc
10
10
  end
11
11
 
12
12
  def self.lookup(tag_name)
13
- @@converters[tag_name.to_sym] or default_converter(tag_name)
13
+ converter = @@converters[tag_name.to_sym] || default_converter(tag_name)
14
+ converter = converter.new if converter.respond_to? :new
15
+ converter
14
16
  end
15
17
 
16
18
  # Note: process won't run plugin hooks
@@ -30,12 +30,20 @@ module Coradoc::ReverseAdoc
30
30
  html_tree_change_tag_name_by_css(".pitemdata", "h3")
31
31
  html_tree_change_tag_name_by_css(".sitemdata", "h4")
32
32
  html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', "th")
33
+ html_tree_change_tag_name_by_css('td[bgcolor="#d0cece"]', "th")
34
+ html_tree_change_tag_name_by_css('.framedata, .frame_container_box', 'aside')
35
+ html_tree_change_tag_name_by_css('.frame2data', 'pre')
36
+ # Assumption that all code snippets in those documents are XML...
37
+ html_tree_change_properties_by_css(".frame2data", class: "brush:xml;")
33
38
 
34
39
  # Remove some CSS ids that are not important to us
35
40
  html_tree_change_properties_by_css("#__nuxt", id: nil)
36
41
  html_tree_change_properties_by_css("#__layout", id: nil)
37
42
  html_tree_change_properties_by_css("#app", id: nil)
38
43
 
44
+ # Handle lists of document 02
45
+ html_tree_replace_with_children_by_css(".list_num-wrap")
46
+
39
47
  # Convert table/img caption to become a caption
40
48
  html_tree.css(".imagedata").each do |e|
41
49
  table = e.parent.next&.children&.first
@@ -75,66 +83,82 @@ module Coradoc::ReverseAdoc
75
83
  end
76
84
  end
77
85
 
78
- html_tree_add_hook_pre_by_css ".text3data" do |node,|
79
- text = html_tree_process_to_adoc(node).strip
80
- next "" if text.empty? || text == "\u3000"
81
-
82
- text = text.strip.gsub(/^/, "*** ")
83
- "\n\n//-PT3D\n#{text}\n//-ENDPT3D\n\n"
84
- end
85
-
86
- html_tree_add_hook_pre_by_css ".text4data" do |node,|
87
- text = html_tree_process_to_adoc(node).strip
88
- next "" if text.empty? || text == "\u3000"
86
+ (3..4).each do |i|
87
+ html_tree_add_hook_pre_by_css ".text#{i}data" do |node,|
88
+ text = html_tree_process_to_adoc(node).strip
89
+ next "" if text.empty? || text == "\u3000"
89
90
 
90
- text = text.strip.gsub(/^/, "**** ")
91
- "\n\n//-PT4D\n#{text}\n//-ENDPT4D\n\n"
91
+ text = text.strip.gsub(/^/, "#{'*' * i} ")
92
+ "\n\n//-PT#{i}D\n#{text}\n//-ENDPT#{i}D\n\n"
93
+ end
92
94
  end
93
95
 
94
- html_tree_add_hook_pre_by_css ".text2data_point ul" do |node,|
95
- text = html_tree_process_to_adoc(node.children.first.children).strip
96
+ (2..3).each do |i|
97
+ html_tree_add_hook_pre_by_css ".text#{i}data_point ul" do |node,|
98
+ text = html_tree_process_to_adoc(node.children.first.children).strip
96
99
 
97
- "** #{text}\n"
100
+ "#{'*' * i} #{text}\n"
101
+ end
98
102
  end
99
103
 
100
- html_tree_add_hook_pre_by_css ".text3data_point ul" do |node,|
101
- text = html_tree_process_to_adoc(node.children.first.children).strip
104
+ (1..20).each do |i|
105
+ html_tree_add_hook_pre_by_css ".numtextdata_num .list_num#{i}" do |node,|
106
+ text = html_tree_process_to_adoc(node).strip
102
107
 
103
- "*** #{text}\n"
108
+ "[start=#{i}]\n. #{text}\n"
109
+ end
104
110
  end
105
111
 
106
112
  # html_tree_preview
107
113
  end
108
114
 
115
+ IM = /[A-Z0-9]{1,3}/
116
+
109
117
  def handle_headers(node, coradoc, state)
110
- if coradoc.id.start_with?("toc0_")
111
- content = coradoc.content.map(&:content).join
118
+ content = coradoc.content.map(&:content).join
119
+
120
+ if %w[toc0 toc_0].any? { |i| coradoc.id&.start_with?(i) }
112
121
  # Special content
113
122
  case content.strip
114
123
  when "はじめに" # Introduction
115
124
  coradoc.style = "abstract" # The older version document has ".preface"
125
+ coradoc.level_int = 1
116
126
  when "改定の概要" # Revision overview
117
127
  coradoc.style = "abstract" # The older version document has ".preface"
128
+ coradoc.level_int = 1
118
129
  when "参考文献" # Bibliography
119
130
  coradoc.style = "bibliography"
131
+ coradoc.level_int = 1
120
132
  when "改訂履歴" # Document history
121
133
  coradoc.style = "appendix"
134
+ coradoc.level_int = 1
135
+ when "0 概要" # Overview
136
+ coradoc.style = "abstract" # I'm not sure this is correct
137
+ coradoc.level_int = 1
138
+ when "索引" # Index
139
+ coradoc.style = "index" # I'm not sure this is correct
140
+ coradoc.level_int = 1
122
141
  else
123
- warn "Unknown section #{coradoc.content.map(&:content).join.inspect}"
142
+ warn "Unknown section #{content.inspect}"
124
143
  end
144
+ end
125
145
 
126
- # Ensure they are generated as level 1
127
- coradoc.level_int = 1
146
+ if node.name == "h1"
147
+ if content.start_with?("Annex")
148
+ coradoc.style = "appendix"
149
+ coradoc.content.first.content.sub!(/\AAnnex [A-Z]/, "")
150
+ end
128
151
  end
129
152
 
130
153
  # Remove numbers
131
- coradoc.content.first.content.sub!(/\A[\d\s.]+/, "")
154
+ coradoc.content.first.content.sub!(/\A(#{IM}\.)*#{IM}[[:space:]]/, "")
132
155
 
133
156
  coradoc
134
157
  end
135
158
 
136
159
  def handle_headers_h4(node, coradoc, state)
137
- case coradoc.content.first.content
160
+ title = Coradoc.strip_unicode(coradoc.content.first.content)
161
+ case title
138
162
  when /\A\(\d+\)(.*)/
139
163
  coradoc.level_int = 4
140
164
  coradoc.content.first.content = $1.strip
@@ -143,8 +167,16 @@ module Coradoc::ReverseAdoc
143
167
  coradoc.level_int = 5
144
168
  coradoc.content.first.content = $1.strip
145
169
  coradoc
170
+ when /\A#{IM}\.#{IM}\.#{IM}\.#{IM}(.*)/
171
+ coradoc.level_int = 4
172
+ coradoc.content.first.content = $1.strip
146
173
  else
147
- ["// FIXME\n", coradoc]
174
+ if title.empty?
175
+ # Strip instances of faulty empty paragraphs
176
+ nil
177
+ else
178
+ ["// FIXME\n", coradoc]
179
+ end
148
180
  end
149
181
  end
150
182
 
@@ -82,13 +82,18 @@ module Coradoc::ReverseAdoc
82
82
  previous_sections = {}
83
83
 
84
84
  determine_section_id = ->(elem) do
85
- level = 0
86
- section = elem
85
+ if elem.title.style == "appendix"
86
+ level = "A"
87
+ else
88
+ level = 1
89
+ end
90
+
91
+ section = previous_sections[elem]
87
92
  while section
88
- level += 1 if elem.title.style == section.title.style
93
+ level = level.succ if elem.title.style == section.title.style
89
94
  section = previous_sections[section]
90
95
  end
91
- level
96
+ level.is_a?(Integer) ? "%02d" % level : level
92
97
  end
93
98
 
94
99
  determine_style = ->(elem) do
@@ -114,8 +119,7 @@ module Coradoc::ReverseAdoc
114
119
  # include tag.
115
120
  section_file = "sections/"
116
121
  section_file += parent_sections[1..title.level_int].map do |parent|
117
- style = determine_style.(parent)
118
- "%s%02d" % [style, determine_section_id.(parent)]
122
+ determine_style.(parent) + determine_section_id.(parent)
119
123
  end.join("/")
120
124
  section_file += ".adoc"
121
125
 
@@ -0,0 +1,10 @@
1
+ module Coradoc
2
+ def self.strip_unicode(str)
3
+ str.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
4
+ end
5
+
6
+ def self.is_a_single?(obj, klass)
7
+ obj.is_a?(klass) ||
8
+ (obj.is_a?(Array) && obj.length == 1 && obj.first.is_a?(klass))
9
+ end
10
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coradoc
4
- VERSION = "1.0.0"
4
+ VERSION = "1.1.0"
5
5
  end
data/lib/coradoc.rb CHANGED
@@ -4,6 +4,7 @@ require "pathname"
4
4
 
5
5
  require "parslet"
6
6
  require_relative "coradoc/version"
7
+ require_relative "coradoc/util"
7
8
  require_relative "coradoc/parser"
8
9
  require_relative "coradoc/transformer"
9
10
  require_relative "coradoc/generator"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coradoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2024-06-01 00:00:00.000000000 Z
12
+ date: 2024-06-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: marcel
@@ -356,6 +356,7 @@ files:
356
356
  - lib/coradoc/reverse_adoc/plugins/plateau.rb
357
357
  - lib/coradoc/reverse_adoc/postprocessor.rb
358
358
  - lib/coradoc/transformer.rb
359
+ - lib/coradoc/util.rb
359
360
  - lib/coradoc/version.rb
360
361
  - lib/reverse_adoc.rb
361
362
  - todo.md