coradoc 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/coradoc/element/block/side.rb +1 -1
- data/lib/coradoc/element/image/core.rb +3 -1
- data/lib/coradoc/element/list/core.rb +7 -2
- data/lib/coradoc/element/list/definition.rb +3 -1
- data/lib/coradoc/element/list_item.rb +11 -3
- data/lib/coradoc/element/paragraph.rb +2 -2
- data/lib/coradoc/element/section.rb +6 -0
- data/lib/coradoc/element/table.rb +5 -0
- data/lib/coradoc/element/title.rb +1 -1
- data/lib/coradoc/reverse_adoc/cleaner.rb +1 -2
- data/lib/coradoc/reverse_adoc/converters/img.rb +10 -3
- data/lib/coradoc/reverse_adoc/converters/table.rb +33 -8
- data/lib/coradoc/reverse_adoc/converters.rb +3 -1
- data/lib/coradoc/reverse_adoc/plugins/plateau.rb +59 -27
- data/lib/coradoc/reverse_adoc/postprocessor.rb +10 -6
- data/lib/coradoc/util.rb +10 -0
- data/lib/coradoc/version.rb +1 -1
- data/lib/coradoc.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e44c5e565e224487496ecdd7be8c5c88e5f05fd2a42a4f2cbf31746a908aa25
|
4
|
+
data.tar.gz: a1e8fb651b29b516071e91c5c6f7bf72a68c34fd4ae9512a928d3ea3185bc0c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4df0a3edcebe6990006c7ca951b59f717f05d9b25eb6481394fe276dbaa318f6016e7a915d268abc19d61cec0605ab7d44466662d499cfa08a0ffa2f98bd8dcc
|
7
|
+
data.tar.gz: 9a44116b58e7e99a14ae856b1b06e23c26cbe2913f970a6849615282c3f8e33b05c538c39c72a827fa65aef30f56c9eb494d2f48574ea01f86bca858d9692e16
|
@@ -12,6 +12,7 @@ module Coradoc
|
|
12
12
|
@anchor = @id.nil? ? nil : Coradoc::Element::Inline::Anchor.new(@id)
|
13
13
|
@src = src
|
14
14
|
@attributes = options.fetch(:attributes, AttributeList.new)
|
15
|
+
@annotate_missing = options.fetch(:annotate_missing)
|
15
16
|
@title = options.fetch(:title, nil)
|
16
17
|
if @attributes.any?
|
17
18
|
@attributes.validate_positional(VALIDATORS_POSITIONAL)
|
@@ -20,10 +21,11 @@ module Coradoc
|
|
20
21
|
end
|
21
22
|
|
22
23
|
def to_adoc
|
24
|
+
missing = "// FIXME: Missing image: #{@annotate_missing}\n" if @annotate_missing
|
23
25
|
anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
|
24
26
|
title = ".#{@title}\n" unless @title.to_s.empty?
|
25
27
|
attrs = @attributes.to_adoc
|
26
|
-
[anchor, title, "image", @colons, @src, attrs].join("")
|
28
|
+
[missing, anchor, title, "image", @colons, @src, attrs].join("")
|
27
29
|
end
|
28
30
|
|
29
31
|
extend AttributeList::Matchers
|
@@ -25,8 +25,13 @@ module Coradoc
|
|
25
25
|
@items.each do |item|
|
26
26
|
c = Coradoc::Generator.gen_adoc(item)
|
27
27
|
if !c.empty?
|
28
|
-
|
29
|
-
|
28
|
+
# If there's a list inside a list directly, we want to
|
29
|
+
# skip adding an empty list item.
|
30
|
+
# See: https://github.com/metanorma/coradoc/issues/96
|
31
|
+
unless item.is_a? List::Core
|
32
|
+
content << prefix.to_s
|
33
|
+
content << " " if c[0]!=" "
|
34
|
+
end
|
30
35
|
content << c
|
31
36
|
end
|
32
37
|
end
|
@@ -1,9 +1,11 @@
|
|
1
1
|
module Coradoc
|
2
2
|
module Element
|
3
3
|
module List
|
4
|
-
class Definition <
|
4
|
+
class Definition < Base
|
5
5
|
attr_accessor :items, :delimiter
|
6
6
|
|
7
|
+
declare_children :items
|
8
|
+
|
7
9
|
def initialize(items, options = {})
|
8
10
|
@items = items
|
9
11
|
@delimiter = options.fetch(:delimiter, "::")
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Coradoc
|
2
2
|
module Element
|
3
3
|
class ListItem < Base
|
4
|
-
attr_accessor :id
|
4
|
+
attr_accessor :id, :content, :anchor
|
5
5
|
|
6
6
|
declare_children :content, :id, :anchor
|
7
7
|
|
@@ -14,8 +14,16 @@ module Coradoc
|
|
14
14
|
def to_adoc
|
15
15
|
anchor = @anchor.nil? ? "" : @anchor.to_adoc.to_s
|
16
16
|
content = Array(@content).map do |subitem|
|
17
|
-
Coradoc::
|
18
|
-
|
17
|
+
next if subitem.is_a? Coradoc::Element::Inline::HardLineBreak
|
18
|
+
|
19
|
+
subcontent = Coradoc::Generator.gen_adoc(subitem)
|
20
|
+
# Only try to postprocess elements that are text,
|
21
|
+
# otherwise we could strip markup.
|
22
|
+
if Coradoc.is_a_single?(subitem, Coradoc::Element::TextElement)
|
23
|
+
subcontent = Coradoc.strip_unicode(subcontent)
|
24
|
+
end
|
25
|
+
subcontent.chomp
|
26
|
+
end.compact.join("\n+\n")
|
19
27
|
|
20
28
|
" #{anchor}#{content.chomp}\n"
|
21
29
|
end
|
@@ -24,9 +24,9 @@ module Coradoc
|
|
24
24
|
def to_adoc
|
25
25
|
anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
|
26
26
|
if @tdsinglepara
|
27
|
-
anchor.to_s << Coradoc::Generator.gen_adoc(@content)
|
27
|
+
anchor.to_s << Coradoc.strip_unicode(Coradoc::Generator.gen_adoc(@content))
|
28
28
|
else
|
29
|
-
"\n\n#{anchor}" << Coradoc::Generator.gen_adoc(@content)
|
29
|
+
"\n\n#{anchor}" << Coradoc.strip_unicode(Coradoc::Generator.gen_adoc(@content)) << "\n\n"
|
30
30
|
end
|
31
31
|
end
|
32
32
|
end
|
@@ -34,6 +34,12 @@ module Coradoc
|
|
34
34
|
# with something.
|
35
35
|
content = " #{content}" if content.start_with?(" +\n")
|
36
36
|
|
37
|
+
# Only try to postprocess elements that are text,
|
38
|
+
# otherwise we could strip markup.
|
39
|
+
if Coradoc.is_a_single?(@contents, Coradoc::Element::TextElement)
|
40
|
+
content = Coradoc.strip_unicode(content)
|
41
|
+
end
|
42
|
+
|
37
43
|
"\n#{anchor}" << title << content << sections << "\n"
|
38
44
|
end
|
39
45
|
|
@@ -76,6 +76,11 @@ module Coradoc
|
|
76
76
|
anchor = @anchor.nil? ? "" : @anchor.to_adoc.to_s
|
77
77
|
content = simplify_block_content(@content)
|
78
78
|
content = Coradoc::Generator.gen_adoc(content)
|
79
|
+
# Only try to postprocess elements that are text,
|
80
|
+
# otherwise we could strip markup.
|
81
|
+
if Coradoc.is_a_single?(@content, Coradoc::Element::TextElement)
|
82
|
+
content = Coradoc.strip_unicode(content)
|
83
|
+
end
|
79
84
|
"#{@colrowattr}#{@alignattr}#{@style}| #{anchor}#{content}"
|
80
85
|
end
|
81
86
|
end
|
@@ -21,7 +21,7 @@ module Coradoc
|
|
21
21
|
|
22
22
|
def to_adoc
|
23
23
|
anchor = @anchor.nil? ? "" : "#{@anchor.to_adoc}\n"
|
24
|
-
content = Coradoc::Generator.gen_adoc(@content)
|
24
|
+
content = Coradoc.strip_unicode(Coradoc::Generator.gen_adoc(@content))
|
25
25
|
<<~HERE
|
26
26
|
|
27
27
|
#{anchor}#{style_str}#{level_str} #{content}
|
@@ -83,8 +83,7 @@ module Coradoc::ReverseAdoc
|
|
83
83
|
|
84
84
|
def scrub_whitespace(string)
|
85
85
|
string.gsub!(/ | |\u00a0/i, " ") # HTML encoded spaces
|
86
|
-
string.
|
87
|
-
string.sub!(/[[:space:]]+\z$/m, "") # document trailing whitespace
|
86
|
+
string = Coradoc.strip_unicode(string) # Strip document-level leading and trailing whitespace
|
88
87
|
string.gsub!(/( +)$/, " ") # line trailing whitespace
|
89
88
|
string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
|
90
89
|
# string.delete!('?| ') # Unicode non-breaking spaces, injected as tabs
|
@@ -33,7 +33,13 @@ module Coradoc::ReverseAdoc
|
|
33
33
|
# puts "image_dest_path: #{image_dest_path.to_s}"
|
34
34
|
# puts "image_src_path: #{image_src_path.to_s}"
|
35
35
|
|
36
|
-
|
36
|
+
if File.exist?(image_src_path)
|
37
|
+
FileUtils.cp(image_src_path, image_dest_path)
|
38
|
+
else
|
39
|
+
@annotate_missing = image_src_path
|
40
|
+
Kernel.warn "Image #{image_src_path} does not exist"
|
41
|
+
end
|
42
|
+
|
37
43
|
image_number_increment
|
38
44
|
|
39
45
|
image_dest_path.relative_path_from(dest_dir)
|
@@ -88,11 +94,12 @@ module Coradoc::ReverseAdoc
|
|
88
94
|
|
89
95
|
if src
|
90
96
|
Coradoc::Element::Image::BlockImage.new(title, id, src,
|
91
|
-
attributes: attributes
|
97
|
+
attributes: attributes,
|
98
|
+
annotate_missing: @annotate_missing)
|
92
99
|
end
|
93
100
|
end
|
94
101
|
end
|
95
102
|
|
96
|
-
register :img, Img
|
103
|
+
register :img, Img
|
97
104
|
end
|
98
105
|
end
|
@@ -53,13 +53,17 @@ module Coradoc::ReverseAdoc
|
|
53
53
|
rules_attr = rules(node)
|
54
54
|
attrs.add_named("rules", rules_attr) if rules_attr
|
55
55
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
56
|
+
# We can't, and shouldn't, do those calculations if the table we are
|
57
|
+
# processing is empty.
|
58
|
+
unless empty?(node)
|
59
|
+
cols = ensure_row_column_integrity_and_get_column_sizes(node)
|
60
|
+
attrs.add_named("cols", cols)
|
61
|
+
|
62
|
+
# Header first rows can't span multiple rows - drop header if they do.
|
63
|
+
header = node.at_xpath(".//tr")
|
64
|
+
unless header.xpath("./td | ./th").all? { |i| [nil, "1", ""].include? i["rowspan"] }
|
65
|
+
attrs.add_named("options", ["noheader"])
|
66
|
+
end
|
63
67
|
end
|
64
68
|
|
65
69
|
# This line should be removed.
|
@@ -68,6 +72,10 @@ module Coradoc::ReverseAdoc
|
|
68
72
|
attrs
|
69
73
|
end
|
70
74
|
|
75
|
+
def empty?(node)
|
76
|
+
!node.at_xpath(".//td | .//th")
|
77
|
+
end
|
78
|
+
|
71
79
|
def ensure_row_column_integrity_and_get_column_sizes(node)
|
72
80
|
rows = node.xpath(".//tr")
|
73
81
|
num_rows = rows.length
|
@@ -173,7 +181,9 @@ module Coradoc::ReverseAdoc
|
|
173
181
|
|
174
182
|
row_obj = row.last.first.parent
|
175
183
|
doc = row_obj.document
|
176
|
-
|
184
|
+
added_node = Nokogiri::XML::Node.new("td", doc)
|
185
|
+
added_node["x-added"] = "x-added"
|
186
|
+
row_obj.add_child(added_node)
|
177
187
|
|
178
188
|
modified = true
|
179
189
|
end
|
@@ -194,6 +204,21 @@ module Coradoc::ReverseAdoc
|
|
194
204
|
end
|
195
205
|
|
196
206
|
unless cell_matrix_correct
|
207
|
+
# It may be a special case that we need to add virtual cells at the
|
208
|
+
# beginning, not the end, of a row.
|
209
|
+
needs_recompute = false
|
210
|
+
cell_matrix.each do |row|
|
211
|
+
if row.compact.length != row.length
|
212
|
+
last_cell = row.last
|
213
|
+
if last_cell["x-added"]
|
214
|
+
last_cell.parent.prepend_child(last_cell)
|
215
|
+
needs_recompute = true
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end
|
219
|
+
recompute.() if needs_recompute
|
220
|
+
|
221
|
+
# But otherwise... we've got a really nasty table.
|
197
222
|
warn <<~WARNING.gsub("\n", " ")
|
198
223
|
**** Couldn't construct a valid image of a table on line
|
199
224
|
#{node.line}. We need that to reliably compute column
|
@@ -10,7 +10,9 @@ module Coradoc::ReverseAdoc
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def self.lookup(tag_name)
|
13
|
-
@@converters[tag_name.to_sym]
|
13
|
+
converter = @@converters[tag_name.to_sym] || default_converter(tag_name)
|
14
|
+
converter = converter.new if converter.respond_to? :new
|
15
|
+
converter
|
14
16
|
end
|
15
17
|
|
16
18
|
# Note: process won't run plugin hooks
|
@@ -30,12 +30,20 @@ module Coradoc::ReverseAdoc
|
|
30
30
|
html_tree_change_tag_name_by_css(".pitemdata", "h3")
|
31
31
|
html_tree_change_tag_name_by_css(".sitemdata", "h4")
|
32
32
|
html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', "th")
|
33
|
+
html_tree_change_tag_name_by_css('td[bgcolor="#d0cece"]', "th")
|
34
|
+
html_tree_change_tag_name_by_css('.framedata, .frame_container_box', 'aside')
|
35
|
+
html_tree_change_tag_name_by_css('.frame2data', 'pre')
|
36
|
+
# Assumption that all code snippets in those documents are XML...
|
37
|
+
html_tree_change_properties_by_css(".frame2data", class: "brush:xml;")
|
33
38
|
|
34
39
|
# Remove some CSS ids that are not important to us
|
35
40
|
html_tree_change_properties_by_css("#__nuxt", id: nil)
|
36
41
|
html_tree_change_properties_by_css("#__layout", id: nil)
|
37
42
|
html_tree_change_properties_by_css("#app", id: nil)
|
38
43
|
|
44
|
+
# Handle lists of document 02
|
45
|
+
html_tree_replace_with_children_by_css(".list_num-wrap")
|
46
|
+
|
39
47
|
# Convert table/img caption to become a caption
|
40
48
|
html_tree.css(".imagedata").each do |e|
|
41
49
|
table = e.parent.next&.children&.first
|
@@ -75,66 +83,82 @@ module Coradoc::ReverseAdoc
|
|
75
83
|
end
|
76
84
|
end
|
77
85
|
|
78
|
-
|
79
|
-
text
|
80
|
-
|
81
|
-
|
82
|
-
text = text.strip.gsub(/^/, "*** ")
|
83
|
-
"\n\n//-PT3D\n#{text}\n//-ENDPT3D\n\n"
|
84
|
-
end
|
85
|
-
|
86
|
-
html_tree_add_hook_pre_by_css ".text4data" do |node,|
|
87
|
-
text = html_tree_process_to_adoc(node).strip
|
88
|
-
next "" if text.empty? || text == "\u3000"
|
86
|
+
(3..4).each do |i|
|
87
|
+
html_tree_add_hook_pre_by_css ".text#{i}data" do |node,|
|
88
|
+
text = html_tree_process_to_adoc(node).strip
|
89
|
+
next "" if text.empty? || text == "\u3000"
|
89
90
|
|
90
|
-
|
91
|
-
|
91
|
+
text = text.strip.gsub(/^/, "#{'*' * i} ")
|
92
|
+
"\n\n//-PT#{i}D\n#{text}\n//-ENDPT#{i}D\n\n"
|
93
|
+
end
|
92
94
|
end
|
93
95
|
|
94
|
-
|
95
|
-
text
|
96
|
+
(2..3).each do |i|
|
97
|
+
html_tree_add_hook_pre_by_css ".text#{i}data_point ul" do |node,|
|
98
|
+
text = html_tree_process_to_adoc(node.children.first.children).strip
|
96
99
|
|
97
|
-
|
100
|
+
"#{'*' * i} #{text}\n"
|
101
|
+
end
|
98
102
|
end
|
99
103
|
|
100
|
-
|
101
|
-
|
104
|
+
(1..20).each do |i|
|
105
|
+
html_tree_add_hook_pre_by_css ".numtextdata_num .list_num#{i}" do |node,|
|
106
|
+
text = html_tree_process_to_adoc(node).strip
|
102
107
|
|
103
|
-
|
108
|
+
"[start=#{i}]\n. #{text}\n"
|
109
|
+
end
|
104
110
|
end
|
105
111
|
|
106
112
|
# html_tree_preview
|
107
113
|
end
|
108
114
|
|
115
|
+
IM = /[A-Z0-9]{1,3}/
|
116
|
+
|
109
117
|
def handle_headers(node, coradoc, state)
|
110
|
-
|
111
|
-
|
118
|
+
content = coradoc.content.map(&:content).join
|
119
|
+
|
120
|
+
if %w[toc0 toc_0].any? { |i| coradoc.id&.start_with?(i) }
|
112
121
|
# Special content
|
113
122
|
case content.strip
|
114
123
|
when "はじめに" # Introduction
|
115
124
|
coradoc.style = "abstract" # The older version document has ".preface"
|
125
|
+
coradoc.level_int = 1
|
116
126
|
when "改定の概要" # Revision overview
|
117
127
|
coradoc.style = "abstract" # The older version document has ".preface"
|
128
|
+
coradoc.level_int = 1
|
118
129
|
when "参考文献" # Bibliography
|
119
130
|
coradoc.style = "bibliography"
|
131
|
+
coradoc.level_int = 1
|
120
132
|
when "改訂履歴" # Document history
|
121
133
|
coradoc.style = "appendix"
|
134
|
+
coradoc.level_int = 1
|
135
|
+
when "0 概要" # Overview
|
136
|
+
coradoc.style = "abstract" # I'm not sure this is correct
|
137
|
+
coradoc.level_int = 1
|
138
|
+
when "索引" # Index
|
139
|
+
coradoc.style = "index" # I'm not sure this is correct
|
140
|
+
coradoc.level_int = 1
|
122
141
|
else
|
123
|
-
warn "Unknown section #{
|
142
|
+
warn "Unknown section #{content.inspect}"
|
124
143
|
end
|
144
|
+
end
|
125
145
|
|
126
|
-
|
127
|
-
|
146
|
+
if node.name == "h1"
|
147
|
+
if content.start_with?("Annex")
|
148
|
+
coradoc.style = "appendix"
|
149
|
+
coradoc.content.first.content.sub!(/\AAnnex [A-Z]/, "")
|
150
|
+
end
|
128
151
|
end
|
129
152
|
|
130
153
|
# Remove numbers
|
131
|
-
coradoc.content.first.content.sub!(/\A[
|
154
|
+
coradoc.content.first.content.sub!(/\A(#{IM}\.)*#{IM}[[:space:]]/, "")
|
132
155
|
|
133
156
|
coradoc
|
134
157
|
end
|
135
158
|
|
136
159
|
def handle_headers_h4(node, coradoc, state)
|
137
|
-
|
160
|
+
title = Coradoc.strip_unicode(coradoc.content.first.content)
|
161
|
+
case title
|
138
162
|
when /\A\(\d+\)(.*)/
|
139
163
|
coradoc.level_int = 4
|
140
164
|
coradoc.content.first.content = $1.strip
|
@@ -143,8 +167,16 @@ module Coradoc::ReverseAdoc
|
|
143
167
|
coradoc.level_int = 5
|
144
168
|
coradoc.content.first.content = $1.strip
|
145
169
|
coradoc
|
170
|
+
when /\A#{IM}\.#{IM}\.#{IM}\.#{IM}(.*)/
|
171
|
+
coradoc.level_int = 4
|
172
|
+
coradoc.content.first.content = $1.strip
|
146
173
|
else
|
147
|
-
|
174
|
+
if title.empty?
|
175
|
+
# Strip instances of faulty empty paragraphs
|
176
|
+
nil
|
177
|
+
else
|
178
|
+
["// FIXME\n", coradoc]
|
179
|
+
end
|
148
180
|
end
|
149
181
|
end
|
150
182
|
|
@@ -82,13 +82,18 @@ module Coradoc::ReverseAdoc
|
|
82
82
|
previous_sections = {}
|
83
83
|
|
84
84
|
determine_section_id = ->(elem) do
|
85
|
-
|
86
|
-
|
85
|
+
if elem.title.style == "appendix"
|
86
|
+
level = "A"
|
87
|
+
else
|
88
|
+
level = 1
|
89
|
+
end
|
90
|
+
|
91
|
+
section = previous_sections[elem]
|
87
92
|
while section
|
88
|
-
level
|
93
|
+
level = level.succ if elem.title.style == section.title.style
|
89
94
|
section = previous_sections[section]
|
90
95
|
end
|
91
|
-
level
|
96
|
+
level.is_a?(Integer) ? "%02d" % level : level
|
92
97
|
end
|
93
98
|
|
94
99
|
determine_style = ->(elem) do
|
@@ -114,8 +119,7 @@ module Coradoc::ReverseAdoc
|
|
114
119
|
# include tag.
|
115
120
|
section_file = "sections/"
|
116
121
|
section_file += parent_sections[1..title.level_int].map do |parent|
|
117
|
-
|
118
|
-
"%s%02d" % [style, determine_section_id.(parent)]
|
122
|
+
determine_style.(parent) + determine_section_id.(parent)
|
119
123
|
end.join("/")
|
120
124
|
section_file += ".adoc"
|
121
125
|
|
data/lib/coradoc/util.rb
ADDED
data/lib/coradoc/version.rb
CHANGED
data/lib/coradoc.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: coradoc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2024-06-
|
12
|
+
date: 2024-06-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: marcel
|
@@ -356,6 +356,7 @@ files:
|
|
356
356
|
- lib/coradoc/reverse_adoc/plugins/plateau.rb
|
357
357
|
- lib/coradoc/reverse_adoc/postprocessor.rb
|
358
358
|
- lib/coradoc/transformer.rb
|
359
|
+
- lib/coradoc/util.rb
|
359
360
|
- lib/coradoc/version.rb
|
360
361
|
- lib/reverse_adoc.rb
|
361
362
|
- todo.md
|