metanorma-utils 1.5.5 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/utils/log.rb +105 -19
- data/lib/utils/main.rb +51 -35
- data/lib/utils/version.rb +1 -1
- data/lib/utils/xml.rb +42 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3234e3bc74a4ba02f150016c23db7e9b3d03386ae9f78ac42e3a64d78e31556e
|
4
|
+
data.tar.gz: 5ac7ae07a58d238b199f7e4cb9b79175e6ea0e99dfa5185358fffb579906de0f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc1e64c876f4d3bafd6996bd10421c810370ca3b512f702a1a09079a49d242773c90206f68a8f1187a574f13a7fbd9c7b4ec6dd122be18b9e7d6568785483066
|
7
|
+
data.tar.gz: dde8f1ea187e9cf6e9d80972c3b785c499a2c55fd6c8e9f3550e666a6b0e69ae711ba27c3e604d0c5e9f0fbe085a28ee45c240e5f969995497a5fd5b81c4aa59
|
data/lib/utils/log.rb
CHANGED
@@ -1,30 +1,57 @@
|
|
1
|
+
require "htmlentities"
|
2
|
+
|
1
3
|
module Metanorma
|
2
4
|
module Utils
|
3
5
|
class Log
|
6
|
+
attr_writer :xml
|
7
|
+
|
4
8
|
def initialize
|
5
9
|
@log = {}
|
10
|
+
@c = HTMLEntities.new
|
11
|
+
@mapid = {}
|
6
12
|
end
|
7
13
|
|
8
14
|
def add(category, loc, msg)
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
@log[category] <<
|
13
|
-
context: context(loc) }
|
15
|
+
@novalid and return
|
16
|
+
@log[category] ||= []
|
17
|
+
item = create_entry(loc, msg)
|
18
|
+
@log[category] << item
|
14
19
|
loc = loc.nil? ? "" : "(#{current_location(loc)}): "
|
15
20
|
warn "#{category}: #{loc}#{msg}"
|
16
21
|
end
|
17
22
|
|
23
|
+
def create_entry(loc, msg)
|
24
|
+
msg = msg.encode("UTF-8", invalid: :replace, undef: :replace)
|
25
|
+
item = { location: current_location(loc),
|
26
|
+
message: msg, context: context(loc), line: line(loc, msg) }
|
27
|
+
if item[:message].include?(" :: ")
|
28
|
+
a = item[:message].split(" :: ", 2)
|
29
|
+
item[:context] = a[1]
|
30
|
+
item[:message] = a[0]
|
31
|
+
end
|
32
|
+
item
|
33
|
+
end
|
34
|
+
|
18
35
|
def current_location(node)
|
19
36
|
if node.nil? then ""
|
37
|
+
elsif node.respond_to?(:id) && !node.id.nil? then "ID #{node.id}"
|
38
|
+
elsif node.respond_to?(:id) && node.id.nil? && node.respond_to?(:parent)
|
39
|
+
while !node.nil? && node.id.nil?
|
40
|
+
node = node.parent
|
41
|
+
end
|
42
|
+
node.nil? ? "" : "ID #{node.id}"
|
43
|
+
elsif node.respond_to?(:to_xml) && node.respond_to?(:parent)
|
44
|
+
while !node.nil? && node["id"].nil? && node.respond_to?(:parent)
|
45
|
+
node = node.parent
|
46
|
+
end
|
47
|
+
node.respond_to?(:parent) ? "ID #{node['id']}" : ""
|
20
48
|
elsif node.is_a? String then node
|
21
49
|
elsif node.respond_to?(:lineno) && !node.lineno.nil? &&
|
22
50
|
!node.lineno.empty?
|
23
51
|
"Asciidoctor Line #{'%06d' % node.lineno}"
|
24
52
|
elsif node.respond_to?(:line) && !node.line.nil?
|
25
53
|
"XML Line #{'%06d' % node.line}"
|
26
|
-
elsif node.respond_to?(:
|
27
|
-
else
|
54
|
+
elsif node.respond_to?(:parent)
|
28
55
|
while !node.nil? &&
|
29
56
|
(!node.respond_to?(:level) || node.level.positive?) &&
|
30
57
|
(!node.respond_to?(:context) || node.context != :section)
|
@@ -33,6 +60,17 @@ module Metanorma
|
|
33
60
|
node&.context == :section
|
34
61
|
end
|
35
62
|
"??"
|
63
|
+
else "??"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def line(node, msg)
|
68
|
+
if node.respond_to?(:line) && !node.line.nil?
|
69
|
+
"#{'%06d' % node.line}"
|
70
|
+
elsif /^XML Line /.match?(msg)
|
71
|
+
msg.sub(/^XML Line /, "").sub(/:.*$/, "")
|
72
|
+
else
|
73
|
+
"000000"
|
36
74
|
end
|
37
75
|
end
|
38
76
|
|
@@ -54,25 +92,73 @@ module Metanorma
|
|
54
92
|
ret.to_xml
|
55
93
|
end
|
56
94
|
|
95
|
+
def log_hdr(file)
|
96
|
+
<<~HTML
|
97
|
+
<html><head><title>#{file} errors</title>
|
98
|
+
<style> pre { white-space: pre-wrap; } </style>
|
99
|
+
</head><body><h1>#{file} errors</h1>
|
100
|
+
HTML
|
101
|
+
end
|
102
|
+
|
57
103
|
def write(file)
|
104
|
+
@filename = file.sub(".err.html", ".html")
|
58
105
|
File.open(file, "w:UTF-8") do |f|
|
59
|
-
f.puts
|
60
|
-
@log.each_key
|
61
|
-
|
62
|
-
@log[key].sort_by { |a| a[:location] }.each do |n|
|
63
|
-
write1(f, n)
|
64
|
-
end
|
65
|
-
end
|
106
|
+
f.puts log_hdr(file)
|
107
|
+
@log.each_key { |key| write_key(f, key) }
|
108
|
+
f.puts "</body></html>\n"
|
66
109
|
end
|
67
110
|
end
|
68
111
|
|
112
|
+
def write_key(file, key)
|
113
|
+
file.puts <<~HTML
|
114
|
+
<h2>#{key}</h2>\n<table border="1">
|
115
|
+
<thead><th width="5%">Line</th><th width="20%">ID</th><th width="30%">Message</th><th width="45%">Context</th></thead>
|
116
|
+
<tbody>
|
117
|
+
HTML
|
118
|
+
@log[key].sort_by { |a| [a[:line], a[:location], a[:message]] }
|
119
|
+
.each do |n|
|
120
|
+
write1(file, n)
|
121
|
+
end
|
122
|
+
file.puts "</tbody></table>\n"
|
123
|
+
end
|
124
|
+
|
69
125
|
def write1(file, entry)
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
entry[:
|
74
|
-
|
126
|
+
line = entry[:line]
|
127
|
+
line = nil if line == "000000"
|
128
|
+
loc = loc_link(entry)
|
129
|
+
msg = break_up_long_str(entry[:message], 10, 2)
|
130
|
+
.gsub(/`([^`]+)`/, "<code>\\1</code>")
|
131
|
+
entry[:context] and context = entry[:context].split("\n").first(5)
|
132
|
+
.join("\n").gsub("><", "> <")
|
133
|
+
write_entry(file, line, loc, msg, context)
|
134
|
+
end
|
135
|
+
|
136
|
+
def mapid(old, new)
|
137
|
+
@mapid[old] = new
|
138
|
+
end
|
139
|
+
|
140
|
+
def loc_link(entry)
|
141
|
+
loc = entry[:location]
|
142
|
+
loc.nil? || loc.empty? and loc = "--"
|
143
|
+
if /^ID /.match?(loc)
|
144
|
+
loc.sub!(/^ID /, "")
|
145
|
+
loc = @mapid[loc] while @mapid[loc]
|
146
|
+
url = "#{@filename}##{loc}"
|
75
147
|
end
|
148
|
+
loc &&= break_up_long_str(loc, 10, 2)
|
149
|
+
url and loc = "<a href='#{url}'>#{loc}</a>"
|
150
|
+
loc
|
151
|
+
end
|
152
|
+
|
153
|
+
def break_up_long_str(str, threshold, punct)
|
154
|
+
Metanorma::Utils.break_up_long_str(str, threshold, punct)
|
155
|
+
end
|
156
|
+
|
157
|
+
def write_entry(file, line, loc, msg, context)
|
158
|
+
context &&= @c.encode(break_up_long_str(context, 40, 2))
|
159
|
+
file.print <<~HTML
|
160
|
+
<tr><td>#{line}</td><th><code>#{loc}</code></th><td>#{msg}</td><td><pre>#{context}</pre></td></tr>
|
161
|
+
HTML
|
76
162
|
end
|
77
163
|
end
|
78
164
|
end
|
data/lib/utils/main.rb
CHANGED
@@ -8,12 +8,6 @@ require "csv"
|
|
8
8
|
module Metanorma
|
9
9
|
module Utils
|
10
10
|
class << self
|
11
|
-
def attr_code(attributes)
|
12
|
-
attributes.compact.transform_values do |v|
|
13
|
-
v.is_a?(String) ? HTMLEntities.new.decode(v) : v
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
11
|
# , " => ," : CSV definition does not deal with space followed by quote
|
18
12
|
# at start of field
|
19
13
|
def csv_split(text, delim = ";")
|
@@ -23,15 +17,6 @@ module Metanorma
|
|
23
17
|
col_sep: delim)&.compact&.map(&:strip)
|
24
18
|
end
|
25
19
|
|
26
|
-
# if the contents of node are blocks, output them to out;
|
27
|
-
# else, wrap them in <p>
|
28
|
-
def wrap_in_para(node, out)
|
29
|
-
if node.blocks? then out << node.content
|
30
|
-
else
|
31
|
-
out.p { |p| p << node.content }
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
20
|
def asciidoc_sub(text, flavour = :standoc)
|
36
21
|
return nil if text.nil?
|
37
22
|
return "" if text.empty?
|
@@ -141,29 +126,60 @@ module Metanorma
|
|
141
126
|
%w(Arab Aran Hebr).include? script
|
142
127
|
end
|
143
128
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
129
|
+
LONGSTR_THRESHOLD = 10
|
130
|
+
LONGSTR_NOPUNCT = 2
|
131
|
+
|
132
|
+
# break on punct every LONGSTRING_THRESHOLD chars, with zero width space
|
133
|
+
# if punct fails, try break on camel case, with soft hyphen
|
134
|
+
# break regardless every LONGSTRING_THRESHOLD * LONGSTR_NOPUNCT,
|
135
|
+
# with soft hyphen
|
136
|
+
def break_up_long_str(text, threshold = LONGSTR_THRESHOLD, nopunct = LONGSTR_NOPUNCT)
|
137
|
+
/^\s*$/.match?(text) and return text
|
138
|
+
text.split(/(?=\s)/).map do |w|
|
139
|
+
if /^\s*$/.match(text) || (w.size < LONGSTR_THRESHOLD) then w
|
140
|
+
else
|
141
|
+
w.scan(/.{,#{LONGSTR_THRESHOLD}}/o).map.with_index do |w1, i|
|
142
|
+
w1.size < LONGSTR_THRESHOLD ? w1 : break_up_long_str1(w1, i + 1)
|
143
|
+
end.join
|
144
|
+
end
|
145
|
+
end.join
|
146
|
+
end
|
147
|
+
|
148
|
+
STR_BREAKUP_RE = %r{
|
149
|
+
(?<=[=_—–\u2009→?+;]) | # break after any of these
|
150
|
+
(?<=[,.:])(?!\d) | # break on punct only if not preceding digit
|
151
|
+
(?<=[>])(?![>]) | # > not >->
|
152
|
+
(?<=[\]])(?![\]]) | # ] not ]-]
|
153
|
+
(?<=//) | # //
|
154
|
+
(?<=[/])(?![/]) | # / not /-/
|
155
|
+
(?<![<])(?=[<]) | # < not <-<
|
156
|
+
(?<=\p{L})(?=[(\{\[]\p{L}) # letter and bracket, followed by letter
|
157
|
+
}x.freeze
|
158
|
+
|
159
|
+
CAMEL_CASE_RE = %r{
|
160
|
+
(?<=\p{Ll}\p{Ll})(?=\p{Lu}\p{Ll}\p{Ll}) # 2 lowerc / upperc, 2 lowerc
|
161
|
+
}x.freeze
|
162
|
+
|
163
|
+
def break_up_long_str1(text, iteration)
|
164
|
+
s, separator = break_up_long_str2(text)
|
165
|
+
if s.size == 1 # could not break up
|
166
|
+
(iteration % LONGSTR_NOPUNCT).zero? and
|
167
|
+
text += "\u00ad" # force soft hyphen
|
168
|
+
text
|
169
|
+
else
|
170
|
+
s[-1] = "#{separator}#{s[-1]}"
|
171
|
+
s.join
|
157
172
|
end
|
158
|
-
ins
|
159
173
|
end
|
160
174
|
|
161
|
-
def
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
175
|
+
def break_up_long_str2(text)
|
176
|
+
s = text.split(STR_BREAKUP_RE, -1)
|
177
|
+
separator = "\u200b"
|
178
|
+
if s.size == 1
|
179
|
+
s = text.split(CAMEL_CASE_RE)
|
180
|
+
separator = "\u00ad"
|
181
|
+
end
|
182
|
+
[s, separator]
|
167
183
|
end
|
168
184
|
end
|
169
185
|
end
|
data/lib/utils/version.rb
CHANGED
data/lib/utils/xml.rb
CHANGED
@@ -21,8 +21,15 @@ module Metanorma
|
|
21
21
|
HERE
|
22
22
|
|
23
23
|
class << self
|
24
|
+
def attr_code(attributes)
|
25
|
+
attributes.compact.transform_values do |v|
|
26
|
+
v.is_a?(String) ? HTMLEntities.new.decode(v) : v
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
24
30
|
def to_ncname(tag, asciionly: true)
|
25
|
-
asciionly and tag = HTMLEntities.new.encode(tag, :basic,
|
31
|
+
asciionly and tag = HTMLEntities.new.encode(tag, :basic,
|
32
|
+
:hexadecimal)
|
26
33
|
start = tag[0]
|
27
34
|
ret1 = if %r([#{NAMECHAR}#])o.match?(start)
|
28
35
|
"_"
|
@@ -88,12 +95,46 @@ module Metanorma
|
|
88
95
|
end.join
|
89
96
|
end
|
90
97
|
|
98
|
+
# if the contents of node are blocks, output them to out;
|
99
|
+
# else, wrap them in <p>
|
100
|
+
def wrap_in_para(node, out)
|
101
|
+
if node.blocks? then out << node.content
|
102
|
+
else
|
103
|
+
out.p { |p| p << node.content }
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
91
107
|
# all element/attribute pairs that are ID anchors in Metanorma
|
92
108
|
def anchor_attributes
|
93
109
|
[%w[* id], %w[* bibitemid], %w[review from],
|
94
110
|
%w[review to], %w[index to], %w[xref target],
|
95
111
|
%w[callout target], %w[location target]]
|
96
112
|
end
|
113
|
+
|
114
|
+
# convert definition list term/value pair into Nokogiri XML attribute
|
115
|
+
def dl_to_attrs(elem, dlist, name)
|
116
|
+
e = dlist.at("./dt[text()='#{name}']") or return
|
117
|
+
val = e.at("./following::dd/p") || e.at("./following::dd") or return
|
118
|
+
elem[name] = val.text
|
119
|
+
end
|
120
|
+
|
121
|
+
# convert definition list term/value pairs into Nokogiri XML elements
|
122
|
+
def dl_to_elems(ins, elem, dlist, name)
|
123
|
+
a = elem.at("./#{name}[last()]")
|
124
|
+
ins = a if a
|
125
|
+
dlist.xpath("./dt[text()='#{name}']").each do |e|
|
126
|
+
ins = dl_to_elems1(e, name, ins)
|
127
|
+
end
|
128
|
+
ins
|
129
|
+
end
|
130
|
+
|
131
|
+
def dl_to_elems1(term, name, ins)
|
132
|
+
v = term.at("./following::dd")
|
133
|
+
e = v.elements and e.size == 1 && e.first.name == "p" and v = e.first
|
134
|
+
v.name = name
|
135
|
+
ins.next = v
|
136
|
+
ins.next
|
137
|
+
end
|
97
138
|
end
|
98
139
|
end
|
99
140
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metanorma-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-09
|
11
|
+
date: 2023-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: asciidoctor
|