metanorma-utils 1.5.5 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/utils/log.rb +105 -19
- data/lib/utils/main.rb +51 -35
- data/lib/utils/version.rb +1 -1
- data/lib/utils/xml.rb +42 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3234e3bc74a4ba02f150016c23db7e9b3d03386ae9f78ac42e3a64d78e31556e
|
4
|
+
data.tar.gz: 5ac7ae07a58d238b199f7e4cb9b79175e6ea0e99dfa5185358fffb579906de0f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc1e64c876f4d3bafd6996bd10421c810370ca3b512f702a1a09079a49d242773c90206f68a8f1187a574f13a7fbd9c7b4ec6dd122be18b9e7d6568785483066
|
7
|
+
data.tar.gz: dde8f1ea187e9cf6e9d80972c3b785c499a2c55fd6c8e9f3550e666a6b0e69ae711ba27c3e604d0c5e9f0fbe085a28ee45c240e5f969995497a5fd5b81c4aa59
|
data/lib/utils/log.rb
CHANGED
@@ -1,30 +1,57 @@
|
|
1
|
+
require "htmlentities"
|
2
|
+
|
1
3
|
module Metanorma
|
2
4
|
module Utils
|
3
5
|
class Log
|
6
|
+
attr_writer :xml
|
7
|
+
|
4
8
|
def initialize
|
5
9
|
@log = {}
|
10
|
+
@c = HTMLEntities.new
|
11
|
+
@mapid = {}
|
6
12
|
end
|
7
13
|
|
8
14
|
def add(category, loc, msg)
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
@log[category] <<
|
13
|
-
context: context(loc) }
|
15
|
+
@novalid and return
|
16
|
+
@log[category] ||= []
|
17
|
+
item = create_entry(loc, msg)
|
18
|
+
@log[category] << item
|
14
19
|
loc = loc.nil? ? "" : "(#{current_location(loc)}): "
|
15
20
|
warn "#{category}: #{loc}#{msg}"
|
16
21
|
end
|
17
22
|
|
23
|
+
def create_entry(loc, msg)
|
24
|
+
msg = msg.encode("UTF-8", invalid: :replace, undef: :replace)
|
25
|
+
item = { location: current_location(loc),
|
26
|
+
message: msg, context: context(loc), line: line(loc, msg) }
|
27
|
+
if item[:message].include?(" :: ")
|
28
|
+
a = item[:message].split(" :: ", 2)
|
29
|
+
item[:context] = a[1]
|
30
|
+
item[:message] = a[0]
|
31
|
+
end
|
32
|
+
item
|
33
|
+
end
|
34
|
+
|
18
35
|
def current_location(node)
|
19
36
|
if node.nil? then ""
|
37
|
+
elsif node.respond_to?(:id) && !node.id.nil? then "ID #{node.id}"
|
38
|
+
elsif node.respond_to?(:id) && node.id.nil? && node.respond_to?(:parent)
|
39
|
+
while !node.nil? && node.id.nil?
|
40
|
+
node = node.parent
|
41
|
+
end
|
42
|
+
node.nil? ? "" : "ID #{node.id}"
|
43
|
+
elsif node.respond_to?(:to_xml) && node.respond_to?(:parent)
|
44
|
+
while !node.nil? && node["id"].nil? && node.respond_to?(:parent)
|
45
|
+
node = node.parent
|
46
|
+
end
|
47
|
+
node.respond_to?(:parent) ? "ID #{node['id']}" : ""
|
20
48
|
elsif node.is_a? String then node
|
21
49
|
elsif node.respond_to?(:lineno) && !node.lineno.nil? &&
|
22
50
|
!node.lineno.empty?
|
23
51
|
"Asciidoctor Line #{'%06d' % node.lineno}"
|
24
52
|
elsif node.respond_to?(:line) && !node.line.nil?
|
25
53
|
"XML Line #{'%06d' % node.line}"
|
26
|
-
elsif node.respond_to?(:
|
27
|
-
else
|
54
|
+
elsif node.respond_to?(:parent)
|
28
55
|
while !node.nil? &&
|
29
56
|
(!node.respond_to?(:level) || node.level.positive?) &&
|
30
57
|
(!node.respond_to?(:context) || node.context != :section)
|
@@ -33,6 +60,17 @@ module Metanorma
|
|
33
60
|
node&.context == :section
|
34
61
|
end
|
35
62
|
"??"
|
63
|
+
else "??"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def line(node, msg)
|
68
|
+
if node.respond_to?(:line) && !node.line.nil?
|
69
|
+
"#{'%06d' % node.line}"
|
70
|
+
elsif /^XML Line /.match?(msg)
|
71
|
+
msg.sub(/^XML Line /, "").sub(/:.*$/, "")
|
72
|
+
else
|
73
|
+
"000000"
|
36
74
|
end
|
37
75
|
end
|
38
76
|
|
@@ -54,25 +92,73 @@ module Metanorma
|
|
54
92
|
ret.to_xml
|
55
93
|
end
|
56
94
|
|
95
|
+
def log_hdr(file)
|
96
|
+
<<~HTML
|
97
|
+
<html><head><title>#{file} errors</title>
|
98
|
+
<style> pre { white-space: pre-wrap; } </style>
|
99
|
+
</head><body><h1>#{file} errors</h1>
|
100
|
+
HTML
|
101
|
+
end
|
102
|
+
|
57
103
|
def write(file)
|
104
|
+
@filename = file.sub(".err.html", ".html")
|
58
105
|
File.open(file, "w:UTF-8") do |f|
|
59
|
-
f.puts
|
60
|
-
@log.each_key
|
61
|
-
|
62
|
-
@log[key].sort_by { |a| a[:location] }.each do |n|
|
63
|
-
write1(f, n)
|
64
|
-
end
|
65
|
-
end
|
106
|
+
f.puts log_hdr(file)
|
107
|
+
@log.each_key { |key| write_key(f, key) }
|
108
|
+
f.puts "</body></html>\n"
|
66
109
|
end
|
67
110
|
end
|
68
111
|
|
112
|
+
def write_key(file, key)
|
113
|
+
file.puts <<~HTML
|
114
|
+
<h2>#{key}</h2>\n<table border="1">
|
115
|
+
<thead><th width="5%">Line</th><th width="20%">ID</th><th width="30%">Message</th><th width="45%">Context</th></thead>
|
116
|
+
<tbody>
|
117
|
+
HTML
|
118
|
+
@log[key].sort_by { |a| [a[:line], a[:location], a[:message]] }
|
119
|
+
.each do |n|
|
120
|
+
write1(file, n)
|
121
|
+
end
|
122
|
+
file.puts "</tbody></table>\n"
|
123
|
+
end
|
124
|
+
|
69
125
|
def write1(file, entry)
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
entry[:
|
74
|
-
|
126
|
+
line = entry[:line]
|
127
|
+
line = nil if line == "000000"
|
128
|
+
loc = loc_link(entry)
|
129
|
+
msg = break_up_long_str(entry[:message], 10, 2)
|
130
|
+
.gsub(/`([^`]+)`/, "<code>\\1</code>")
|
131
|
+
entry[:context] and context = entry[:context].split("\n").first(5)
|
132
|
+
.join("\n").gsub("><", "> <")
|
133
|
+
write_entry(file, line, loc, msg, context)
|
134
|
+
end
|
135
|
+
|
136
|
+
def mapid(old, new)
|
137
|
+
@mapid[old] = new
|
138
|
+
end
|
139
|
+
|
140
|
+
def loc_link(entry)
|
141
|
+
loc = entry[:location]
|
142
|
+
loc.nil? || loc.empty? and loc = "--"
|
143
|
+
if /^ID /.match?(loc)
|
144
|
+
loc.sub!(/^ID /, "")
|
145
|
+
loc = @mapid[loc] while @mapid[loc]
|
146
|
+
url = "#{@filename}##{loc}"
|
75
147
|
end
|
148
|
+
loc &&= break_up_long_str(loc, 10, 2)
|
149
|
+
url and loc = "<a href='#{url}'>#{loc}</a>"
|
150
|
+
loc
|
151
|
+
end
|
152
|
+
|
153
|
+
def break_up_long_str(str, threshold, punct)
|
154
|
+
Metanorma::Utils.break_up_long_str(str, threshold, punct)
|
155
|
+
end
|
156
|
+
|
157
|
+
def write_entry(file, line, loc, msg, context)
|
158
|
+
context &&= @c.encode(break_up_long_str(context, 40, 2))
|
159
|
+
file.print <<~HTML
|
160
|
+
<tr><td>#{line}</td><th><code>#{loc}</code></th><td>#{msg}</td><td><pre>#{context}</pre></td></tr>
|
161
|
+
HTML
|
76
162
|
end
|
77
163
|
end
|
78
164
|
end
|
data/lib/utils/main.rb
CHANGED
@@ -8,12 +8,6 @@ require "csv"
|
|
8
8
|
module Metanorma
|
9
9
|
module Utils
|
10
10
|
class << self
|
11
|
-
def attr_code(attributes)
|
12
|
-
attributes.compact.transform_values do |v|
|
13
|
-
v.is_a?(String) ? HTMLEntities.new.decode(v) : v
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
11
|
# , " => ," : CSV definition does not deal with space followed by quote
|
18
12
|
# at start of field
|
19
13
|
def csv_split(text, delim = ";")
|
@@ -23,15 +17,6 @@ module Metanorma
|
|
23
17
|
col_sep: delim)&.compact&.map(&:strip)
|
24
18
|
end
|
25
19
|
|
26
|
-
# if the contents of node are blocks, output them to out;
|
27
|
-
# else, wrap them in <p>
|
28
|
-
def wrap_in_para(node, out)
|
29
|
-
if node.blocks? then out << node.content
|
30
|
-
else
|
31
|
-
out.p { |p| p << node.content }
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
20
|
def asciidoc_sub(text, flavour = :standoc)
|
36
21
|
return nil if text.nil?
|
37
22
|
return "" if text.empty?
|
@@ -141,29 +126,60 @@ module Metanorma
|
|
141
126
|
%w(Arab Aran Hebr).include? script
|
142
127
|
end
|
143
128
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
129
|
+
LONGSTR_THRESHOLD = 10
|
130
|
+
LONGSTR_NOPUNCT = 2
|
131
|
+
|
132
|
+
# break on punct every LONGSTRING_THRESHOLD chars, with zero width space
|
133
|
+
# if punct fails, try break on camel case, with soft hyphen
|
134
|
+
# break regardless every LONGSTRING_THRESHOLD * LONGSTR_NOPUNCT,
|
135
|
+
# with soft hyphen
|
136
|
+
def break_up_long_str(text, threshold = LONGSTR_THRESHOLD, nopunct = LONGSTR_NOPUNCT)
|
137
|
+
/^\s*$/.match?(text) and return text
|
138
|
+
text.split(/(?=\s)/).map do |w|
|
139
|
+
if /^\s*$/.match(text) || (w.size < LONGSTR_THRESHOLD) then w
|
140
|
+
else
|
141
|
+
w.scan(/.{,#{LONGSTR_THRESHOLD}}/o).map.with_index do |w1, i|
|
142
|
+
w1.size < LONGSTR_THRESHOLD ? w1 : break_up_long_str1(w1, i + 1)
|
143
|
+
end.join
|
144
|
+
end
|
145
|
+
end.join
|
146
|
+
end
|
147
|
+
|
148
|
+
STR_BREAKUP_RE = %r{
|
149
|
+
(?<=[=_—–\u2009→?+;]) | # break after any of these
|
150
|
+
(?<=[,.:])(?!\d) | # break on punct only if not preceding digit
|
151
|
+
(?<=[>])(?![>]) | # > not >->
|
152
|
+
(?<=[\]])(?![\]]) | # ] not ]-]
|
153
|
+
(?<=//) | # //
|
154
|
+
(?<=[/])(?![/]) | # / not /-/
|
155
|
+
(?<![<])(?=[<]) | # < not <-<
|
156
|
+
(?<=\p{L})(?=[(\{\[]\p{L}) # letter and bracket, followed by letter
|
157
|
+
}x.freeze
|
158
|
+
|
159
|
+
CAMEL_CASE_RE = %r{
|
160
|
+
(?<=\p{Ll}\p{Ll})(?=\p{Lu}\p{Ll}\p{Ll}) # 2 lowerc / upperc, 2 lowerc
|
161
|
+
}x.freeze
|
162
|
+
|
163
|
+
def break_up_long_str1(text, iteration)
|
164
|
+
s, separator = break_up_long_str2(text)
|
165
|
+
if s.size == 1 # could not break up
|
166
|
+
(iteration % LONGSTR_NOPUNCT).zero? and
|
167
|
+
text += "\u00ad" # force soft hyphen
|
168
|
+
text
|
169
|
+
else
|
170
|
+
s[-1] = "#{separator}#{s[-1]}"
|
171
|
+
s.join
|
157
172
|
end
|
158
|
-
ins
|
159
173
|
end
|
160
174
|
|
161
|
-
def
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
175
|
+
def break_up_long_str2(text)
|
176
|
+
s = text.split(STR_BREAKUP_RE, -1)
|
177
|
+
separator = "\u200b"
|
178
|
+
if s.size == 1
|
179
|
+
s = text.split(CAMEL_CASE_RE)
|
180
|
+
separator = "\u00ad"
|
181
|
+
end
|
182
|
+
[s, separator]
|
167
183
|
end
|
168
184
|
end
|
169
185
|
end
|
data/lib/utils/version.rb
CHANGED
data/lib/utils/xml.rb
CHANGED
@@ -21,8 +21,15 @@ module Metanorma
|
|
21
21
|
HERE
|
22
22
|
|
23
23
|
class << self
|
24
|
+
def attr_code(attributes)
|
25
|
+
attributes.compact.transform_values do |v|
|
26
|
+
v.is_a?(String) ? HTMLEntities.new.decode(v) : v
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
24
30
|
def to_ncname(tag, asciionly: true)
|
25
|
-
asciionly and tag = HTMLEntities.new.encode(tag, :basic,
|
31
|
+
asciionly and tag = HTMLEntities.new.encode(tag, :basic,
|
32
|
+
:hexadecimal)
|
26
33
|
start = tag[0]
|
27
34
|
ret1 = if %r([#{NAMECHAR}#])o.match?(start)
|
28
35
|
"_"
|
@@ -88,12 +95,46 @@ module Metanorma
|
|
88
95
|
end.join
|
89
96
|
end
|
90
97
|
|
98
|
+
# if the contents of node are blocks, output them to out;
|
99
|
+
# else, wrap them in <p>
|
100
|
+
def wrap_in_para(node, out)
|
101
|
+
if node.blocks? then out << node.content
|
102
|
+
else
|
103
|
+
out.p { |p| p << node.content }
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
91
107
|
# all element/attribute pairs that are ID anchors in Metanorma
|
92
108
|
def anchor_attributes
|
93
109
|
[%w[* id], %w[* bibitemid], %w[review from],
|
94
110
|
%w[review to], %w[index to], %w[xref target],
|
95
111
|
%w[callout target], %w[location target]]
|
96
112
|
end
|
113
|
+
|
114
|
+
# convert definition list term/value pair into Nokogiri XML attribute
|
115
|
+
def dl_to_attrs(elem, dlist, name)
|
116
|
+
e = dlist.at("./dt[text()='#{name}']") or return
|
117
|
+
val = e.at("./following::dd/p") || e.at("./following::dd") or return
|
118
|
+
elem[name] = val.text
|
119
|
+
end
|
120
|
+
|
121
|
+
# convert definition list term/value pairs into Nokogiri XML elements
|
122
|
+
def dl_to_elems(ins, elem, dlist, name)
|
123
|
+
a = elem.at("./#{name}[last()]")
|
124
|
+
ins = a if a
|
125
|
+
dlist.xpath("./dt[text()='#{name}']").each do |e|
|
126
|
+
ins = dl_to_elems1(e, name, ins)
|
127
|
+
end
|
128
|
+
ins
|
129
|
+
end
|
130
|
+
|
131
|
+
def dl_to_elems1(term, name, ins)
|
132
|
+
v = term.at("./following::dd")
|
133
|
+
e = v.elements and e.size == 1 && e.first.name == "p" and v = e.first
|
134
|
+
v.name = name
|
135
|
+
ins.next = v
|
136
|
+
ins.next
|
137
|
+
end
|
97
138
|
end
|
98
139
|
end
|
99
140
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metanorma-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-09
|
11
|
+
date: 2023-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: asciidoctor
|