metanorma-utils 1.5.5 → 1.6.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 50f2a20d06ec82c5937a8d16a79b62b854785c9bfd3722bba88cf40025297658
4
- data.tar.gz: d86977e61ba4c279304676b6108b62f238aad069dde209061bc7699fb8481c74
3
+ metadata.gz: 4a36ca58dc687a7608f94df44a4f977a7571a687c196217f743af012f05e25b8
4
+ data.tar.gz: 6a3f38cf066b108a4ace75fa1946b696382963e63212567001eddabaa4a0dfe0
5
5
  SHA512:
6
- metadata.gz: 703b2fc422a54647b6a88807df7f7e6a6da30e78a1579cc7682e4e0a9dee19ec0a6886f78520002865ffad4151749543c96ceca7ee74d72f6b6988c087a3ae81
7
- data.tar.gz: 7f74c2033f7e5cf5fff92c58540402db7d745ee111206da68f0b69e9c29beaf88f8b7c7a90e8ce9c10f2b77791865d8ba9909b8dd1447b7b5e7f1aa07f1b251d
6
+ metadata.gz: 98a4fce07065b0b9c03a78d140c64313608ac89bf2f05d93d09f50b196bed93dfc83c54ddcb62474fbd965f22430ad4c5fbba975eb1442796e1e478f10d9433e
7
+ data.tar.gz: 890a86a2c8c1835e7a4d0bf41cc6b1f77667150b890a1340eaf1d1a8290c8a33560a0ec111af269771fddc0bfddca66c57902c5e7ba7faf1fbbb3f8907c6c24c
data/lib/utils/log.rb CHANGED
@@ -1,30 +1,57 @@
1
+ require "htmlentities"
2
+
1
3
  module Metanorma
2
4
  module Utils
3
5
  class Log
6
+ attr_writer :xml
7
+
4
8
  def initialize
5
9
  @log = {}
10
+ @c = HTMLEntities.new
11
+ @mapid = {}
6
12
  end
7
13
 
8
14
  def add(category, loc, msg)
9
- return if @novalid
10
-
11
- @log[category] = [] unless @log[category]
12
- @log[category] << { location: current_location(loc), message: msg,
13
- context: context(loc) }
15
+ @novalid and return
16
+ @log[category] ||= []
17
+ item = create_entry(loc, msg)
18
+ @log[category] << item
14
19
  loc = loc.nil? ? "" : "(#{current_location(loc)}): "
15
20
  warn "#{category}: #{loc}#{msg}"
16
21
  end
17
22
 
23
+ def create_entry(loc, msg)
24
+ msg = msg.encode("UTF-8", invalid: :replace, undef: :replace)
25
+ item = { location: current_location(loc),
26
+ message: msg, context: context(loc), line: line(loc, msg) }
27
+ if item[:message].include?(" :: ")
28
+ a = item[:message].split(" :: ", 2)
29
+ item[:context] = a[1]
30
+ item[:message] = a[0]
31
+ end
32
+ item
33
+ end
34
+
18
35
  def current_location(node)
19
36
  if node.nil? then ""
37
+ elsif node.respond_to?(:id) && !node.id.nil? then "ID #{node.id}"
38
+ elsif node.respond_to?(:id) && node.id.nil? && node.respond_to?(:parent)
39
+ while !node.nil? && node.id.nil?
40
+ node = node.parent
41
+ end
42
+ node.nil? ? "" : "ID #{node.id}"
43
+ elsif node.respond_to?(:to_xml) && node.respond_to?(:parent)
44
+ while !node.nil? && node["id"].nil? && node.respond_to?(:parent)
45
+ node = node.parent
46
+ end
47
+ node.respond_to?(:parent) ? "ID #{node['id']}" : ""
20
48
  elsif node.is_a? String then node
21
49
  elsif node.respond_to?(:lineno) && !node.lineno.nil? &&
22
50
  !node.lineno.empty?
23
51
  "Asciidoctor Line #{'%06d' % node.lineno}"
24
52
  elsif node.respond_to?(:line) && !node.line.nil?
25
53
  "XML Line #{'%06d' % node.line}"
26
- elsif node.respond_to?(:id) && !node.id.nil? then "ID #{node.id}"
27
- else
54
+ elsif node.respond_to?(:parent)
28
55
  while !node.nil? &&
29
56
  (!node.respond_to?(:level) || node.level.positive?) &&
30
57
  (!node.respond_to?(:context) || node.context != :section)
@@ -33,6 +60,17 @@ module Metanorma
33
60
  node&.context == :section
34
61
  end
35
62
  "??"
63
+ else "??"
64
+ end
65
+ end
66
+
67
+ def line(node, msg)
68
+ if node.respond_to?(:line) && !node.line.nil?
69
+ "#{'%06d' % node.line}"
70
+ elsif /^XML Line /.match?(msg)
71
+ msg.sub(/^XML Line /, "").sub(/:.*$/, "")
72
+ else
73
+ "000000"
36
74
  end
37
75
  end
38
76
 
@@ -54,25 +92,73 @@ module Metanorma
54
92
  ret.to_xml
55
93
  end
56
94
 
95
+ def log_hdr(file)
96
+ <<~HTML
97
+ <html><head><title>#{file} errors</title>
98
+ <style> pre { white-space: pre-wrap; } </style>
99
+ </head><body><h1>#{file} errors</h1>
100
+ HTML
101
+ end
102
+
57
103
  def write(file)
104
+ @filename = file.sub(".err.html", ".html")
58
105
  File.open(file, "w:UTF-8") do |f|
59
- f.puts "#{file} errors"
60
- @log.each_key do |key|
61
- f.puts "\n\n== #{key}\n\n"
62
- @log[key].sort_by { |a| a[:location] }.each do |n|
63
- write1(f, n)
64
- end
65
- end
106
+ f.puts log_hdr(file)
107
+ @log.each_key { |key| write_key(f, key) }
108
+ f.puts "</body></html>\n"
66
109
  end
67
110
  end
68
111
 
112
+ def write_key(file, key)
113
+ file.puts <<~HTML
114
+ <h2>#{key}</h2>\n<table border="1">
115
+ <thead><th width="5%">Line</th><th width="20%">ID</th><th width="30%">Message</th><th width="45%">Context</th></thead>
116
+ <tbody>
117
+ HTML
118
+ @log[key].sort_by { |a| [a[:line], a[:location], a[:message]] }
119
+ .each do |n|
120
+ write1(file, n)
121
+ end
122
+ file.puts "</tbody></table>\n"
123
+ end
124
+
69
125
  def write1(file, entry)
70
- loc = entry[:location] ? "(#{entry[:location]}): " : ""
71
- file.puts "#{loc}#{entry[:message]}"
72
- .encode("UTF-8", invalid: :replace, undef: :replace)
73
- entry[:context]&.split(/\n/)&.first(5)&.each do |l|
74
- file.puts "\t#{l}"
126
+ line = entry[:line]
127
+ line = nil if line == "000000"
128
+ loc = loc_link(entry)
129
+ msg = break_up_long_str(entry[:message], 10, 2)
130
+ .gsub(/`([^`]+)`/, "<code>\\1</code>")
131
+ entry[:context] and context = entry[:context].split("\n").first(5)
132
+ .join("\n").gsub("><", "> <")
133
+ write_entry(file, line, loc, msg, context)
134
+ end
135
+
136
+ def mapid(old, new)
137
+ @mapid[old] = new
138
+ end
139
+
140
+ def loc_link(entry)
141
+ loc = entry[:location]
142
+ loc.nil? || loc.empty? and loc = "--"
143
+ if /^ID /.match?(loc)
144
+ loc.sub!(/^ID /, "")
145
+ loc = @mapid[loc] while @mapid[loc]
146
+ url = "#{@filename}##{loc}"
75
147
  end
148
+ loc &&= break_up_long_str(loc, 10, 2)
149
+ url and loc = "<a href='#{url}'>#{loc}</a>"
150
+ loc
151
+ end
152
+
153
+ def break_up_long_str(str, threshold, punct)
154
+ Metanorma::Utils.break_up_long_str(str, threshold, punct)
155
+ end
156
+
157
+ def write_entry(file, line, loc, msg, context)
158
+ context &&= @c.encode(break_up_long_str(context, 40, 2))
159
+ file.print <<~HTML
160
+ <tr><td>#{line}</td><th><code>#{loc}</code></th><td>#{msg}</td><td><pre>#{context}</pre></td></tr>
161
+ HTML
76
162
  end
77
163
  end
78
164
  end
data/lib/utils/main.rb CHANGED
@@ -8,12 +8,6 @@ require "csv"
8
8
  module Metanorma
9
9
  module Utils
10
10
  class << self
11
- def attr_code(attributes)
12
- attributes.compact.transform_values do |v|
13
- v.is_a?(String) ? HTMLEntities.new.decode(v) : v
14
- end
15
- end
16
-
17
11
  # , " => ," : CSV definition does not deal with space followed by quote
18
12
  # at start of field
19
13
  def csv_split(text, delim = ";")
@@ -23,15 +17,6 @@ module Metanorma
23
17
  col_sep: delim)&.compact&.map(&:strip)
24
18
  end
25
19
 
26
- # if the contents of node are blocks, output them to out;
27
- # else, wrap them in <p>
28
- def wrap_in_para(node, out)
29
- if node.blocks? then out << node.content
30
- else
31
- out.p { |p| p << node.content }
32
- end
33
- end
34
-
35
20
  def asciidoc_sub(text, flavour = :standoc)
36
21
  return nil if text.nil?
37
22
  return "" if text.empty?
@@ -141,29 +126,60 @@ module Metanorma
141
126
  %w(Arab Aran Hebr).include? script
142
127
  end
143
128
 
144
- # convert definition list term/value pair into Nokogiri XML attribute
145
- def dl_to_attrs(elem, dlist, name)
146
- e = dlist.at("./dt[text()='#{name}']") or return
147
- val = e.at("./following::dd/p") || e.at("./following::dd") or return
148
- elem[name] = val.text
149
- end
150
-
151
- # convert definition list term/value pairs into Nokogiri XML elements
152
- def dl_to_elems(ins, elem, dlist, name)
153
- a = elem.at("./#{name}[last()]")
154
- ins = a if a
155
- dlist.xpath("./dt[text()='#{name}']").each do |e|
156
- ins = dl_to_elems1(e, name, ins)
129
+ LONGSTR_THRESHOLD = 10
130
+ LONGSTR_NOPUNCT = 2
131
+
132
+ # break on punct every LONGSTRING_THRESHOLD chars, with zero width space
133
+ # if punct fails, try break on camel case, with soft hyphen
134
+ # break regardless every LONGSTRING_THRESHOLD * LONGSTR_NOPUNCT,
135
+ # with soft hyphen
136
+ def break_up_long_str(text, threshold = LONGSTR_THRESHOLD, nopunct = LONGSTR_NOPUNCT)
137
+ /^\s*$/.match?(text) and return text
138
+ text.split(/(?=\s)/).map do |w|
139
+ if /^\s*$/.match(text) || (w.size < threshold) then w
140
+ else
141
+ w.scan(/.{,#{threshold}}/o).map.with_index do |w1, i|
142
+ w1.size < threshold ? w1 : break_up_long_str1(w1, i + 1, nopunct)
143
+ end.join
144
+ end
145
+ end.join
146
+ end
147
+
148
+ STR_BREAKUP_RE = %r{
149
+ (?<=[=_—–\u2009→?+;]) | # break after any of these
150
+ (?<=[,.:])(?!\d) | # break on punct only if not preceding digit
151
+ (?<=[>])(?![>]) | # > not >->
152
+ (?<=[\]])(?![\]]) | # ] not ]-]
153
+ (?<=//) | # //
154
+ (?<=[/])(?![/]) | # / not /-/
155
+ (?<![<])(?=[<]) | # < not <-<
156
+ (?<=\p{L})(?=[(\{\[]\p{L}) # letter and bracket, followed by letter
157
+ }x.freeze
158
+
159
+ CAMEL_CASE_RE = %r{
160
+ (?<=\p{Ll}\p{Ll})(?=\p{Lu}\p{Ll}\p{Ll}) # 2 lowerc / upperc, 2 lowerc
161
+ }x.freeze
162
+
163
+ def break_up_long_str1(text, iteration, nopunct)
164
+ s, separator = break_up_long_str2(text)
165
+ if s.size == 1 # could not break up
166
+ (iteration % nopunct).zero? and
167
+ text += "\u00ad" # force soft hyphen
168
+ text
169
+ else
170
+ s[-1] = "#{separator}#{s[-1]}"
171
+ s.join
157
172
  end
158
- ins
159
173
  end
160
174
 
161
- def dl_to_elems1(term, name, ins)
162
- v = term.at("./following::dd")
163
- e = v.elements and e.size == 1 && e.first.name == "p" and v = e.first
164
- v.name = name
165
- ins.next = v
166
- ins.next
175
+ def break_up_long_str2(text)
176
+ s = text.split(STR_BREAKUP_RE, -1)
177
+ separator = "\u200b"
178
+ if s.size == 1
179
+ s = text.split(CAMEL_CASE_RE)
180
+ separator = "\u00ad"
181
+ end
182
+ [s, separator]
167
183
  end
168
184
  end
169
185
  end
data/lib/utils/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module Metanorma
2
2
  module Utils
3
- VERSION = "1.5.5".freeze
3
+ VERSION = "1.6.1".freeze
4
4
  end
5
5
  end
data/lib/utils/xml.rb CHANGED
@@ -21,8 +21,15 @@ module Metanorma
21
21
  HERE
22
22
 
23
23
  class << self
24
+ def attr_code(attributes)
25
+ attributes.compact.transform_values do |v|
26
+ v.is_a?(String) ? HTMLEntities.new.decode(v) : v
27
+ end
28
+ end
29
+
24
30
  def to_ncname(tag, asciionly: true)
25
- asciionly and tag = HTMLEntities.new.encode(tag, :basic, :hexadecimal)
31
+ asciionly and tag = HTMLEntities.new.encode(tag, :basic,
32
+ :hexadecimal)
26
33
  start = tag[0]
27
34
  ret1 = if %r([#{NAMECHAR}#])o.match?(start)
28
35
  "_"
@@ -88,12 +95,46 @@ module Metanorma
88
95
  end.join
89
96
  end
90
97
 
98
+ # if the contents of node are blocks, output them to out;
99
+ # else, wrap them in <p>
100
+ def wrap_in_para(node, out)
101
+ if node.blocks? then out << node.content
102
+ else
103
+ out.p { |p| p << node.content }
104
+ end
105
+ end
106
+
91
107
  # all element/attribute pairs that are ID anchors in Metanorma
92
108
  def anchor_attributes
93
109
  [%w[* id], %w[* bibitemid], %w[review from],
94
110
  %w[review to], %w[index to], %w[xref target],
95
111
  %w[callout target], %w[location target]]
96
112
  end
113
+
114
+ # convert definition list term/value pair into Nokogiri XML attribute
115
+ def dl_to_attrs(elem, dlist, name)
116
+ e = dlist.at("./dt[text()='#{name}']") or return
117
+ val = e.at("./following::dd/p") || e.at("./following::dd") or return
118
+ elem[name] = val.text
119
+ end
120
+
121
+ # convert definition list term/value pairs into Nokogiri XML elements
122
+ def dl_to_elems(ins, elem, dlist, name)
123
+ a = elem.at("./#{name}[last()]")
124
+ ins = a if a
125
+ dlist.xpath("./dt[text()='#{name}']").each do |e|
126
+ ins = dl_to_elems1(e, name, ins)
127
+ end
128
+ ins
129
+ end
130
+
131
+ def dl_to_elems1(term, name, ins)
132
+ v = term.at("./following::dd")
133
+ e = v.elements and e.size == 1 && e.first.name == "p" and v = e.first
134
+ v.name = name
135
+ ins.next = v
136
+ ins.next
137
+ end
97
138
  end
98
139
  end
99
140
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metanorma-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.5
4
+ version: 1.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-09-28 00:00:00.000000000 Z
11
+ date: 2023-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: asciidoctor