metanorma-utils 1.5.5 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 50f2a20d06ec82c5937a8d16a79b62b854785c9bfd3722bba88cf40025297658
4
- data.tar.gz: d86977e61ba4c279304676b6108b62f238aad069dde209061bc7699fb8481c74
3
+ metadata.gz: 3234e3bc74a4ba02f150016c23db7e9b3d03386ae9f78ac42e3a64d78e31556e
4
+ data.tar.gz: 5ac7ae07a58d238b199f7e4cb9b79175e6ea0e99dfa5185358fffb579906de0f
5
5
  SHA512:
6
- metadata.gz: 703b2fc422a54647b6a88807df7f7e6a6da30e78a1579cc7682e4e0a9dee19ec0a6886f78520002865ffad4151749543c96ceca7ee74d72f6b6988c087a3ae81
7
- data.tar.gz: 7f74c2033f7e5cf5fff92c58540402db7d745ee111206da68f0b69e9c29beaf88f8b7c7a90e8ce9c10f2b77791865d8ba9909b8dd1447b7b5e7f1aa07f1b251d
6
+ metadata.gz: fc1e64c876f4d3bafd6996bd10421c810370ca3b512f702a1a09079a49d242773c90206f68a8f1187a574f13a7fbd9c7b4ec6dd122be18b9e7d6568785483066
7
+ data.tar.gz: dde8f1ea187e9cf6e9d80972c3b785c499a2c55fd6c8e9f3550e666a6b0e69ae711ba27c3e604d0c5e9f0fbe085a28ee45c240e5f969995497a5fd5b81c4aa59
data/lib/utils/log.rb CHANGED
@@ -1,30 +1,57 @@
1
+ require "htmlentities"
2
+
1
3
  module Metanorma
2
4
  module Utils
3
5
  class Log
6
+ attr_writer :xml
7
+
4
8
  def initialize
5
9
  @log = {}
10
+ @c = HTMLEntities.new
11
+ @mapid = {}
6
12
  end
7
13
 
8
14
  def add(category, loc, msg)
9
- return if @novalid
10
-
11
- @log[category] = [] unless @log[category]
12
- @log[category] << { location: current_location(loc), message: msg,
13
- context: context(loc) }
15
+ @novalid and return
16
+ @log[category] ||= []
17
+ item = create_entry(loc, msg)
18
+ @log[category] << item
14
19
  loc = loc.nil? ? "" : "(#{current_location(loc)}): "
15
20
  warn "#{category}: #{loc}#{msg}"
16
21
  end
17
22
 
23
+ def create_entry(loc, msg)
24
+ msg = msg.encode("UTF-8", invalid: :replace, undef: :replace)
25
+ item = { location: current_location(loc),
26
+ message: msg, context: context(loc), line: line(loc, msg) }
27
+ if item[:message].include?(" :: ")
28
+ a = item[:message].split(" :: ", 2)
29
+ item[:context] = a[1]
30
+ item[:message] = a[0]
31
+ end
32
+ item
33
+ end
34
+
18
35
  def current_location(node)
19
36
  if node.nil? then ""
37
+ elsif node.respond_to?(:id) && !node.id.nil? then "ID #{node.id}"
38
+ elsif node.respond_to?(:id) && node.id.nil? && node.respond_to?(:parent)
39
+ while !node.nil? && node.id.nil?
40
+ node = node.parent
41
+ end
42
+ node.nil? ? "" : "ID #{node.id}"
43
+ elsif node.respond_to?(:to_xml) && node.respond_to?(:parent)
44
+ while !node.nil? && node["id"].nil? && node.respond_to?(:parent)
45
+ node = node.parent
46
+ end
47
+ node.respond_to?(:parent) ? "ID #{node['id']}" : ""
20
48
  elsif node.is_a? String then node
21
49
  elsif node.respond_to?(:lineno) && !node.lineno.nil? &&
22
50
  !node.lineno.empty?
23
51
  "Asciidoctor Line #{'%06d' % node.lineno}"
24
52
  elsif node.respond_to?(:line) && !node.line.nil?
25
53
  "XML Line #{'%06d' % node.line}"
26
- elsif node.respond_to?(:id) && !node.id.nil? then "ID #{node.id}"
27
- else
54
+ elsif node.respond_to?(:parent)
28
55
  while !node.nil? &&
29
56
  (!node.respond_to?(:level) || node.level.positive?) &&
30
57
  (!node.respond_to?(:context) || node.context != :section)
@@ -33,6 +60,17 @@ module Metanorma
33
60
  node&.context == :section
34
61
  end
35
62
  "??"
63
+ else "??"
64
+ end
65
+ end
66
+
67
+ def line(node, msg)
68
+ if node.respond_to?(:line) && !node.line.nil?
69
+ "#{'%06d' % node.line}"
70
+ elsif /^XML Line /.match?(msg)
71
+ msg.sub(/^XML Line /, "").sub(/:.*$/, "")
72
+ else
73
+ "000000"
36
74
  end
37
75
  end
38
76
 
@@ -54,25 +92,73 @@ module Metanorma
54
92
  ret.to_xml
55
93
  end
56
94
 
95
+ def log_hdr(file)
96
+ <<~HTML
97
+ <html><head><title>#{file} errors</title>
98
+ <style> pre { white-space: pre-wrap; } </style>
99
+ </head><body><h1>#{file} errors</h1>
100
+ HTML
101
+ end
102
+
57
103
  def write(file)
104
+ @filename = file.sub(".err.html", ".html")
58
105
  File.open(file, "w:UTF-8") do |f|
59
- f.puts "#{file} errors"
60
- @log.each_key do |key|
61
- f.puts "\n\n== #{key}\n\n"
62
- @log[key].sort_by { |a| a[:location] }.each do |n|
63
- write1(f, n)
64
- end
65
- end
106
+ f.puts log_hdr(file)
107
+ @log.each_key { |key| write_key(f, key) }
108
+ f.puts "</body></html>\n"
66
109
  end
67
110
  end
68
111
 
112
+ def write_key(file, key)
113
+ file.puts <<~HTML
114
+ <h2>#{key}</h2>\n<table border="1">
115
+ <thead><th width="5%">Line</th><th width="20%">ID</th><th width="30%">Message</th><th width="45%">Context</th></thead>
116
+ <tbody>
117
+ HTML
118
+ @log[key].sort_by { |a| [a[:line], a[:location], a[:message]] }
119
+ .each do |n|
120
+ write1(file, n)
121
+ end
122
+ file.puts "</tbody></table>\n"
123
+ end
124
+
69
125
  def write1(file, entry)
70
- loc = entry[:location] ? "(#{entry[:location]}): " : ""
71
- file.puts "#{loc}#{entry[:message]}"
72
- .encode("UTF-8", invalid: :replace, undef: :replace)
73
- entry[:context]&.split(/\n/)&.first(5)&.each do |l|
74
- file.puts "\t#{l}"
126
+ line = entry[:line]
127
+ line = nil if line == "000000"
128
+ loc = loc_link(entry)
129
+ msg = break_up_long_str(entry[:message], 10, 2)
130
+ .gsub(/`([^`]+)`/, "<code>\\1</code>")
131
+ entry[:context] and context = entry[:context].split("\n").first(5)
132
+ .join("\n").gsub("><", "> <")
133
+ write_entry(file, line, loc, msg, context)
134
+ end
135
+
136
+ def mapid(old, new)
137
+ @mapid[old] = new
138
+ end
139
+
140
+ def loc_link(entry)
141
+ loc = entry[:location]
142
+ loc.nil? || loc.empty? and loc = "--"
143
+ if /^ID /.match?(loc)
144
+ loc.sub!(/^ID /, "")
145
+ loc = @mapid[loc] while @mapid[loc]
146
+ url = "#{@filename}##{loc}"
75
147
  end
148
+ loc &&= break_up_long_str(loc, 10, 2)
149
+ url and loc = "<a href='#{url}'>#{loc}</a>"
150
+ loc
151
+ end
152
+
153
+ def break_up_long_str(str, threshold, punct)
154
+ Metanorma::Utils.break_up_long_str(str, threshold, punct)
155
+ end
156
+
157
+ def write_entry(file, line, loc, msg, context)
158
+ context &&= @c.encode(break_up_long_str(context, 40, 2))
159
+ file.print <<~HTML
160
+ <tr><td>#{line}</td><th><code>#{loc}</code></th><td>#{msg}</td><td><pre>#{context}</pre></td></tr>
161
+ HTML
76
162
  end
77
163
  end
78
164
  end
data/lib/utils/main.rb CHANGED
@@ -8,12 +8,6 @@ require "csv"
8
8
  module Metanorma
9
9
  module Utils
10
10
  class << self
11
- def attr_code(attributes)
12
- attributes.compact.transform_values do |v|
13
- v.is_a?(String) ? HTMLEntities.new.decode(v) : v
14
- end
15
- end
16
-
17
11
  # , " => ," : CSV definition does not deal with space followed by quote
18
12
  # at start of field
19
13
  def csv_split(text, delim = ";")
@@ -23,15 +17,6 @@ module Metanorma
23
17
  col_sep: delim)&.compact&.map(&:strip)
24
18
  end
25
19
 
26
- # if the contents of node are blocks, output them to out;
27
- # else, wrap them in <p>
28
- def wrap_in_para(node, out)
29
- if node.blocks? then out << node.content
30
- else
31
- out.p { |p| p << node.content }
32
- end
33
- end
34
-
35
20
  def asciidoc_sub(text, flavour = :standoc)
36
21
  return nil if text.nil?
37
22
  return "" if text.empty?
@@ -141,29 +126,60 @@ module Metanorma
141
126
  %w(Arab Aran Hebr).include? script
142
127
  end
143
128
 
144
- # convert definition list term/value pair into Nokogiri XML attribute
145
- def dl_to_attrs(elem, dlist, name)
146
- e = dlist.at("./dt[text()='#{name}']") or return
147
- val = e.at("./following::dd/p") || e.at("./following::dd") or return
148
- elem[name] = val.text
149
- end
150
-
151
- # convert definition list term/value pairs into Nokogiri XML elements
152
- def dl_to_elems(ins, elem, dlist, name)
153
- a = elem.at("./#{name}[last()]")
154
- ins = a if a
155
- dlist.xpath("./dt[text()='#{name}']").each do |e|
156
- ins = dl_to_elems1(e, name, ins)
129
+ LONGSTR_THRESHOLD = 10
130
+ LONGSTR_NOPUNCT = 2
131
+
132
+ # break on punct every LONGSTR_THRESHOLD chars, with zero width space
133
+ # if punct fails, try break on camel case, with soft hyphen
134
+ # break regardless every LONGSTR_THRESHOLD * LONGSTR_NOPUNCT,
135
+ # with soft hyphen
136
+ def break_up_long_str(text, threshold = LONGSTR_THRESHOLD, nopunct = LONGSTR_NOPUNCT)
137
+ /^\s*$/.match?(text) and return text
138
+ text.split(/(?=\s)/).map do |w|
139
+ if /^\s*$/.match(text) || (w.size < LONGSTR_THRESHOLD) then w
140
+ else
141
+ w.scan(/.{,#{LONGSTR_THRESHOLD}}/o).map.with_index do |w1, i|
142
+ w1.size < LONGSTR_THRESHOLD ? w1 : break_up_long_str1(w1, i + 1)
143
+ end.join
144
+ end
145
+ end.join
146
+ end
147
+
148
+ STR_BREAKUP_RE = %r{
149
+ (?<=[=_—–\u2009→?+;]) | # break after any of these
150
+ (?<=[,.:])(?!\d) | # break on punct only if not preceding digit
151
+ (?<=[>])(?![>]) | # > not >->
152
+ (?<=[\]])(?![\]]) | # ] not ]-]
153
+ (?<=//) | # //
154
+ (?<=[/])(?![/]) | # / not /-/
155
+ (?<![<])(?=[<]) | # < not <-<
156
+ (?<=\p{L})(?=[(\{\[]\p{L}) # letter and bracket, followed by letter
157
+ }x.freeze
158
+
159
+ CAMEL_CASE_RE = %r{
160
+ (?<=\p{Ll}\p{Ll})(?=\p{Lu}\p{Ll}\p{Ll}) # 2 lowerc / upperc, 2 lowerc
161
+ }x.freeze
162
+
163
+ def break_up_long_str1(text, iteration)
164
+ s, separator = break_up_long_str2(text)
165
+ if s.size == 1 # could not break up
166
+ (iteration % LONGSTR_NOPUNCT).zero? and
167
+ text += "\u00ad" # force soft hyphen
168
+ text
169
+ else
170
+ s[-1] = "#{separator}#{s[-1]}"
171
+ s.join
157
172
  end
158
- ins
159
173
  end
160
174
 
161
- def dl_to_elems1(term, name, ins)
162
- v = term.at("./following::dd")
163
- e = v.elements and e.size == 1 && e.first.name == "p" and v = e.first
164
- v.name = name
165
- ins.next = v
166
- ins.next
175
+ def break_up_long_str2(text)
176
+ s = text.split(STR_BREAKUP_RE, -1)
177
+ separator = "\u200b"
178
+ if s.size == 1
179
+ s = text.split(CAMEL_CASE_RE)
180
+ separator = "\u00ad"
181
+ end
182
+ [s, separator]
167
183
  end
168
184
  end
169
185
  end
data/lib/utils/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module Metanorma
2
2
  module Utils
3
- VERSION = "1.5.5".freeze
3
+ VERSION = "1.6.0".freeze
4
4
  end
5
5
  end
data/lib/utils/xml.rb CHANGED
@@ -21,8 +21,15 @@ module Metanorma
21
21
  HERE
22
22
 
23
23
  class << self
24
+ def attr_code(attributes)
25
+ attributes.compact.transform_values do |v|
26
+ v.is_a?(String) ? HTMLEntities.new.decode(v) : v
27
+ end
28
+ end
29
+
24
30
  def to_ncname(tag, asciionly: true)
25
- asciionly and tag = HTMLEntities.new.encode(tag, :basic, :hexadecimal)
31
+ asciionly and tag = HTMLEntities.new.encode(tag, :basic,
32
+ :hexadecimal)
26
33
  start = tag[0]
27
34
  ret1 = if %r([#{NAMECHAR}#])o.match?(start)
28
35
  "_"
@@ -88,12 +95,46 @@ module Metanorma
88
95
  end.join
89
96
  end
90
97
 
98
+ # if the contents of node are blocks, output them to out;
99
+ # else, wrap them in <p>
100
+ def wrap_in_para(node, out)
101
+ if node.blocks? then out << node.content
102
+ else
103
+ out.p { |p| p << node.content }
104
+ end
105
+ end
106
+
91
107
  # all element/attribute pairs that are ID anchors in Metanorma
92
108
  def anchor_attributes
93
109
  [%w[* id], %w[* bibitemid], %w[review from],
94
110
  %w[review to], %w[index to], %w[xref target],
95
111
  %w[callout target], %w[location target]]
96
112
  end
113
+
114
+ # convert definition list term/value pair into Nokogiri XML attribute
115
+ def dl_to_attrs(elem, dlist, name)
116
+ e = dlist.at("./dt[text()='#{name}']") or return
117
+ val = e.at("./following::dd/p") || e.at("./following::dd") or return
118
+ elem[name] = val.text
119
+ end
120
+
121
+ # convert definition list term/value pairs into Nokogiri XML elements
122
+ def dl_to_elems(ins, elem, dlist, name)
123
+ a = elem.at("./#{name}[last()]")
124
+ ins = a if a
125
+ dlist.xpath("./dt[text()='#{name}']").each do |e|
126
+ ins = dl_to_elems1(e, name, ins)
127
+ end
128
+ ins
129
+ end
130
+
131
+ def dl_to_elems1(term, name, ins)
132
+ v = term.at("./following::dd")
133
+ e = v.elements and e.size == 1 && e.first.name == "p" and v = e.first
134
+ v.name = name
135
+ ins.next = v
136
+ ins.next
137
+ end
97
138
  end
98
139
  end
99
140
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metanorma-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.5
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-09-28 00:00:00.000000000 Z
11
+ date: 2023-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: asciidoctor