metanorma-utils 1.10.2 → 1.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/utils/cjk.rb +110 -0
- data/lib/utils/log.rb +10 -2
- data/lib/utils/main.rb +2 -4
- data/lib/utils/version.rb +1 -1
- data/lib/utils/xml.rb +13 -5
- data/metanorma-utils.gemspec +2 -1
- metadata +23 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb7eb442187f8f1129b1e5e0a6ae9e05f504522cb55c2a3f75b784fc803717cf
|
4
|
+
data.tar.gz: 6819e04fc69ba93d601ce0053905db9ab10cea6d1e4af39e51a6d3b283603c02
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f21d1d2c7431b3de60715d50848d1a9ae6314c0dce3918a4db1d020e81928fff965148c8de4865186ab470e2242f775d35184ea212aa9c96f988a9ee32fc06bf
|
7
|
+
data.tar.gz: cc86b4e88d4cb126ac2d7824a996341db7194724ff95242bf17f4d2743a56552bb01486fb122e674901012cfe6a7f361acba60dd9504b98af7b8a8f7bbf3dbb8
|
data/lib/utils/cjk.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
module Metanorma
|
2
|
+
module Utils
|
3
|
+
class << self
|
4
|
+
# Basic CJK scripts
|
5
|
+
HAN = "\\p{Han}".freeze
|
6
|
+
BOPOMOFO = "\\p{Bopomofo}".freeze
|
7
|
+
HANGUL = "\\p{Hangul}".freeze
|
8
|
+
HIRAGANA = "\\p{Hiragana}".freeze
|
9
|
+
KATAKANA = "\\p{Katakana}".freeze
|
10
|
+
|
11
|
+
# Script extensions - characters shared between scripts
|
12
|
+
|
13
|
+
# CJK Symbols and Punctuation (U+3000–U+303F)
|
14
|
+
# Used across all CJK scripts
|
15
|
+
CJK_SYMBOLS = "[\\u3000-\\u303F]".freeze
|
16
|
+
|
17
|
+
# CJK Punctuation (subset of CJK Symbols commonly used)
|
18
|
+
CJK_PUNCTUATION = "[\\u3001-\\u3003\\u3008-\\u3011\\u3014-\\u301F]".freeze
|
19
|
+
|
20
|
+
# Halfwidth and Fullwidth Forms (U+FF00–U+FFEF)
|
21
|
+
# Used in all CJK contexts
|
22
|
+
CJK_HALFWIDTH_FULLWIDTH = "[\\uFF00-\\uFFEF]".freeze
|
23
|
+
|
24
|
+
# CJK Compatibility Forms (U+FE30–U+FE4F)
|
25
|
+
# Primarily used with Han but relevant for all CJK
|
26
|
+
CJK_COMPAT = "[\\uFE30-\\uFE4F]".freeze
|
27
|
+
|
28
|
+
# Vertical Forms (U+FE10–U+FE1F)
|
29
|
+
# Used in vertical text layout for all CJK
|
30
|
+
CJK_VERTICAL = "[\\uFE10-\\uFE1F]".freeze
|
31
|
+
|
32
|
+
# Small Form Variants (U+FE50–U+FE6F)
|
33
|
+
# Used in all CJK contexts
|
34
|
+
CJK_SMALL_FORMS = "[\\uFE50-\\uFE6F]".freeze
|
35
|
+
|
36
|
+
# Ideographic Description Characters (U+2FF0–U+2FFF)
|
37
|
+
# Used with Han script
|
38
|
+
HAN_IDC = "[\\u2FF0-\\u2FFF]".freeze
|
39
|
+
|
40
|
+
# Kanbun (U+3190–U+319F)
|
41
|
+
# Used with Han script for Japanese
|
42
|
+
KANBUN = "[\\u3190-\\u319F]".freeze
|
43
|
+
|
44
|
+
# CJK Compatibility (U+3300–U+33FF)
|
45
|
+
# Used with Han script
|
46
|
+
CJK_COMPAT_IDEOGRAPHS = "[\\u3300-\\u33FF]".freeze
|
47
|
+
|
48
|
+
# CJK Compatibility Ideographs (U+F900–U+FAFF)
|
49
|
+
HAN_COMPAT_IDEOGRAPHS = "[\\uF900-\\uFAFF]".freeze
|
50
|
+
|
51
|
+
# Script extensions by primary script
|
52
|
+
HAN_EXTENSIONS = [
|
53
|
+
HAN,
|
54
|
+
CJK_SYMBOLS,
|
55
|
+
CJK_PUNCTUATION,
|
56
|
+
CJK_HALFWIDTH_FULLWIDTH,
|
57
|
+
CJK_COMPAT,
|
58
|
+
CJK_VERTICAL,
|
59
|
+
CJK_SMALL_FORMS,
|
60
|
+
HAN_IDC,
|
61
|
+
KANBUN,
|
62
|
+
CJK_COMPAT_IDEOGRAPHS,
|
63
|
+
HAN_COMPAT_IDEOGRAPHS
|
64
|
+
].join("|").freeze
|
65
|
+
|
66
|
+
HANGUL_EXTENSIONS = [
|
67
|
+
HANGUL,
|
68
|
+
CJK_SYMBOLS,
|
69
|
+
CJK_PUNCTUATION,
|
70
|
+
CJK_HALFWIDTH_FULLWIDTH,
|
71
|
+
CJK_VERTICAL,
|
72
|
+
CJK_SMALL_FORMS
|
73
|
+
].join("|").freeze
|
74
|
+
|
75
|
+
HIRAGANA_EXTENSIONS = [
|
76
|
+
HIRAGANA,
|
77
|
+
CJK_SYMBOLS,
|
78
|
+
CJK_PUNCTUATION,
|
79
|
+
CJK_HALFWIDTH_FULLWIDTH,
|
80
|
+
CJK_VERTICAL,
|
81
|
+
CJK_SMALL_FORMS
|
82
|
+
].join("|").freeze
|
83
|
+
|
84
|
+
KATAKANA_EXTENSIONS = [
|
85
|
+
KATAKANA,
|
86
|
+
CJK_SYMBOLS,
|
87
|
+
CJK_PUNCTUATION,
|
88
|
+
CJK_HALFWIDTH_FULLWIDTH,
|
89
|
+
CJK_VERTICAL,
|
90
|
+
CJK_SMALL_FORMS
|
91
|
+
].join("|").freeze
|
92
|
+
|
93
|
+
BOPOMOFO_EXTENSIONS = [
|
94
|
+
BOPOMOFO,
|
95
|
+
CJK_SYMBOLS,
|
96
|
+
CJK_PUNCTUATION,
|
97
|
+
CJK_HALFWIDTH_FULLWIDTH
|
98
|
+
].join("|").freeze
|
99
|
+
|
100
|
+
# Combined CJK pattern including all script extensions
|
101
|
+
CJK = [
|
102
|
+
HAN_EXTENSIONS,
|
103
|
+
HANGUL_EXTENSIONS,
|
104
|
+
HIRAGANA_EXTENSIONS,
|
105
|
+
KATAKANA_EXTENSIONS,
|
106
|
+
BOPOMOFO_EXTENSIONS
|
107
|
+
].join("|").freeze
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
data/lib/utils/log.rb
CHANGED
@@ -43,6 +43,14 @@ module Metanorma
|
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
|
+
def messages
|
47
|
+
@log.values.each_with_object([]) do |v, m|
|
48
|
+
v.each do |e|
|
49
|
+
m << e
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
46
54
|
def suppress_log?(category, severity, msg)
|
47
55
|
category == "Relaton" && /^Fetching /.match?(msg) ||
|
48
56
|
@suppress_log[:severity] <= severity ||
|
@@ -78,7 +86,7 @@ module Metanorma
|
|
78
86
|
while !node.nil? && node["id"].nil? && node.respond_to?(:parent)
|
79
87
|
node = node.parent
|
80
88
|
end
|
81
|
-
node.respond_to?(:parent) ? "ID #{node['id']}" : ""
|
89
|
+
node.respond_to?(:parent) ? "ID #{node['anchor'] || node['id']}" : ""
|
82
90
|
elsif node.is_a? String then node
|
83
91
|
elsif node.respond_to?(:lineno) && !node.lineno.nil? &&
|
84
92
|
!node.lineno.empty?
|
@@ -216,7 +224,7 @@ module Metanorma
|
|
216
224
|
/^ID /.match?(loc) or return [loc, nil]
|
217
225
|
loc.sub!(/^ID /, "")
|
218
226
|
loc = @mapid[loc] while @mapid[loc]
|
219
|
-
url = "#{@htmlfilename}##{loc}"
|
227
|
+
url = "#{@htmlfilename}##{to_ncname loc}"
|
220
228
|
[loc, url]
|
221
229
|
end
|
222
230
|
|
data/lib/utils/main.rb
CHANGED
@@ -5,6 +5,7 @@ require "htmlentities"
|
|
5
5
|
require "nokogiri"
|
6
6
|
require "csv"
|
7
7
|
require_relative "../sterile/sterile"
|
8
|
+
require_relative "cjk"
|
8
9
|
|
9
10
|
module Metanorma
|
10
11
|
module Utils
|
@@ -35,9 +36,6 @@ module Metanorma
|
|
35
36
|
docfile.nil? ? "./" : "#{Pathname.new(docfile).parent}/"
|
36
37
|
end
|
37
38
|
|
38
|
-
CJK = "\\p{Han}|\\p{Bopomofo}|\\p{Hangul}|\\p{Hiragana}|\\p{Katakana}"
|
39
|
-
.freeze
|
40
|
-
|
41
39
|
# TODO needs internationalisation of quote
|
42
40
|
def smartformat(text)
|
43
41
|
ret = HTMLEntities.new.decode(
|
@@ -143,7 +141,7 @@ module Metanorma
|
|
143
141
|
nopunct = LONGSTR_NOPUNCT)
|
144
142
|
/^\s*$/.match?(text) and return text
|
145
143
|
text.split(/(?=(?:\s|-))/).map do |w|
|
146
|
-
if /^\s*$/.match(
|
144
|
+
if /^\s*$/.match(w) || (w.size < threshold) then w
|
147
145
|
else
|
148
146
|
w.scan(/.{,#{threshold}}/o).map.with_index do |w1, i|
|
149
147
|
w1.size < threshold ? w1 : break_up_long_str1(w1, i + 1, nopunct)
|
data/lib/utils/version.rb
CHANGED
data/lib/utils/xml.rb
CHANGED
@@ -69,7 +69,7 @@ module Metanorma
|
|
69
69
|
cjk2 = /#{CJK}/o.match?(nextfirst)
|
70
70
|
text1 = /[^\p{Z}\p{C}]/.match?(last)
|
71
71
|
text2 = /[^\p{Z}\p{C}]/.match?(nextfirst)
|
72
|
-
|
72
|
+
cjk1 && (cjk2 || !text2) and next
|
73
73
|
!text1 && cjk2 and next
|
74
74
|
ret[i] += " "
|
75
75
|
end
|
@@ -125,10 +125,18 @@ module Metanorma
|
|
125
125
|
end
|
126
126
|
|
127
127
|
# all element/attribute pairs that are ID anchors in Metanorma
|
128
|
-
def anchor_attributes
|
129
|
-
[%w
|
130
|
-
|
131
|
-
|
128
|
+
def anchor_attributes(presxml: false)
|
129
|
+
ret = [%w(review from), %w(review to), %w(callout target), %w(xref to),
|
130
|
+
%w(eref bibitemid), %w(citation bibitemid), %w(xref target),
|
131
|
+
%w(label for), %w(location target), %w(index to),
|
132
|
+
%w(termsource bibitemid), %w(admonition target)]
|
133
|
+
ret1 = [%w(fn target), %w(semx source), %w(fmt-title source),
|
134
|
+
%w(fmt-xref to), %w(fmt-xref target), %w(fmt-eref bibitemid),
|
135
|
+
%w(fmt-xref-label container), %w(fmt-fn-body target),
|
136
|
+
%w(fmt-review-start source), %w(fmt-review-start end),
|
137
|
+
%w(fmt-review-start target), %w(fmt-review-end source),
|
138
|
+
%w(fmt-review-end start), %w(fmt-review-end target)]
|
139
|
+
presxml ? ret + ret1 : ret
|
132
140
|
end
|
133
141
|
|
134
142
|
# convert definition list term/value pair into Nokogiri XML attribute
|
data/metanorma-utils.gemspec
CHANGED
@@ -41,7 +41,8 @@ Gem::Specification.new do |spec|
|
|
41
41
|
spec.add_development_dependency "guard-rspec", "~> 4.7"
|
42
42
|
spec.add_development_dependency "rake", "~> 13.0"
|
43
43
|
spec.add_development_dependency "rspec", "~> 3.6"
|
44
|
-
spec.add_development_dependency "rubocop", "~> 1
|
44
|
+
spec.add_development_dependency "rubocop", "~> 1"
|
45
|
+
spec.add_development_dependency "rubocop-performance"
|
45
46
|
spec.add_development_dependency "simplecov", "~> 0.15"
|
46
47
|
spec.add_development_dependency "timecop", "~> 0.9"
|
47
48
|
spec.add_development_dependency "vcr", "~> 6.1.0"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metanorma-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.11.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: asciidoctor
|
@@ -212,14 +212,28 @@ dependencies:
|
|
212
212
|
requirements:
|
213
213
|
- - "~>"
|
214
214
|
- !ruby/object:Gem::Version
|
215
|
-
version: 1
|
215
|
+
version: '1'
|
216
216
|
type: :development
|
217
217
|
prerelease: false
|
218
218
|
version_requirements: !ruby/object:Gem::Requirement
|
219
219
|
requirements:
|
220
220
|
- - "~>"
|
221
221
|
- !ruby/object:Gem::Version
|
222
|
-
version: 1
|
222
|
+
version: '1'
|
223
|
+
- !ruby/object:Gem::Dependency
|
224
|
+
name: rubocop-performance
|
225
|
+
requirement: !ruby/object:Gem::Requirement
|
226
|
+
requirements:
|
227
|
+
- - ">="
|
228
|
+
- !ruby/object:Gem::Version
|
229
|
+
version: '0'
|
230
|
+
type: :development
|
231
|
+
prerelease: false
|
232
|
+
version_requirements: !ruby/object:Gem::Requirement
|
233
|
+
requirements:
|
234
|
+
- - ">="
|
235
|
+
- !ruby/object:Gem::Version
|
236
|
+
version: '0'
|
223
237
|
- !ruby/object:Gem::Dependency
|
224
238
|
name: simplecov
|
225
239
|
requirement: !ruby/object:Gem::Requirement
|
@@ -307,6 +321,7 @@ files:
|
|
307
321
|
- README.adoc
|
308
322
|
- lib/metanorma-utils.rb
|
309
323
|
- lib/sterile/sterile.rb
|
324
|
+
- lib/utils/cjk.rb
|
310
325
|
- lib/utils/hash_transform_keys.rb
|
311
326
|
- lib/utils/image.rb
|
312
327
|
- lib/utils/linestatus.rb
|
@@ -320,7 +335,7 @@ homepage: https://github.com/metanorma/metanorma-utils
|
|
320
335
|
licenses:
|
321
336
|
- BSD-2-Clause
|
322
337
|
metadata: {}
|
323
|
-
post_install_message:
|
338
|
+
post_install_message:
|
324
339
|
rdoc_options: []
|
325
340
|
require_paths:
|
326
341
|
- lib
|
@@ -335,8 +350,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
335
350
|
- !ruby/object:Gem::Version
|
336
351
|
version: '0'
|
337
352
|
requirements: []
|
338
|
-
rubygems_version: 3.
|
339
|
-
signing_key:
|
353
|
+
rubygems_version: 3.5.22
|
354
|
+
signing_key:
|
340
355
|
specification_version: 4
|
341
356
|
summary: metanorma-utils
|
342
357
|
test_files: []
|