metanorma-utils 1.11.5 → 1.11.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/metanorma-utils.rb +1 -0
- data/lib/utils/anchor.rb +145 -0
- data/lib/utils/version.rb +1 -1
- data/lib/utils/xml.rb +15 -131
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aad2ada58dc0997b429f6bf703c4e1495b6a2e036477ceb757048ec38995b32c
|
4
|
+
data.tar.gz: 32c86b68d932fa0dba31c0fc41931632b2a3b8cc0e964c5f6b95e7b8d3e3cd27
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: be9ad9ecf0e96cb3999c53fbf205bfa624c4f1ce6bd2ad0ae1e90b0c8541d5f81a24db8b702471a778464dac86fdd6f299a739c04fd8de56aa6ff478717507cc
|
7
|
+
data.tar.gz: 056d8756d3fe2788550e795693a621dfca4a716280f62aa2d74530628a40cd3c3071b24a7105528285411afdfbd9ed9444e3cf00c190c02352e6595fa8bd554f
|
data/lib/metanorma-utils.rb
CHANGED
data/lib/utils/anchor.rb
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
require "uuidtools"
|
2
|
+
require "htmlentities"
|
3
|
+
|
4
|
+
module Metanorma
|
5
|
+
module Utils
|
6
|
+
class << self
|
7
|
+
# Following XML requirements for NCName: https://www.w3.org/TR/xml-names/#NT-NCName
|
8
|
+
BASECHAR = "A-Za-z\u{C0}-\u{D6}\u{D8}-\u{F6}\u{F8}-\u{FF}\u{100}-\u{131}\u{134}-\u{13E}" \
|
9
|
+
"\u{141}-\u{148}\u{14A}-\u{17E}\u{180}-\u{1C3}\u{1CD}-\u{1F0}\u{1F4}-\u{1F5}" \
|
10
|
+
"\u{1FA}-\u{217}\u{250}-\u{2A8}\u{2BB}-\u{2C1}\u{386}\u{388}-\u{38A}\u{38C}" \
|
11
|
+
"\u{38E}-\u{3A1}\u{3A3}-\u{3CE}\u{3D0}-\u{3D6}\u{3DA}\u{3DC}\u{3DE}\u{3E0}" \
|
12
|
+
"\u{3E2}-\u{3F3}\u{401}-\u{40C}\u{40E}-\u{44F}\u{451}-\u{45C}\u{45E}-\u{481}" \
|
13
|
+
"\u{490}-\u{4C4}\u{4C7}-\u{4C8}\u{4CB}-\u{4CC}\u{4D0}-\u{4EB}\u{4EE}-\u{4F5}" \
|
14
|
+
"\u{4F8}-\u{4F9}\u{531}-\u{556}\u{559}\u{561}-\u{586}\u{5D0}-\u{5EA}" \
|
15
|
+
"\u{5F0}-\u{5F2}\u{621}-\u{63A}\u{641}-\u{64A}\u{671}-\u{6B7}\u{6BA}-\u{6BE}" \
|
16
|
+
"\u{6C0}-\u{6CE}\u{6D0}-\u{6D3}\u{6D5}\u{6E5}-\u{6E6}\u{905}-\u{939}\u{93D}" \
|
17
|
+
"\u{958}-\u{961}\u{985}-\u{98C}\u{98F}-\u{990}\u{993}-\u{9A8}\u{9AA}-\u{9B0}" \
|
18
|
+
"\u{9B2}\u{9B6}-\u{9B9}\u{9DC}-\u{9DD}\u{9DF}-\u{9E1}\u{9F0}-\u{9F1}" \
|
19
|
+
"\u{A05}-\u{A0A}\u{A0F}-\u{A10}\u{A13}-\u{A28}\u{A2A}-\u{A30}\u{A32}-\u{A33}" \
|
20
|
+
"\u{A35}-\u{A36}\u{A38}-\u{A39}\u{A59}-\u{A5C}\u{A5E}\u{A72}-\u{A74}" \
|
21
|
+
"\u{A85}-\u{A8B}\u{A8D}\u{A8F}-\u{A91}\u{A93}-\u{AA8}\u{AAA}-\u{AB0}" \
|
22
|
+
"\u{AB2}-\u{AB3}\u{AB5}-\u{AB9}\u{ABD}\u{AE0}\u{B05}-\u{B0C}\u{B0F}-\u{B10}" \
|
23
|
+
"\u{B13}-\u{B28}\u{B2A}-\u{B30}\u{B32}-\u{B33}\u{B36}-\u{B39}\u{B3D}" \
|
24
|
+
"\u{B5C}-\u{B5D}\u{B5F}-\u{B61}\u{B85}-\u{B8A}\u{B8E}-\u{B90}\u{B92}-\u{B95}" \
|
25
|
+
"\u{B99}-\u{B9A}\u{B9C}\u{B9E}-\u{B9F}\u{BA3}-\u{BA4}\u{BA8}-\u{BAA}" \
|
26
|
+
"\u{BAE}-\u{BB5}\u{BB7}-\u{BB9}\u{C05}-\u{C0C}\u{C0E}-\u{C10}\u{C12}-\u{C28}" \
|
27
|
+
"\u{C2A}-\u{C33}\u{C35}-\u{C39}\u{C60}-\u{C61}\u{C85}-\u{C8C}\u{C8E}-\u{C90}" \
|
28
|
+
"\u{C92}-\u{CA8}\u{CAA}-\u{CB3}\u{CB5}-\u{CB9}\u{CDE}\u{CE0}-\u{CE1}" \
|
29
|
+
"\u{D05}-\u{D0C}\u{D0E}-\u{D10}\u{D12}-\u{D28}\u{D2A}-\u{D39}\u{D60}-\u{D61}" \
|
30
|
+
"\u{E01}-\u{E2E}\u{E30}\u{E32}-\u{E33}\u{E40}-\u{E45}\u{E81}-\u{E82}\u{E84}" \
|
31
|
+
"\u{E87}-\u{E88}\u{E8A}\u{E8D}\u{E94}-\u{E97}\u{E99}-\u{E9F}\u{EA1}-\u{EA3}" \
|
32
|
+
"\u{EA5}\u{EA7}\u{EAA}-\u{EAB}\u{EAD}-\u{EAE}\u{EB0}\u{EB2}-\u{EB3}\u{EBD}" \
|
33
|
+
"\u{EC0}-\u{EC4}\u{F40}-\u{F47}\u{F49}-\u{F69}\u{10A0}-\u{10C5}\u{10D0}-\u{10F6}" \
|
34
|
+
"\u{1100}\u{1102}-\u{1103}\u{1105}-\u{1107}\u{1109}\u{110B}-\u{110C}" \
|
35
|
+
"\u{110E}-\u{1112}\u{113C}\u{113E}\u{1140}\u{114C}\u{114E}\u{1150}" \
|
36
|
+
"\u{1154}-\u{1155}\u{1159}\u{115F}-\u{1161}\u{1163}\u{1165}\u{1167}\u{1169}" \
|
37
|
+
"\u{116D}-\u{116E}\u{1172}-\u{1173}\u{1175}\u{119E}\u{11A8}\u{11AB}" \
|
38
|
+
"\u{11AE}-\u{11AF}\u{11B7}-\u{11B8}\u{11BA}\u{11BC}-\u{11C2}\u{11EB}\u{11F0}" \
|
39
|
+
"\u{11F9}\u{1E00}-\u{1E9B}\u{1EA0}-\u{1EF9}\u{1F00}-\u{1F15}\u{1F18}-\u{1F1D}" \
|
40
|
+
"\u{1F20}-\u{1F45}\u{1F48}-\u{1F4D}\u{1F50}-\u{1F57}\u{1F59}\u{1F5B}\u{1F5D}" \
|
41
|
+
"\u{1F5F}-\u{1F7D}\u{1F80}-\u{1FB4}\u{1FB6}-\u{1FBC}\u{1FBE}\u{1FC2}-\u{1FC4}" \
|
42
|
+
"\u{1FC6}-\u{1FCC}\u{1FD0}-\u{1FD3}\u{1FD6}-\u{1FDB}\u{1FE0}-\u{1FEC}" \
|
43
|
+
"\u{1FF2}-\u{1FF4}\u{1FF6}-\u{1FFC}\u{2126}\u{212A}-\u{212B}\u{212E}" \
|
44
|
+
"\u{2180}-\u{2182}\u{3041}-\u{3094}\u{30A1}-\u{30FA}\u{3105}-\u{312C}" \
|
45
|
+
"\u{AC00}-\u{D7A3}".freeze
|
46
|
+
IDEOGRAPHIC = "\u{4E00}-\u{9FA5}\u{3007}\u{3021}-\u{3029}".freeze
|
47
|
+
LETTER = "#{BASECHAR}#{IDEOGRAPHIC}".freeze
|
48
|
+
DIGIT = "0-9\u{0660}-\u{0669}\u{06F0}-\u{06F9}\u{0966}-\u{096F}\u{09E6}-\u{09EF}" \
|
49
|
+
"\u{0A66}-\u{0A6F}\u{0AE6}-\u{0AEF}\u{0B66}-\u{0B6F}\u{0BE7}-\u{0BEF}" \
|
50
|
+
"\u{0C66}-\u{0C6F}\u{0CE6}-\u{0CEF}\u{0D66}-\u{0D6F}\u{0E50}-\u{0E59}" \
|
51
|
+
"\u{0ED0}-\u{0ED9}\u{0F20}-\u{0F29}".freeze
|
52
|
+
COMBINING_CHAR = "\u{0300}-\u{0345}\u{0360}-\u{0361}\u{0483}-\u{0486}\u{0591}-\u{05A1}" \
|
53
|
+
"\u{05A3}-\u{05B9}\u{05BB}-\u{05BD}\u{05BF}\u{05C1}-\u{05C2}\u{05C4}" \
|
54
|
+
"\u{064B}-\u{0652}\u{0670}\u{06D6}-\u{06DC}\u{06DD}-\u{06DF}" \
|
55
|
+
"\u{06E0}-\u{06E4}\u{06E7}-\u{06E8}\u{06EA}-\u{06ED}\u{0901}-\u{0903}" \
|
56
|
+
"\u{093C}\u{093E}-\u{094C}\u{094D}\u{0951}-\u{0954}\u{0962}-\u{0963}" \
|
57
|
+
"\u{0981}-\u{0983}\u{09BC}\u{09BE}\u{09BF}\u{09C0}-\u{09C4}" \
|
58
|
+
"\u{09C7}-\u{09C8}\u{09CB}-\u{09CD}\u{09D7}\u{09E2}-\u{09E3}\u{0A02}" \
|
59
|
+
"\u{0A3C}\u{0A3E}\u{0A3F}\u{0A40}-\u{0A42}\u{0A47}-\u{0A48}" \
|
60
|
+
"\u{0A4B}-\u{0A4D}\u{0A70}-\u{0A71}\u{0A81}-\u{0A83}\u{0ABC}" \
|
61
|
+
"\u{0ABE}-\u{0AC5}\u{0AC7}-\u{0AC9}\u{0ACB}-\u{0ACD}\u{0B01}-\u{0B03}" \
|
62
|
+
"\u{0B3C}\u{0B3E}-\u{0B43}\u{0B47}-\u{0B48}\u{0B4B}-\u{0B4D}" \
|
63
|
+
"\u{0B56}-\u{0B57}\u{0B82}-\u{0B83}\u{0BBE}-\u{0BC2}\u{0BC6}-\u{0BC8}" \
|
64
|
+
"\u{0BCA}-\u{0BCD}\u{0BD7}\u{0C01}-\u{0C03}\u{0C3E}-\u{0C44}" \
|
65
|
+
"\u{0C46}-\u{0C48}\u{0C4A}-\u{0C4D}\u{0C55}-\u{0C56}\u{0C82}-\u{0C83}" \
|
66
|
+
"\u{0CBE}-\u{0CC4}\u{0CC6}-\u{0CC8}\u{0CCA}-\u{0CCD}\u{0CD5}-\u{0CD6}" \
|
67
|
+
"\u{0D02}-\u{0D03}\u{0D3E}-\u{0D43}\u{0D46}-\u{0D48}\u{0D4A}-\u{0D4D}" \
|
68
|
+
"\u{0D57}\u{0E31}\u{0E34}-\u{0E3A}\u{0E47}-\u{0E4E}\u{0EB1}" \
|
69
|
+
"\u{0EB4}-\u{0EB9}\u{0EBB}-\u{0EBC}\u{0EC8}-\u{0ECD}\u{0F18}-\u{0F19}" \
|
70
|
+
"\u{0F35}\u{0F37}\u{0F39}\u{0F3E}\u{0F3F}\u{0F71}-\u{0F84}" \
|
71
|
+
"\u{0F86}-\u{0F8B}\u{0F90}-\u{0F95}\u{0F97}\u{0F99}-\u{0FAD}" \
|
72
|
+
"\u{0FB1}-\u{0FB7}\u{0FB9}\u{20D0}-\u{20DC}\u{20E1}\u{302A}-\u{302F}" \
|
73
|
+
"\u{3099}\u{309A}".freeze
|
74
|
+
EXTENDER = "\u{00B7}\u{02D0}\u{02D1}\u{0387}\u{0640}\u{0E46}\u{0EC6}\u{3005}" \
|
75
|
+
"\u{3031}-\u{3035}\u{309D}-\u{309E}\u{30FC}-\u{30FE}".freeze
|
76
|
+
|
77
|
+
# NCName specific constants - NCName is "an XML Name, minus the :"
|
78
|
+
# NCName = (Letter | '_') (NCNameChar)*
|
79
|
+
NCNAME_START_CHAR = "#{LETTER}_".freeze
|
80
|
+
NCNAME_CHAR = "#{LETTER}#{DIGIT}._\\-#{COMBINING_CHAR}#{EXTENDER}".freeze
|
81
|
+
INVALID_NCNAME_START_REGEXP = /[^#{NCNAME_START_CHAR}]/.freeze
|
82
|
+
INVALID_NCNAME_CHAR_REGEXP = /[^#{NCNAME_CHAR}]/.freeze
|
83
|
+
SAFE_NCNAME_REGEXP = /\A[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*\z/.freeze
|
84
|
+
NCNAME_INVALID = "_".freeze
|
85
|
+
|
86
|
+
# A utility method for escaping XML NCNames (XML Names without colons).
|
87
|
+
#
|
88
|
+
# to_ncname('1 < 2 & 3')
|
89
|
+
# # => "____2___3" (the leading digit is not a valid NCName start character, so it is replaced too)
|
90
|
+
#
|
91
|
+
# It follows the requirements of the specification for NCName: https://www.w3.org/TR/xml-names/#NT-NCName
|
92
|
+
# NCName is "an XML Name, minus the :"
|
93
|
+
# Turns +name+ into a string usable as an XML NCName.
#
# The value is first normalised by to_ncname_prep; if that reports it is
# already acceptable (or empty) it is returned as is. Otherwise the first
# character is checked against the NCName start-character set, the remaining
# characters against the NCName character set, and every offending character
# (colons included) is replaced by NCNAME_INVALID.
#
# @param name [Object, nil] value to convert; non-strings are stringified
# @param asciionly [Boolean] forwarded to to_ncname_prep
# @return [String] the escaped name
def to_ncname(name, asciionly: false)
  prepared, already_valid = to_ncname_prep(name, asciionly)
  return prepared if already_valid

  # The first character has a stricter allowed set than the rest.
  head = prepared[0].gsub(INVALID_NCNAME_START_REGEXP, NCNAME_INVALID)
  return head if prepared.size == 1

  tail = prepared[1..-1]
    .gsub(INVALID_NCNAME_CHAR_REGEXP, NCNAME_INVALID)
    .gsub(":", NCNAME_INVALID)
  head << tail
end
|
104
|
+
|
105
|
+
# Normalises the input of to_ncname and reports whether escaping is needed.
#
# +name+ is converted to a String (nil becomes ""). When +asciionly+ is
# truthy the string is passed through HTMLEntities#encode with the :basic
# and :hexadecimal instructions before validation.
#
# @param name [Object, nil] value to normalise
# @param asciionly [Boolean] whether to entity-encode the string first
# @return [Array(String, Boolean)] the normalised string, and true when it
#   is empty or already matches SAFE_NCNAME_REGEXP
def to_ncname_prep(name, asciionly)
  text = name.to_s
  text = HTMLEntities.new.encode(text, :basic, :hexadecimal) if asciionly
  usable = text.empty? || text.match?(SAFE_NCNAME_REGEXP)
  [text, usable]
end
|
112
|
+
|
113
|
+
# Returns the id of +node+, or a freshly generated anchor when there is none.
#
# A generated anchor is an underscore followed by a random UUID. The UUID is
# only created when it is actually needed, so calls for nodes that already
# carry an id do no random-number work.
#
# @param node [#id, nil] object responding to +id+, or nil
# @return [String] the existing non-empty id, or "_<uuid>"
def anchor_or_uuid(node = nil)
  id = node&.id
  return id unless id.nil? || id.empty?

  "_#{UUIDTools::UUID.random_create}"
end
|
117
|
+
|
118
|
+
# all element/attribute pairs that are ID anchors in Metanorma
|
119
|
+
# Lists the [element name, attribute name] pairs whose attribute value
# refers to an ID anchor.
#
# @param presxml [Boolean] when true, the additional pairs (fn, semx and the
#   fmt-* elements) are appended after the base list
# @return [Array<Array(String, String)>] element/attribute name pairs
def anchor_attributes(presxml: false)
  base_pairs = [
    %w[annotation from],
    %w[annotation to],
    %w[callout target],
    %w[xref to],
    %w[eref bibitemid],
    %w[citation bibitemid],
    %w[xref target],
    %w[label for],
    %w[location target],
    %w[index to],
    %w[termsource bibitemid],
    %w[admonition target],
  ]
  return base_pairs unless presxml

  base_pairs + [
    %w[fn target],
    %w[semx source],
    %w[fmt-title source],
    %w[fmt-xref to],
    %w[fmt-xref target],
    %w[fmt-eref bibitemid],
    %w[fmt-xref-label container],
    %w[fmt-fn-body target],
    %w[fmt-annotation-body from],
    %w[fmt-annotation-body to],
    %w[fmt-annotation-start source],
    %w[fmt-annotation-start end],
    %w[fmt-annotation-start target],
    %w[fmt-annotation-end source],
    %w[fmt-annotation-end start],
    %w[fmt-annotation-end target],
  ]
end
|
133
|
+
|
134
|
+
# Tests whether +id+ has the shape of a generated GUID anchor: an underscore
# followed by hexadecimal groups of 8-4-4-4-12 digits (case-insensitive).
#
# The pattern is anchored with \A and \z so the whole value must match;
# line anchors (^ and $) would also accept a multi-line string in which only
# one line is a GUID, or a GUID followed by a trailing newline.
#
# @param id [String, nil] candidate anchor
# @return [Boolean] true when +id+ is exactly a GUID anchor; false for nil
def guid_anchor?(id)
  /\A_\h{8}-\h{4}-\h{4}-\h{4}-\h{12}\z/.match?(id)
end
|
138
|
+
|
139
|
+
# Derives a GUID-shaped anchor from the location and text of +elem+.
#
# The MD5 hex digest of "<path>////<text>" is split into groups of
# 8-4-4-4-12 characters, joined with hyphens and prefixed with an underscore.
#
# @param elem [#path, #text] object exposing +path+ and +text+
# @return [String] anchor of the form "_xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
def contenthash(elem)
  hex = Digest::MD5.hexdigest("#{elem.path}////#{elem.text}")
  # An MD5 hex digest is always 32 characters, so fixed-width slicing is exact.
  "_#{hex.unpack('a8a4a4a4a12').join('-')}"
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
data/lib/utils/version.rb
CHANGED
data/lib/utils/xml.rb
CHANGED
@@ -4,6 +4,21 @@ require "uuidtools"
|
|
4
4
|
require "htmlentities"
|
5
5
|
require "nokogiri"
|
6
6
|
|
7
|
+
module Nokogiri
  module XML
    # Reopens the Node class to add a prepend-style insertion helper.
    class Node
      # Inserts +content+ as the first child of this node.
      #
      # With no existing children the content is simply added as a child;
      # otherwise it is assigned as the +previous+ of the current first child.
      #
      # @param content [Object] whatever add_child / previous= accept
      # @return [self] the receiver, so calls can be chained
      def add_first_child(content)
        leading = children.first
        if leading.nil?
          add_child(content)
        else
          leading.previous = content
        end
        self
      end
    end
  end
end
|
21
|
+
|
7
22
|
module Metanorma
|
8
23
|
module Utils
|
9
24
|
NOKOHEAD = <<~HERE.freeze
|
@@ -21,117 +36,6 @@ module Metanorma
|
|
21
36
|
end
|
22
37
|
end
|
23
38
|
|
24
|
-
# Following XML requirements for NCName: https://www.w3.org/TR/xml-names/#NT-NCName
|
25
|
-
BASECHAR = "A-Za-z\u{C0}-\u{D6}\u{D8}-\u{F6}\u{F8}-\u{FF}\u{100}-\u{131}\u{134}-\u{13E}" \
|
26
|
-
"\u{141}-\u{148}\u{14A}-\u{17E}\u{180}-\u{1C3}\u{1CD}-\u{1F0}\u{1F4}-\u{1F5}" \
|
27
|
-
"\u{1FA}-\u{217}\u{250}-\u{2A8}\u{2BB}-\u{2C1}\u{386}\u{388}-\u{38A}\u{38C}" \
|
28
|
-
"\u{38E}-\u{3A1}\u{3A3}-\u{3CE}\u{3D0}-\u{3D6}\u{3DA}\u{3DC}\u{3DE}\u{3E0}" \
|
29
|
-
"\u{3E2}-\u{3F3}\u{401}-\u{40C}\u{40E}-\u{44F}\u{451}-\u{45C}\u{45E}-\u{481}" \
|
30
|
-
"\u{490}-\u{4C4}\u{4C7}-\u{4C8}\u{4CB}-\u{4CC}\u{4D0}-\u{4EB}\u{4EE}-\u{4F5}" \
|
31
|
-
"\u{4F8}-\u{4F9}\u{531}-\u{556}\u{559}\u{561}-\u{586}\u{5D0}-\u{5EA}" \
|
32
|
-
"\u{5F0}-\u{5F2}\u{621}-\u{63A}\u{641}-\u{64A}\u{671}-\u{6B7}\u{6BA}-\u{6BE}" \
|
33
|
-
"\u{6C0}-\u{6CE}\u{6D0}-\u{6D3}\u{6D5}\u{6E5}-\u{6E6}\u{905}-\u{939}\u{93D}" \
|
34
|
-
"\u{958}-\u{961}\u{985}-\u{98C}\u{98F}-\u{990}\u{993}-\u{9A8}\u{9AA}-\u{9B0}" \
|
35
|
-
"\u{9B2}\u{9B6}-\u{9B9}\u{9DC}-\u{9DD}\u{9DF}-\u{9E1}\u{9F0}-\u{9F1}" \
|
36
|
-
"\u{A05}-\u{A0A}\u{A0F}-\u{A10}\u{A13}-\u{A28}\u{A2A}-\u{A30}\u{A32}-\u{A33}" \
|
37
|
-
"\u{A35}-\u{A36}\u{A38}-\u{A39}\u{A59}-\u{A5C}\u{A5E}\u{A72}-\u{A74}" \
|
38
|
-
"\u{A85}-\u{A8B}\u{A8D}\u{A8F}-\u{A91}\u{A93}-\u{AA8}\u{AAA}-\u{AB0}" \
|
39
|
-
"\u{AB2}-\u{AB3}\u{AB5}-\u{AB9}\u{ABD}\u{AE0}\u{B05}-\u{B0C}\u{B0F}-\u{B10}" \
|
40
|
-
"\u{B13}-\u{B28}\u{B2A}-\u{B30}\u{B32}-\u{B33}\u{B36}-\u{B39}\u{B3D}" \
|
41
|
-
"\u{B5C}-\u{B5D}\u{B5F}-\u{B61}\u{B85}-\u{B8A}\u{B8E}-\u{B90}\u{B92}-\u{B95}" \
|
42
|
-
"\u{B99}-\u{B9A}\u{B9C}\u{B9E}-\u{B9F}\u{BA3}-\u{BA4}\u{BA8}-\u{BAA}" \
|
43
|
-
"\u{BAE}-\u{BB5}\u{BB7}-\u{BB9}\u{C05}-\u{C0C}\u{C0E}-\u{C10}\u{C12}-\u{C28}" \
|
44
|
-
"\u{C2A}-\u{C33}\u{C35}-\u{C39}\u{C60}-\u{C61}\u{C85}-\u{C8C}\u{C8E}-\u{C90}" \
|
45
|
-
"\u{C92}-\u{CA8}\u{CAA}-\u{CB3}\u{CB5}-\u{CB9}\u{CDE}\u{CE0}-\u{CE1}" \
|
46
|
-
"\u{D05}-\u{D0C}\u{D0E}-\u{D10}\u{D12}-\u{D28}\u{D2A}-\u{D39}\u{D60}-\u{D61}" \
|
47
|
-
"\u{E01}-\u{E2E}\u{E30}\u{E32}-\u{E33}\u{E40}-\u{E45}\u{E81}-\u{E82}\u{E84}" \
|
48
|
-
"\u{E87}-\u{E88}\u{E8A}\u{E8D}\u{E94}-\u{E97}\u{E99}-\u{E9F}\u{EA1}-\u{EA3}" \
|
49
|
-
"\u{EA5}\u{EA7}\u{EAA}-\u{EAB}\u{EAD}-\u{EAE}\u{EB0}\u{EB2}-\u{EB3}\u{EBD}" \
|
50
|
-
"\u{EC0}-\u{EC4}\u{F40}-\u{F47}\u{F49}-\u{F69}\u{10A0}-\u{10C5}\u{10D0}-\u{10F6}" \
|
51
|
-
"\u{1100}\u{1102}-\u{1103}\u{1105}-\u{1107}\u{1109}\u{110B}-\u{110C}" \
|
52
|
-
"\u{110E}-\u{1112}\u{113C}\u{113E}\u{1140}\u{114C}\u{114E}\u{1150}" \
|
53
|
-
"\u{1154}-\u{1155}\u{1159}\u{115F}-\u{1161}\u{1163}\u{1165}\u{1167}\u{1169}" \
|
54
|
-
"\u{116D}-\u{116E}\u{1172}-\u{1173}\u{1175}\u{119E}\u{11A8}\u{11AB}" \
|
55
|
-
"\u{11AE}-\u{11AF}\u{11B7}-\u{11B8}\u{11BA}\u{11BC}-\u{11C2}\u{11EB}\u{11F0}" \
|
56
|
-
"\u{11F9}\u{1E00}-\u{1E9B}\u{1EA0}-\u{1EF9}\u{1F00}-\u{1F15}\u{1F18}-\u{1F1D}" \
|
57
|
-
"\u{1F20}-\u{1F45}\u{1F48}-\u{1F4D}\u{1F50}-\u{1F57}\u{1F59}\u{1F5B}\u{1F5D}" \
|
58
|
-
"\u{1F5F}-\u{1F7D}\u{1F80}-\u{1FB4}\u{1FB6}-\u{1FBC}\u{1FBE}\u{1FC2}-\u{1FC4}" \
|
59
|
-
"\u{1FC6}-\u{1FCC}\u{1FD0}-\u{1FD3}\u{1FD6}-\u{1FDB}\u{1FE0}-\u{1FEC}" \
|
60
|
-
"\u{1FF2}-\u{1FF4}\u{1FF6}-\u{1FFC}\u{2126}\u{212A}-\u{212B}\u{212E}" \
|
61
|
-
"\u{2180}-\u{2182}\u{3041}-\u{3094}\u{30A1}-\u{30FA}\u{3105}-\u{312C}" \
|
62
|
-
"\u{AC00}-\u{D7A3}".freeze
|
63
|
-
IDEOGRAPHIC = "\u{4E00}-\u{9FA5}\u{3007}\u{3021}-\u{3029}".freeze
|
64
|
-
LETTER = "#{BASECHAR}#{IDEOGRAPHIC}".freeze
|
65
|
-
DIGIT = "0-9\u{0660}-\u{0669}\u{06F0}-\u{06F9}\u{0966}-\u{096F}\u{09E6}-\u{09EF}" \
|
66
|
-
"\u{0A66}-\u{0A6F}\u{0AE6}-\u{0AEF}\u{0B66}-\u{0B6F}\u{0BE7}-\u{0BEF}" \
|
67
|
-
"\u{0C66}-\u{0C6F}\u{0CE6}-\u{0CEF}\u{0D66}-\u{0D6F}\u{0E50}-\u{0E59}" \
|
68
|
-
"\u{0ED0}-\u{0ED9}\u{0F20}-\u{0F29}".freeze
|
69
|
-
COMBINING_CHAR = "\u{0300}-\u{0345}\u{0360}-\u{0361}\u{0483}-\u{0486}\u{0591}-\u{05A1}" \
|
70
|
-
"\u{05A3}-\u{05B9}\u{05BB}-\u{05BD}\u{05BF}\u{05C1}-\u{05C2}\u{05C4}" \
|
71
|
-
"\u{064B}-\u{0652}\u{0670}\u{06D6}-\u{06DC}\u{06DD}-\u{06DF}" \
|
72
|
-
"\u{06E0}-\u{06E4}\u{06E7}-\u{06E8}\u{06EA}-\u{06ED}\u{0901}-\u{0903}" \
|
73
|
-
"\u{093C}\u{093E}-\u{094C}\u{094D}\u{0951}-\u{0954}\u{0962}-\u{0963}" \
|
74
|
-
"\u{0981}-\u{0983}\u{09BC}\u{09BE}\u{09BF}\u{09C0}-\u{09C4}" \
|
75
|
-
"\u{09C7}-\u{09C8}\u{09CB}-\u{09CD}\u{09D7}\u{09E2}-\u{09E3}\u{0A02}" \
|
76
|
-
"\u{0A3C}\u{0A3E}\u{0A3F}\u{0A40}-\u{0A42}\u{0A47}-\u{0A48}" \
|
77
|
-
"\u{0A4B}-\u{0A4D}\u{0A70}-\u{0A71}\u{0A81}-\u{0A83}\u{0ABC}" \
|
78
|
-
"\u{0ABE}-\u{0AC5}\u{0AC7}-\u{0AC9}\u{0ACB}-\u{0ACD}\u{0B01}-\u{0B03}" \
|
79
|
-
"\u{0B3C}\u{0B3E}-\u{0B43}\u{0B47}-\u{0B48}\u{0B4B}-\u{0B4D}" \
|
80
|
-
"\u{0B56}-\u{0B57}\u{0B82}-\u{0B83}\u{0BBE}-\u{0BC2}\u{0BC6}-\u{0BC8}" \
|
81
|
-
"\u{0BCA}-\u{0BCD}\u{0BD7}\u{0C01}-\u{0C03}\u{0C3E}-\u{0C44}" \
|
82
|
-
"\u{0C46}-\u{0C48}\u{0C4A}-\u{0C4D}\u{0C55}-\u{0C56}\u{0C82}-\u{0C83}" \
|
83
|
-
"\u{0CBE}-\u{0CC4}\u{0CC6}-\u{0CC8}\u{0CCA}-\u{0CCD}\u{0CD5}-\u{0CD6}" \
|
84
|
-
"\u{0D02}-\u{0D03}\u{0D3E}-\u{0D43}\u{0D46}-\u{0D48}\u{0D4A}-\u{0D4D}" \
|
85
|
-
"\u{0D57}\u{0E31}\u{0E34}-\u{0E3A}\u{0E47}-\u{0E4E}\u{0EB1}" \
|
86
|
-
"\u{0EB4}-\u{0EB9}\u{0EBB}-\u{0EBC}\u{0EC8}-\u{0ECD}\u{0F18}-\u{0F19}" \
|
87
|
-
"\u{0F35}\u{0F37}\u{0F39}\u{0F3E}\u{0F3F}\u{0F71}-\u{0F84}" \
|
88
|
-
"\u{0F86}-\u{0F8B}\u{0F90}-\u{0F95}\u{0F97}\u{0F99}-\u{0FAD}" \
|
89
|
-
"\u{0FB1}-\u{0FB7}\u{0FB9}\u{20D0}-\u{20DC}\u{20E1}\u{302A}-\u{302F}" \
|
90
|
-
"\u{3099}\u{309A}".freeze
|
91
|
-
EXTENDER = "\u{00B7}\u{02D0}\u{02D1}\u{0387}\u{0640}\u{0E46}\u{0EC6}\u{3005}" \
|
92
|
-
"\u{3031}-\u{3035}\u{309D}-\u{309E}\u{30FC}-\u{30FE}".freeze
|
93
|
-
|
94
|
-
# NCName specific constants - NCName is "an XML Name, minus the :"
|
95
|
-
# NCName = (Letter | '_') (NCNameChar)*
|
96
|
-
NCNAME_START_CHAR = "#{LETTER}_".freeze
|
97
|
-
NCNAME_CHAR = "#{LETTER}#{DIGIT}._\\-#{COMBINING_CHAR}#{EXTENDER}".freeze
|
98
|
-
INVALID_NCNAME_START_REGEXP = /[^#{NCNAME_START_CHAR}]/.freeze
|
99
|
-
INVALID_NCNAME_CHAR_REGEXP = /[^#{NCNAME_CHAR}]/.freeze
|
100
|
-
SAFE_NCNAME_REGEXP = /\A[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*\z/.freeze
|
101
|
-
NCNAME_INVALID = "_".freeze
|
102
|
-
|
103
|
-
# A utility method for escaping XML NCNames (XML Names without colons).
|
104
|
-
#
|
105
|
-
# to_ncname('1 < 2 & 3')
|
106
|
-
# # => "1___2___3"
|
107
|
-
#
|
108
|
-
# It follows the requirements of the specification for NCName: https://www.w3.org/TR/xml-names/#NT-NCName
|
109
|
-
# NCName is "an XML Name, minus the :"
|
110
|
-
def to_ncname(name, asciionly: false)
|
111
|
-
name, valid = to_ncname_prep(name, asciionly)
|
112
|
-
valid and return name
|
113
|
-
starting_char = name[0]
|
114
|
-
starting_char.gsub!(INVALID_NCNAME_START_REGEXP, NCNAME_INVALID)
|
115
|
-
name.size == 1 and return starting_char
|
116
|
-
following_chars = name[1..-1]
|
117
|
-
following_chars.gsub!(INVALID_NCNAME_CHAR_REGEXP, NCNAME_INVALID)
|
118
|
-
following_chars.gsub!(":", NCNAME_INVALID)
|
119
|
-
starting_char << following_chars
|
120
|
-
end
|
121
|
-
|
122
|
-
def to_ncname_prep(name, asciionly)
|
123
|
-
name = name&.to_s
|
124
|
-
name.nil? and name = ""
|
125
|
-
asciionly and name = HTMLEntities.new.encode(name,
|
126
|
-
:basic, :hexadecimal)
|
127
|
-
[name, name.nil? || name.empty? || name.match?(SAFE_NCNAME_REGEXP)]
|
128
|
-
end
|
129
|
-
|
130
|
-
def anchor_or_uuid(node = nil)
|
131
|
-
uuid = UUIDTools::UUID.random_create
|
132
|
-
node.nil? || node.id.nil? || node.id.empty? ? "_#{uuid}" : node.id
|
133
|
-
end
|
134
|
-
|
135
39
|
# block for processing XML document fragments as XHTML,
|
136
40
|
# to allow for HTMLentities
|
137
41
|
# Unescape special chars used in Asciidoctor substitution processing
|
@@ -211,21 +115,6 @@ module Metanorma
|
|
211
115
|
end
|
212
116
|
end
|
213
117
|
|
214
|
-
# all element/attribute pairs that are ID anchors in Metanorma
|
215
|
-
def anchor_attributes(presxml: false)
|
216
|
-
ret = [%w(review from), %w(review to), %w(callout target), %w(xref to),
|
217
|
-
%w(eref bibitemid), %w(citation bibitemid), %w(xref target),
|
218
|
-
%w(label for), %w(location target), %w(index to),
|
219
|
-
%w(termsource bibitemid), %w(admonition target)]
|
220
|
-
ret1 = [%w(fn target), %w(semx source), %w(fmt-title source),
|
221
|
-
%w(fmt-xref to), %w(fmt-xref target), %w(fmt-eref bibitemid),
|
222
|
-
%w(fmt-xref-label container), %w(fmt-fn-body target),
|
223
|
-
%w(fmt-annotation-start source), %w(fmt-annotation-start end),
|
224
|
-
%w(fmt-annotation-start target), %w(fmt-annotation-end source),
|
225
|
-
%w(fmt-annotation-end start), %w(fmt-annotation-end target)]
|
226
|
-
presxml ? ret + ret1 : ret
|
227
|
-
end
|
228
|
-
|
229
118
|
# convert definition list term/value pair into Nokogiri XML attribute
|
230
119
|
def dl_to_attrs(elem, dlist, name)
|
231
120
|
e = dlist.at("./dt[text()='#{name}']") or return
|
@@ -259,11 +148,6 @@ module Metanorma
|
|
259
148
|
end
|
260
149
|
x.root.children.to_xml
|
261
150
|
end
|
262
|
-
|
263
|
-
def guid_anchor?(id)
|
264
|
-
/^_[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$/i
|
265
|
-
.match?(id)
|
266
|
-
end
|
267
151
|
end
|
268
152
|
end
|
269
153
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metanorma-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.11.5
|
4
|
+
version: 1.11.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-07-
|
11
|
+
date: 2025-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: asciidoctor
|
@@ -307,6 +307,7 @@ files:
|
|
307
307
|
- README.adoc
|
308
308
|
- lib/metanorma-utils.rb
|
309
309
|
- lib/sterile/sterile.rb
|
310
|
+
- lib/utils/anchor.rb
|
310
311
|
- lib/utils/cjk.rb
|
311
312
|
- lib/utils/hash_transform_keys.rb
|
312
313
|
- lib/utils/image.rb
|