metanorma-utils 1.11.4 → 1.11.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a8c2df70df3cd5a6bef631ad7d34473f3f1784a9f768d1888dc4ac97f11e9498
4
- data.tar.gz: dc3a32cdeed25c29f9e4a417f13f955b34ce7b71e20fa8ccfd5266dc507c8e89
3
+ metadata.gz: 9385f63b399977eedf8892504cd07689c75b850922fefee1125469963aa59589
4
+ data.tar.gz: 857f73c269aefe0f694cddb2fdee7af5760e9d1f2ac8c0c2c769ffeca0280311
5
5
  SHA512:
6
- metadata.gz: bc965c8edc5dc9cbfe074e27991a0866c94c237720c73e953352a471d214a542a515bef6f24d309c87d681ea6508d1d2183b0a9da89baeac37e1a97a75598db1
7
- data.tar.gz: a6744e87b03d506a040d6b4dde5c4855012f15a85b5a454cc234a8ad3103f6aaf46829859f8156d297e5a32fc86c407698d657c27a5fda3792a9d647f7744995
6
+ metadata.gz: 5278b1a966446d36e12fdead9ba9187795bd4cb65f1ba20e180be9fc79f156a32449ce9e5c0a2fb83f567d07081b432f3f7ad9a0d196d9e3f533d50ffe8e0cad
7
+ data.tar.gz: 2e4cd411aad3cd79e0ce04f17ef1ebe7112ac24d4fa9db13b41a8f41d5686f6f2dee89064d56b92945a38a469d04860c5a1bac7d2c1cfbee0c93458342fbd011
@@ -5,3 +5,4 @@ require_relative "utils/image"
5
5
  require_relative "utils/linestatus"
6
6
  require_relative "utils/log"
7
7
  require_relative "utils/xml"
8
+ require_relative "utils/anchor"
@@ -0,0 +1,144 @@
1
+ require "uuidtools"
2
+ require "htmlentities"
3
+
4
+ module Metanorma
5
+ module Utils
6
+ class << self
7
+ # Following XML requirements for NCName: https://www.w3.org/TR/xml-names/#NT-NCName
8
+ BASECHAR = "A-Za-z\u{C0}-\u{D6}\u{D8}-\u{F6}\u{F8}-\u{FF}\u{100}-\u{131}\u{134}-\u{13E}" \
9
+ "\u{141}-\u{148}\u{14A}-\u{17E}\u{180}-\u{1C3}\u{1CD}-\u{1F0}\u{1F4}-\u{1F5}" \
10
+ "\u{1FA}-\u{217}\u{250}-\u{2A8}\u{2BB}-\u{2C1}\u{386}\u{388}-\u{38A}\u{38C}" \
11
+ "\u{38E}-\u{3A1}\u{3A3}-\u{3CE}\u{3D0}-\u{3D6}\u{3DA}\u{3DC}\u{3DE}\u{3E0}" \
12
+ "\u{3E2}-\u{3F3}\u{401}-\u{40C}\u{40E}-\u{44F}\u{451}-\u{45C}\u{45E}-\u{481}" \
13
+ "\u{490}-\u{4C4}\u{4C7}-\u{4C8}\u{4CB}-\u{4CC}\u{4D0}-\u{4EB}\u{4EE}-\u{4F5}" \
14
+ "\u{4F8}-\u{4F9}\u{531}-\u{556}\u{559}\u{561}-\u{586}\u{5D0}-\u{5EA}" \
15
+ "\u{5F0}-\u{5F2}\u{621}-\u{63A}\u{641}-\u{64A}\u{671}-\u{6B7}\u{6BA}-\u{6BE}" \
16
+ "\u{6C0}-\u{6CE}\u{6D0}-\u{6D3}\u{6D5}\u{6E5}-\u{6E6}\u{905}-\u{939}\u{93D}" \
17
+ "\u{958}-\u{961}\u{985}-\u{98C}\u{98F}-\u{990}\u{993}-\u{9A8}\u{9AA}-\u{9B0}" \
18
+ "\u{9B2}\u{9B6}-\u{9B9}\u{9DC}-\u{9DD}\u{9DF}-\u{9E1}\u{9F0}-\u{9F1}" \
19
+ "\u{A05}-\u{A0A}\u{A0F}-\u{A10}\u{A13}-\u{A28}\u{A2A}-\u{A30}\u{A32}-\u{A33}" \
20
+ "\u{A35}-\u{A36}\u{A38}-\u{A39}\u{A59}-\u{A5C}\u{A5E}\u{A72}-\u{A74}" \
21
+ "\u{A85}-\u{A8B}\u{A8D}\u{A8F}-\u{A91}\u{A93}-\u{AA8}\u{AAA}-\u{AB0}" \
22
+ "\u{AB2}-\u{AB3}\u{AB5}-\u{AB9}\u{ABD}\u{AE0}\u{B05}-\u{B0C}\u{B0F}-\u{B10}" \
23
+ "\u{B13}-\u{B28}\u{B2A}-\u{B30}\u{B32}-\u{B33}\u{B36}-\u{B39}\u{B3D}" \
24
+ "\u{B5C}-\u{B5D}\u{B5F}-\u{B61}\u{B85}-\u{B8A}\u{B8E}-\u{B90}\u{B92}-\u{B95}" \
25
+ "\u{B99}-\u{B9A}\u{B9C}\u{B9E}-\u{B9F}\u{BA3}-\u{BA4}\u{BA8}-\u{BAA}" \
26
+ "\u{BAE}-\u{BB5}\u{BB7}-\u{BB9}\u{C05}-\u{C0C}\u{C0E}-\u{C10}\u{C12}-\u{C28}" \
27
+ "\u{C2A}-\u{C33}\u{C35}-\u{C39}\u{C60}-\u{C61}\u{C85}-\u{C8C}\u{C8E}-\u{C90}" \
28
+ "\u{C92}-\u{CA8}\u{CAA}-\u{CB3}\u{CB5}-\u{CB9}\u{CDE}\u{CE0}-\u{CE1}" \
29
+ "\u{D05}-\u{D0C}\u{D0E}-\u{D10}\u{D12}-\u{D28}\u{D2A}-\u{D39}\u{D60}-\u{D61}" \
30
+ "\u{E01}-\u{E2E}\u{E30}\u{E32}-\u{E33}\u{E40}-\u{E45}\u{E81}-\u{E82}\u{E84}" \
31
+ "\u{E87}-\u{E88}\u{E8A}\u{E8D}\u{E94}-\u{E97}\u{E99}-\u{E9F}\u{EA1}-\u{EA3}" \
32
+ "\u{EA5}\u{EA7}\u{EAA}-\u{EAB}\u{EAD}-\u{EAE}\u{EB0}\u{EB2}-\u{EB3}\u{EBD}" \
33
+ "\u{EC0}-\u{EC4}\u{F40}-\u{F47}\u{F49}-\u{F69}\u{10A0}-\u{10C5}\u{10D0}-\u{10F6}" \
34
+ "\u{1100}\u{1102}-\u{1103}\u{1105}-\u{1107}\u{1109}\u{110B}-\u{110C}" \
35
+ "\u{110E}-\u{1112}\u{113C}\u{113E}\u{1140}\u{114C}\u{114E}\u{1150}" \
36
+ "\u{1154}-\u{1155}\u{1159}\u{115F}-\u{1161}\u{1163}\u{1165}\u{1167}\u{1169}" \
37
+ "\u{116D}-\u{116E}\u{1172}-\u{1173}\u{1175}\u{119E}\u{11A8}\u{11AB}" \
38
+ "\u{11AE}-\u{11AF}\u{11B7}-\u{11B8}\u{11BA}\u{11BC}-\u{11C2}\u{11EB}\u{11F0}" \
39
+ "\u{11F9}\u{1E00}-\u{1E9B}\u{1EA0}-\u{1EF9}\u{1F00}-\u{1F15}\u{1F18}-\u{1F1D}" \
40
+ "\u{1F20}-\u{1F45}\u{1F48}-\u{1F4D}\u{1F50}-\u{1F57}\u{1F59}\u{1F5B}\u{1F5D}" \
41
+ "\u{1F5F}-\u{1F7D}\u{1F80}-\u{1FB4}\u{1FB6}-\u{1FBC}\u{1FBE}\u{1FC2}-\u{1FC4}" \
42
+ "\u{1FC6}-\u{1FCC}\u{1FD0}-\u{1FD3}\u{1FD6}-\u{1FDB}\u{1FE0}-\u{1FEC}" \
43
+ "\u{1FF2}-\u{1FF4}\u{1FF6}-\u{1FFC}\u{2126}\u{212A}-\u{212B}\u{212E}" \
44
+ "\u{2180}-\u{2182}\u{3041}-\u{3094}\u{30A1}-\u{30FA}\u{3105}-\u{312C}" \
45
+ "\u{AC00}-\u{D7A3}".freeze
46
+ IDEOGRAPHIC = "\u{4E00}-\u{9FA5}\u{3007}\u{3021}-\u{3029}".freeze
47
+ LETTER = "#{BASECHAR}#{IDEOGRAPHIC}".freeze
48
+ DIGIT = "0-9\u{0660}-\u{0669}\u{06F0}-\u{06F9}\u{0966}-\u{096F}\u{09E6}-\u{09EF}" \
49
+ "\u{0A66}-\u{0A6F}\u{0AE6}-\u{0AEF}\u{0B66}-\u{0B6F}\u{0BE7}-\u{0BEF}" \
50
+ "\u{0C66}-\u{0C6F}\u{0CE6}-\u{0CEF}\u{0D66}-\u{0D6F}\u{0E50}-\u{0E59}" \
51
+ "\u{0ED0}-\u{0ED9}\u{0F20}-\u{0F29}".freeze
52
+ COMBINING_CHAR = "\u{0300}-\u{0345}\u{0360}-\u{0361}\u{0483}-\u{0486}\u{0591}-\u{05A1}" \
53
+ "\u{05A3}-\u{05B9}\u{05BB}-\u{05BD}\u{05BF}\u{05C1}-\u{05C2}\u{05C4}" \
54
+ "\u{064B}-\u{0652}\u{0670}\u{06D6}-\u{06DC}\u{06DD}-\u{06DF}" \
55
+ "\u{06E0}-\u{06E4}\u{06E7}-\u{06E8}\u{06EA}-\u{06ED}\u{0901}-\u{0903}" \
56
+ "\u{093C}\u{093E}-\u{094C}\u{094D}\u{0951}-\u{0954}\u{0962}-\u{0963}" \
57
+ "\u{0981}-\u{0983}\u{09BC}\u{09BE}\u{09BF}\u{09C0}-\u{09C4}" \
58
+ "\u{09C7}-\u{09C8}\u{09CB}-\u{09CD}\u{09D7}\u{09E2}-\u{09E3}\u{0A02}" \
59
+ "\u{0A3C}\u{0A3E}\u{0A3F}\u{0A40}-\u{0A42}\u{0A47}-\u{0A48}" \
60
+ "\u{0A4B}-\u{0A4D}\u{0A70}-\u{0A71}\u{0A81}-\u{0A83}\u{0ABC}" \
61
+ "\u{0ABE}-\u{0AC5}\u{0AC7}-\u{0AC9}\u{0ACB}-\u{0ACD}\u{0B01}-\u{0B03}" \
62
+ "\u{0B3C}\u{0B3E}-\u{0B43}\u{0B47}-\u{0B48}\u{0B4B}-\u{0B4D}" \
63
+ "\u{0B56}-\u{0B57}\u{0B82}-\u{0B83}\u{0BBE}-\u{0BC2}\u{0BC6}-\u{0BC8}" \
64
+ "\u{0BCA}-\u{0BCD}\u{0BD7}\u{0C01}-\u{0C03}\u{0C3E}-\u{0C44}" \
65
+ "\u{0C46}-\u{0C48}\u{0C4A}-\u{0C4D}\u{0C55}-\u{0C56}\u{0C82}-\u{0C83}" \
66
+ "\u{0CBE}-\u{0CC4}\u{0CC6}-\u{0CC8}\u{0CCA}-\u{0CCD}\u{0CD5}-\u{0CD6}" \
67
+ "\u{0D02}-\u{0D03}\u{0D3E}-\u{0D43}\u{0D46}-\u{0D48}\u{0D4A}-\u{0D4D}" \
68
+ "\u{0D57}\u{0E31}\u{0E34}-\u{0E3A}\u{0E47}-\u{0E4E}\u{0EB1}" \
69
+ "\u{0EB4}-\u{0EB9}\u{0EBB}-\u{0EBC}\u{0EC8}-\u{0ECD}\u{0F18}-\u{0F19}" \
70
+ "\u{0F35}\u{0F37}\u{0F39}\u{0F3E}\u{0F3F}\u{0F71}-\u{0F84}" \
71
+ "\u{0F86}-\u{0F8B}\u{0F90}-\u{0F95}\u{0F97}\u{0F99}-\u{0FAD}" \
72
+ "\u{0FB1}-\u{0FB7}\u{0FB9}\u{20D0}-\u{20DC}\u{20E1}\u{302A}-\u{302F}" \
73
+ "\u{3099}\u{309A}".freeze
74
+ EXTENDER = "\u{00B7}\u{02D0}\u{02D1}\u{0387}\u{0640}\u{0E46}\u{0EC6}\u{3005}" \
75
+ "\u{3031}-\u{3035}\u{309D}-\u{309E}\u{30FC}-\u{30FE}".freeze
76
+
77
+ # NCName specific constants - NCName is "an XML Name, minus the :"
78
+ # NCName = (Letter | '_') (NCNameChar)*
79
+ NCNAME_START_CHAR = "#{LETTER}_".freeze
80
+ NCNAME_CHAR = "#{LETTER}#{DIGIT}._\\-#{COMBINING_CHAR}#{EXTENDER}".freeze
81
+ INVALID_NCNAME_START_REGEXP = /[^#{NCNAME_START_CHAR}]/.freeze
82
+ INVALID_NCNAME_CHAR_REGEXP = /[^#{NCNAME_CHAR}]/.freeze
83
+ SAFE_NCNAME_REGEXP = /\A[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*\z/.freeze
84
+ NCNAME_INVALID = "_".freeze
85
+
86
+ # A utility method for escaping XML NCNames (XML Names without colons).
87
+ #
88
+ # to_ncname('1 < 2 & 3')
89
+ # # => "____2___3" (the leading digit is not a valid NCName start character, so it is replaced too)
90
+ #
91
+ # It follows the requirements of the specification for NCName: https://www.w3.org/TR/xml-names/#NT-NCName
92
+ # NCName is "an XML Name, minus the :"
93
# Turn an arbitrary value into a string usable as an XML NCName.
#
#   to_ncname("a b:c")  # => "a_b_c"
#
# The value is first normalised by to_ncname_prep; if that reports the
# result as already acceptable it is returned untouched. Otherwise the
# first character is checked against the NCName start-character set and
# every later character against the NCName character set, with each
# offending character replaced by NCNAME_INVALID.
#
# name      - value to convert (to_ncname_prep stringifies it; nil becomes "")
# asciionly - passed through to to_ncname_prep
#
# Returns the sanitised String.
def to_ncname(name, asciionly: false)
  prepared, acceptable = to_ncname_prep(name, asciionly)
  return prepared if acceptable

  head = prepared[0].gsub(INVALID_NCNAME_START_REGEXP, NCNAME_INVALID)
  return head if prepared.size == 1

  # ":" is outside NCNAME_CHAR, so this single substitution also removes colons
  tail = prepared[1..-1].gsub(INVALID_NCNAME_CHAR_REGEXP, NCNAME_INVALID)
  head << tail
end
104
+
105
# Normalise the input of to_ncname and report whether it needs escaping.
#
# name      - value to normalise; converted with to_s, nil becomes ""
# asciionly - when truthy, the string is passed through
#             HTMLEntities#encode with the :basic and :hexadecimal options
#
# Returns a two-element Array: the normalised string, and a boolean that is
# true when the string is empty or already matches SAFE_NCNAME_REGEXP.
def to_ncname_prep(name, asciionly)
  text = name&.to_s
  text = "" if text.nil?
  if asciionly
    text = HTMLEntities.new.encode(text, :basic, :hexadecimal)
  end
  usable_as_is =
    if text.nil? || text.empty?
      true
    else
      text.match?(SAFE_NCNAME_REGEXP)
    end
  [text, usable_as_is]
end
112
+
113
# Return the id of +node+, or a newly generated "_<uuid>" anchor when the
# node is nil or its id is nil or empty.
#
# node - object responding to #id, or nil (default)
#
# Returns the existing id, or a String made of "_" followed by a random UUID.
def anchor_or_uuid(node = nil)
  # Read the id once instead of up to three times
  existing = node&.id
  return existing unless existing.nil? || existing.empty?

  # Only generate a random UUID when it is actually going to be used
  "_#{UUIDTools::UUID.random_create}"
end
117
+
118
+ # all element/attribute pairs that are ID anchors in Metanorma
119
# List the element/attribute pairs that are ID anchors in Metanorma.
#
# presxml - when true, the additional pairs in the second list below are
#           appended to the base list (default: false)
#
# Returns an Array of [element_name, attribute_name] String pairs.
def anchor_attributes(presxml: false)
  base_pairs = [
    "review from", "review to", "callout target", "xref to",
    "eref bibitemid", "citation bibitemid", "xref target",
    "label for", "location target", "index to",
    "termsource bibitemid", "admonition target"
  ].map(&:split)
  return base_pairs unless presxml

  extra_pairs = [
    "fn target", "semx source", "fmt-title source",
    "fmt-xref to", "fmt-xref target", "fmt-eref bibitemid",
    "fmt-xref-label container", "fmt-fn-body target",
    "fmt-annotation-start source", "fmt-annotation-start end",
    "fmt-annotation-start target", "fmt-annotation-end source",
    "fmt-annotation-end start", "fmt-annotation-end target"
  ].map(&:split)
  base_pairs + extra_pairs
end
132
+
133
# Tell whether +id+ has the shape of a GUID anchor: an underscore followed
# by a UUID in 8-4-4-4-12 hexadecimal form (case-insensitive).
#
# The pattern is anchored with \A and \z so that the WHOLE string must be
# the GUID; with ^ and $ a multi-line string would be accepted as soon as
# any single line of it looked like a GUID.
#
# id - String to test (nil yields false)
#
# Returns true or false.
def guid_anchor?(id)
  /\A_[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\z/i
    .match?(id)
end
137
+
138
# Derive a GUID-shaped identifier from an element's path and text.
#
# The MD5 hex digest of "<path>////<text>" is cut into groups of
# 8-4-4-4-12 characters, joined with hyphens and prefixed with "_".
#
# elem - object responding to #path and #text
#
# Returns the identifier String.
def contenthash(elem)
  digest = Digest::MD5.hexdigest("#{elem.path}////#{elem.text}")
  offset = 0
  groups = [8, 4, 4, 4, 12].map do |width|
    piece = digest[offset, width]
    offset += width
    piece
  end
  groups.join("-").prepend("_")
end
142
+ end
143
+ end
144
+ end
data/lib/utils/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module Metanorma
2
2
  module Utils
3
- VERSION = "1.11.4".freeze
3
+ VERSION = "1.11.6".freeze
4
4
  end
5
5
  end
data/lib/utils/xml.rb CHANGED
@@ -4,6 +4,21 @@ require "uuidtools"
4
4
  require "htmlentities"
5
5
  require "nokogiri"
6
6
 
7
+ module Nokogiri
8
+ module XML
9
+ class Node
10
# Insert +content+ so that it becomes the first child of this node.
#
# When the node has no children, +content+ is added with add_child;
# otherwise it is assigned as the previous sibling of the current first
# child.
#
# content - whatever add_child / previous= accept
#
# Returns self, so calls can be chained.
def add_first_child(content)
  current_first = children.first
  if current_first
    current_first.previous = content
  else
    add_child(content)
  end
  self
end
18
+ end
19
+ end
20
+ end
21
+
7
22
  module Metanorma
8
23
  module Utils
9
24
  NOKOHEAD = <<~HERE.freeze
@@ -21,117 +36,6 @@ module Metanorma
21
36
  end
22
37
  end
23
38
 
24
- # Following XML requirements for NCName: https://www.w3.org/TR/xml-names/#NT-NCName
25
- BASECHAR = "A-Za-z\u{C0}-\u{D6}\u{D8}-\u{F6}\u{F8}-\u{FF}\u{100}-\u{131}\u{134}-\u{13E}" \
26
- "\u{141}-\u{148}\u{14A}-\u{17E}\u{180}-\u{1C3}\u{1CD}-\u{1F0}\u{1F4}-\u{1F5}" \
27
- "\u{1FA}-\u{217}\u{250}-\u{2A8}\u{2BB}-\u{2C1}\u{386}\u{388}-\u{38A}\u{38C}" \
28
- "\u{38E}-\u{3A1}\u{3A3}-\u{3CE}\u{3D0}-\u{3D6}\u{3DA}\u{3DC}\u{3DE}\u{3E0}" \
29
- "\u{3E2}-\u{3F3}\u{401}-\u{40C}\u{40E}-\u{44F}\u{451}-\u{45C}\u{45E}-\u{481}" \
30
- "\u{490}-\u{4C4}\u{4C7}-\u{4C8}\u{4CB}-\u{4CC}\u{4D0}-\u{4EB}\u{4EE}-\u{4F5}" \
31
- "\u{4F8}-\u{4F9}\u{531}-\u{556}\u{559}\u{561}-\u{586}\u{5D0}-\u{5EA}" \
32
- "\u{5F0}-\u{5F2}\u{621}-\u{63A}\u{641}-\u{64A}\u{671}-\u{6B7}\u{6BA}-\u{6BE}" \
33
- "\u{6C0}-\u{6CE}\u{6D0}-\u{6D3}\u{6D5}\u{6E5}-\u{6E6}\u{905}-\u{939}\u{93D}" \
34
- "\u{958}-\u{961}\u{985}-\u{98C}\u{98F}-\u{990}\u{993}-\u{9A8}\u{9AA}-\u{9B0}" \
35
- "\u{9B2}\u{9B6}-\u{9B9}\u{9DC}-\u{9DD}\u{9DF}-\u{9E1}\u{9F0}-\u{9F1}" \
36
- "\u{A05}-\u{A0A}\u{A0F}-\u{A10}\u{A13}-\u{A28}\u{A2A}-\u{A30}\u{A32}-\u{A33}" \
37
- "\u{A35}-\u{A36}\u{A38}-\u{A39}\u{A59}-\u{A5C}\u{A5E}\u{A72}-\u{A74}" \
38
- "\u{A85}-\u{A8B}\u{A8D}\u{A8F}-\u{A91}\u{A93}-\u{AA8}\u{AAA}-\u{AB0}" \
39
- "\u{AB2}-\u{AB3}\u{AB5}-\u{AB9}\u{ABD}\u{AE0}\u{B05}-\u{B0C}\u{B0F}-\u{B10}" \
40
- "\u{B13}-\u{B28}\u{B2A}-\u{B30}\u{B32}-\u{B33}\u{B36}-\u{B39}\u{B3D}" \
41
- "\u{B5C}-\u{B5D}\u{B5F}-\u{B61}\u{B85}-\u{B8A}\u{B8E}-\u{B90}\u{B92}-\u{B95}" \
42
- "\u{B99}-\u{B9A}\u{B9C}\u{B9E}-\u{B9F}\u{BA3}-\u{BA4}\u{BA8}-\u{BAA}" \
43
- "\u{BAE}-\u{BB5}\u{BB7}-\u{BB9}\u{C05}-\u{C0C}\u{C0E}-\u{C10}\u{C12}-\u{C28}" \
44
- "\u{C2A}-\u{C33}\u{C35}-\u{C39}\u{C60}-\u{C61}\u{C85}-\u{C8C}\u{C8E}-\u{C90}" \
45
- "\u{C92}-\u{CA8}\u{CAA}-\u{CB3}\u{CB5}-\u{CB9}\u{CDE}\u{CE0}-\u{CE1}" \
46
- "\u{D05}-\u{D0C}\u{D0E}-\u{D10}\u{D12}-\u{D28}\u{D2A}-\u{D39}\u{D60}-\u{D61}" \
47
- "\u{E01}-\u{E2E}\u{E30}\u{E32}-\u{E33}\u{E40}-\u{E45}\u{E81}-\u{E82}\u{E84}" \
48
- "\u{E87}-\u{E88}\u{E8A}\u{E8D}\u{E94}-\u{E97}\u{E99}-\u{E9F}\u{EA1}-\u{EA3}" \
49
- "\u{EA5}\u{EA7}\u{EAA}-\u{EAB}\u{EAD}-\u{EAE}\u{EB0}\u{EB2}-\u{EB3}\u{EBD}" \
50
- "\u{EC0}-\u{EC4}\u{F40}-\u{F47}\u{F49}-\u{F69}\u{10A0}-\u{10C5}\u{10D0}-\u{10F6}" \
51
- "\u{1100}\u{1102}-\u{1103}\u{1105}-\u{1107}\u{1109}\u{110B}-\u{110C}" \
52
- "\u{110E}-\u{1112}\u{113C}\u{113E}\u{1140}\u{114C}\u{114E}\u{1150}" \
53
- "\u{1154}-\u{1155}\u{1159}\u{115F}-\u{1161}\u{1163}\u{1165}\u{1167}\u{1169}" \
54
- "\u{116D}-\u{116E}\u{1172}-\u{1173}\u{1175}\u{119E}\u{11A8}\u{11AB}" \
55
- "\u{11AE}-\u{11AF}\u{11B7}-\u{11B8}\u{11BA}\u{11BC}-\u{11C2}\u{11EB}\u{11F0}" \
56
- "\u{11F9}\u{1E00}-\u{1E9B}\u{1EA0}-\u{1EF9}\u{1F00}-\u{1F15}\u{1F18}-\u{1F1D}" \
57
- "\u{1F20}-\u{1F45}\u{1F48}-\u{1F4D}\u{1F50}-\u{1F57}\u{1F59}\u{1F5B}\u{1F5D}" \
58
- "\u{1F5F}-\u{1F7D}\u{1F80}-\u{1FB4}\u{1FB6}-\u{1FBC}\u{1FBE}\u{1FC2}-\u{1FC4}" \
59
- "\u{1FC6}-\u{1FCC}\u{1FD0}-\u{1FD3}\u{1FD6}-\u{1FDB}\u{1FE0}-\u{1FEC}" \
60
- "\u{1FF2}-\u{1FF4}\u{1FF6}-\u{1FFC}\u{2126}\u{212A}-\u{212B}\u{212E}" \
61
- "\u{2180}-\u{2182}\u{3041}-\u{3094}\u{30A1}-\u{30FA}\u{3105}-\u{312C}" \
62
- "\u{AC00}-\u{D7A3}".freeze
63
- IDEOGRAPHIC = "\u{4E00}-\u{9FA5}\u{3007}\u{3021}-\u{3029}".freeze
64
- LETTER = "#{BASECHAR}#{IDEOGRAPHIC}".freeze
65
- DIGIT = "0-9\u{0660}-\u{0669}\u{06F0}-\u{06F9}\u{0966}-\u{096F}\u{09E6}-\u{09EF}" \
66
- "\u{0A66}-\u{0A6F}\u{0AE6}-\u{0AEF}\u{0B66}-\u{0B6F}\u{0BE7}-\u{0BEF}" \
67
- "\u{0C66}-\u{0C6F}\u{0CE6}-\u{0CEF}\u{0D66}-\u{0D6F}\u{0E50}-\u{0E59}" \
68
- "\u{0ED0}-\u{0ED9}\u{0F20}-\u{0F29}".freeze
69
- COMBINING_CHAR = "\u{0300}-\u{0345}\u{0360}-\u{0361}\u{0483}-\u{0486}\u{0591}-\u{05A1}" \
70
- "\u{05A3}-\u{05B9}\u{05BB}-\u{05BD}\u{05BF}\u{05C1}-\u{05C2}\u{05C4}" \
71
- "\u{064B}-\u{0652}\u{0670}\u{06D6}-\u{06DC}\u{06DD}-\u{06DF}" \
72
- "\u{06E0}-\u{06E4}\u{06E7}-\u{06E8}\u{06EA}-\u{06ED}\u{0901}-\u{0903}" \
73
- "\u{093C}\u{093E}-\u{094C}\u{094D}\u{0951}-\u{0954}\u{0962}-\u{0963}" \
74
- "\u{0981}-\u{0983}\u{09BC}\u{09BE}\u{09BF}\u{09C0}-\u{09C4}" \
75
- "\u{09C7}-\u{09C8}\u{09CB}-\u{09CD}\u{09D7}\u{09E2}-\u{09E3}\u{0A02}" \
76
- "\u{0A3C}\u{0A3E}\u{0A3F}\u{0A40}-\u{0A42}\u{0A47}-\u{0A48}" \
77
- "\u{0A4B}-\u{0A4D}\u{0A70}-\u{0A71}\u{0A81}-\u{0A83}\u{0ABC}" \
78
- "\u{0ABE}-\u{0AC5}\u{0AC7}-\u{0AC9}\u{0ACB}-\u{0ACD}\u{0B01}-\u{0B03}" \
79
- "\u{0B3C}\u{0B3E}-\u{0B43}\u{0B47}-\u{0B48}\u{0B4B}-\u{0B4D}" \
80
- "\u{0B56}-\u{0B57}\u{0B82}-\u{0B83}\u{0BBE}-\u{0BC2}\u{0BC6}-\u{0BC8}" \
81
- "\u{0BCA}-\u{0BCD}\u{0BD7}\u{0C01}-\u{0C03}\u{0C3E}-\u{0C44}" \
82
- "\u{0C46}-\u{0C48}\u{0C4A}-\u{0C4D}\u{0C55}-\u{0C56}\u{0C82}-\u{0C83}" \
83
- "\u{0CBE}-\u{0CC4}\u{0CC6}-\u{0CC8}\u{0CCA}-\u{0CCD}\u{0CD5}-\u{0CD6}" \
84
- "\u{0D02}-\u{0D03}\u{0D3E}-\u{0D43}\u{0D46}-\u{0D48}\u{0D4A}-\u{0D4D}" \
85
- "\u{0D57}\u{0E31}\u{0E34}-\u{0E3A}\u{0E47}-\u{0E4E}\u{0EB1}" \
86
- "\u{0EB4}-\u{0EB9}\u{0EBB}-\u{0EBC}\u{0EC8}-\u{0ECD}\u{0F18}-\u{0F19}" \
87
- "\u{0F35}\u{0F37}\u{0F39}\u{0F3E}\u{0F3F}\u{0F71}-\u{0F84}" \
88
- "\u{0F86}-\u{0F8B}\u{0F90}-\u{0F95}\u{0F97}\u{0F99}-\u{0FAD}" \
89
- "\u{0FB1}-\u{0FB7}\u{0FB9}\u{20D0}-\u{20DC}\u{20E1}\u{302A}-\u{302F}" \
90
- "\u{3099}\u{309A}".freeze
91
- EXTENDER = "\u{00B7}\u{02D0}\u{02D1}\u{0387}\u{0640}\u{0E46}\u{0EC6}\u{3005}" \
92
- "\u{3031}-\u{3035}\u{309D}-\u{309E}\u{30FC}-\u{30FE}".freeze
93
-
94
- # NCName specific constants - NCName is "an XML Name, minus the :"
95
- # NCName = (Letter | '_') (NCNameChar)*
96
- NCNAME_START_CHAR = "#{LETTER}_".freeze
97
- NCNAME_CHAR = "#{LETTER}#{DIGIT}._\\-#{COMBINING_CHAR}#{EXTENDER}".freeze
98
- INVALID_NCNAME_START_REGEXP = /[^#{NCNAME_START_CHAR}]/.freeze
99
- INVALID_NCNAME_CHAR_REGEXP = /[^#{NCNAME_CHAR}]/.freeze
100
- SAFE_NCNAME_REGEXP = /\A[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*\z/.freeze
101
- NCNAME_INVALID = "_".freeze
102
-
103
- # A utility method for escaping XML NCNames (XML Names without colons).
104
- #
105
- # to_ncname('1 < 2 & 3')
106
- # # => "1___2___3"
107
- #
108
- # It follows the requirements of the specification for NCName: https://www.w3.org/TR/xml-names/#NT-NCName
109
- # NCName is "an XML Name, minus the :"
110
- def to_ncname(name, asciionly: false)
111
- name, valid = to_ncname_prep(name, asciionly)
112
- valid and return name
113
- starting_char = name[0]
114
- starting_char.gsub!(INVALID_NCNAME_START_REGEXP, NCNAME_INVALID)
115
- name.size == 1 and return starting_char
116
- following_chars = name[1..-1]
117
- following_chars.gsub!(INVALID_NCNAME_CHAR_REGEXP, NCNAME_INVALID)
118
- following_chars.gsub!(":", NCNAME_INVALID)
119
- starting_char << following_chars
120
- end
121
-
122
- def to_ncname_prep(name, asciionly)
123
- name = name&.to_s
124
- name.nil? and name = ""
125
- asciionly and name = HTMLEntities.new.encode(name,
126
- :basic, :hexadecimal)
127
- [name, name.nil? || name.empty? || name.match?(SAFE_NCNAME_REGEXP)]
128
- end
129
-
130
- def anchor_or_uuid(node = nil)
131
- uuid = UUIDTools::UUID.random_create
132
- node.nil? || node.id.nil? || node.id.empty? ? "_#{uuid}" : node.id
133
- end
134
-
135
39
  # block for processing XML document fragments as XHTML,
136
40
  # to allow for HTMLentities
137
41
  # Unescape special chars used in Asciidoctor substitution processing
@@ -211,21 +115,6 @@ module Metanorma
211
115
  end
212
116
  end
213
117
 
214
- # all element/attribute pairs that are ID anchors in Metanorma
215
- def anchor_attributes(presxml: false)
216
- ret = [%w(review from), %w(review to), %w(callout target), %w(xref to),
217
- %w(eref bibitemid), %w(citation bibitemid), %w(xref target),
218
- %w(label for), %w(location target), %w(index to),
219
- %w(termsource bibitemid), %w(admonition target)]
220
- ret1 = [%w(fn target), %w(semx source), %w(fmt-title source),
221
- %w(fmt-xref to), %w(fmt-xref target), %w(fmt-eref bibitemid),
222
- %w(fmt-xref-label container), %w(fmt-fn-body target),
223
- %w(fmt-review-start source), %w(fmt-review-start end),
224
- %w(fmt-review-start target), %w(fmt-review-end source),
225
- %w(fmt-review-end start), %w(fmt-review-end target)]
226
- presxml ? ret + ret1 : ret
227
- end
228
-
229
118
  # convert definition list term/value pair into Nokogiri XML attribute
230
119
  def dl_to_attrs(elem, dlist, name)
231
120
  e = dlist.at("./dt[text()='#{name}']") or return
@@ -259,11 +148,6 @@ module Metanorma
259
148
  end
260
149
  x.root.children.to_xml
261
150
  end
262
-
263
- def guid_anchor?(id)
264
- /^_[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$/i
265
- .match?(id)
266
- end
267
151
  end
268
152
  end
269
153
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metanorma-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.4
4
+ version: 1.11.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-06-23 00:00:00.000000000 Z
11
+ date: 2025-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: asciidoctor
@@ -307,6 +307,7 @@ files:
307
307
  - README.adoc
308
308
  - lib/metanorma-utils.rb
309
309
  - lib/sterile/sterile.rb
310
+ - lib/utils/anchor.rb
310
311
  - lib/utils/cjk.rb
311
312
  - lib/utils/hash_transform_keys.rb
312
313
  - lib/utils/image.rb