metanorma-utils 1.10.3 → 1.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/utils/cjk.rb +110 -0
- data/lib/utils/log.rb +2 -2
- data/lib/utils/main.rb +1 -3
- data/lib/utils/version.rb +1 -1
- data/lib/utils/xml.rb +13 -5
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb7eb442187f8f1129b1e5e0a6ae9e05f504522cb55c2a3f75b784fc803717cf
|
4
|
+
data.tar.gz: 6819e04fc69ba93d601ce0053905db9ab10cea6d1e4af39e51a6d3b283603c02
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f21d1d2c7431b3de60715d50848d1a9ae6314c0dce3918a4db1d020e81928fff965148c8de4865186ab470e2242f775d35184ea212aa9c96f988a9ee32fc06bf
|
7
|
+
data.tar.gz: cc86b4e88d4cb126ac2d7824a996341db7194724ff95242bf17f4d2743a56552bb01486fb122e674901012cfe6a7f361acba60dd9504b98af7b8a8f7bbf3dbb8
|
data/lib/utils/cjk.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
module Metanorma
|
2
|
+
module Utils
|
3
|
+
class << self
|
4
|
+
# Basic CJK scripts
|
5
|
+
HAN = "\\p{Han}".freeze
|
6
|
+
BOPOMOFO = "\\p{Bopomofo}".freeze
|
7
|
+
HANGUL = "\\p{Hangul}".freeze
|
8
|
+
HIRAGANA = "\\p{Hiragana}".freeze
|
9
|
+
KATAKANA = "\\p{Katakana}".freeze
|
10
|
+
|
11
|
+
# Script extensions - characters shared between scripts
|
12
|
+
|
13
|
+
# CJK Symbols and Punctuation (U+3000–U+303F)
|
14
|
+
# Used across all CJK scripts
|
15
|
+
CJK_SYMBOLS = "[\\u3000-\\u303F]".freeze
|
16
|
+
|
17
|
+
# CJK Punctuation (subset of CJK Symbols commonly used)
|
18
|
+
CJK_PUNCTUATION = "[\\u3001-\\u3003\\u3008-\\u3011\\u3014-\\u301F]".freeze
|
19
|
+
|
20
|
+
# Halfwidth and Fullwidth Forms (U+FF00–U+FFEF)
|
21
|
+
# Used in all CJK contexts
|
22
|
+
CJK_HALFWIDTH_FULLWIDTH = "[\\uFF00-\\uFFEF]".freeze
|
23
|
+
|
24
|
+
# CJK Compatibility Forms (U+FE30–U+FE4F)
|
25
|
+
# Primarily used with Han but relevant for all CJK
|
26
|
+
CJK_COMPAT = "[\\uFE30-\\uFE4F]".freeze
|
27
|
+
|
28
|
+
# Vertical Forms (U+FE10–U+FE1F)
|
29
|
+
# Used in vertical text layout for all CJK
|
30
|
+
CJK_VERTICAL = "[\\uFE10-\\uFE1F]".freeze
|
31
|
+
|
32
|
+
# Small Form Variants (U+FE50–U+FE6F)
|
33
|
+
# Used in all CJK contexts
|
34
|
+
CJK_SMALL_FORMS = "[\\uFE50-\\uFE6F]".freeze
|
35
|
+
|
36
|
+
# Ideographic Description Characters (U+2FF0–U+2FFF)
|
37
|
+
# Used with Han script
|
38
|
+
HAN_IDC = "[\\u2FF0-\\u2FFF]".freeze
|
39
|
+
|
40
|
+
# Kanbun (U+3190–U+319F)
|
41
|
+
# Used with Han script for Japanese
|
42
|
+
KANBUN = "[\\u3190-\\u319F]".freeze
|
43
|
+
|
44
|
+
# CJK Compatibility (U+3300–U+33FF)
|
45
|
+
# Used with Han script
|
46
|
+
CJK_COMPAT_IDEOGRAPHS = "[\\u3300-\\u33FF]".freeze
|
47
|
+
|
48
|
+
# CJK Compatibility Ideographs (U+F900–U+FAFF)
|
49
|
+
HAN_COMPAT_IDEOGRAPHS = "[\\uF900-\\uFAFF]".freeze
|
50
|
+
|
51
|
+
# Script extensions by primary script
|
52
|
+
HAN_EXTENSIONS = [
|
53
|
+
HAN,
|
54
|
+
CJK_SYMBOLS,
|
55
|
+
CJK_PUNCTUATION,
|
56
|
+
CJK_HALFWIDTH_FULLWIDTH,
|
57
|
+
CJK_COMPAT,
|
58
|
+
CJK_VERTICAL,
|
59
|
+
CJK_SMALL_FORMS,
|
60
|
+
HAN_IDC,
|
61
|
+
KANBUN,
|
62
|
+
CJK_COMPAT_IDEOGRAPHS,
|
63
|
+
HAN_COMPAT_IDEOGRAPHS
|
64
|
+
].join("|").freeze
|
65
|
+
|
66
|
+
HANGUL_EXTENSIONS = [
|
67
|
+
HANGUL,
|
68
|
+
CJK_SYMBOLS,
|
69
|
+
CJK_PUNCTUATION,
|
70
|
+
CJK_HALFWIDTH_FULLWIDTH,
|
71
|
+
CJK_VERTICAL,
|
72
|
+
CJK_SMALL_FORMS
|
73
|
+
].join("|").freeze
|
74
|
+
|
75
|
+
HIRAGANA_EXTENSIONS = [
|
76
|
+
HIRAGANA,
|
77
|
+
CJK_SYMBOLS,
|
78
|
+
CJK_PUNCTUATION,
|
79
|
+
CJK_HALFWIDTH_FULLWIDTH,
|
80
|
+
CJK_VERTICAL,
|
81
|
+
CJK_SMALL_FORMS
|
82
|
+
].join("|").freeze
|
83
|
+
|
84
|
+
KATAKANA_EXTENSIONS = [
|
85
|
+
KATAKANA,
|
86
|
+
CJK_SYMBOLS,
|
87
|
+
CJK_PUNCTUATION,
|
88
|
+
CJK_HALFWIDTH_FULLWIDTH,
|
89
|
+
CJK_VERTICAL,
|
90
|
+
CJK_SMALL_FORMS
|
91
|
+
].join("|").freeze
|
92
|
+
|
93
|
+
BOPOMOFO_EXTENSIONS = [
|
94
|
+
BOPOMOFO,
|
95
|
+
CJK_SYMBOLS,
|
96
|
+
CJK_PUNCTUATION,
|
97
|
+
CJK_HALFWIDTH_FULLWIDTH
|
98
|
+
].join("|").freeze
|
99
|
+
|
100
|
+
# Combined CJK pattern including all script extensions
|
101
|
+
CJK = [
|
102
|
+
HAN_EXTENSIONS,
|
103
|
+
HANGUL_EXTENSIONS,
|
104
|
+
HIRAGANA_EXTENSIONS,
|
105
|
+
KATAKANA_EXTENSIONS,
|
106
|
+
BOPOMOFO_EXTENSIONS
|
107
|
+
].join("|").freeze
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
data/lib/utils/log.rb
CHANGED
@@ -86,7 +86,7 @@ module Metanorma
|
|
86
86
|
while !node.nil? && node["id"].nil? && node.respond_to?(:parent)
|
87
87
|
node = node.parent
|
88
88
|
end
|
89
|
-
node.respond_to?(:parent) ? "ID #{node['id']}" : ""
|
89
|
+
node.respond_to?(:parent) ? "ID #{node['anchor'] || node['id']}" : ""
|
90
90
|
elsif node.is_a? String then node
|
91
91
|
elsif node.respond_to?(:lineno) && !node.lineno.nil? &&
|
92
92
|
!node.lineno.empty?
|
@@ -224,7 +224,7 @@ module Metanorma
|
|
224
224
|
/^ID /.match?(loc) or return [loc, nil]
|
225
225
|
loc.sub!(/^ID /, "")
|
226
226
|
loc = @mapid[loc] while @mapid[loc]
|
227
|
-
url = "#{@htmlfilename}##{loc}"
|
227
|
+
url = "#{@htmlfilename}##{to_ncname loc}"
|
228
228
|
[loc, url]
|
229
229
|
end
|
230
230
|
|
data/lib/utils/main.rb
CHANGED
@@ -5,6 +5,7 @@ require "htmlentities"
|
|
5
5
|
require "nokogiri"
|
6
6
|
require "csv"
|
7
7
|
require_relative "../sterile/sterile"
|
8
|
+
require_relative "cjk"
|
8
9
|
|
9
10
|
module Metanorma
|
10
11
|
module Utils
|
@@ -35,9 +36,6 @@ module Metanorma
|
|
35
36
|
docfile.nil? ? "./" : "#{Pathname.new(docfile).parent}/"
|
36
37
|
end
|
37
38
|
|
38
|
-
CJK = "\\p{Han}|\\p{Bopomofo}|\\p{Hangul}|\\p{Hiragana}|\\p{Katakana}"
|
39
|
-
.freeze
|
40
|
-
|
41
39
|
# TODO needs internationalisation of quote
|
42
40
|
def smartformat(text)
|
43
41
|
ret = HTMLEntities.new.decode(
|
data/lib/utils/version.rb
CHANGED
data/lib/utils/xml.rb
CHANGED
@@ -69,7 +69,7 @@ module Metanorma
|
|
69
69
|
cjk2 = /#{CJK}/o.match?(nextfirst)
|
70
70
|
text1 = /[^\p{Z}\p{C}]/.match?(last)
|
71
71
|
text2 = /[^\p{Z}\p{C}]/.match?(nextfirst)
|
72
|
-
|
72
|
+
cjk1 && (cjk2 || !text2) and next
|
73
73
|
!text1 && cjk2 and next
|
74
74
|
ret[i] += " "
|
75
75
|
end
|
@@ -125,10 +125,18 @@ module Metanorma
|
|
125
125
|
end
|
126
126
|
|
127
127
|
# all element/attribute pairs that are ID anchors in Metanorma
|
128
|
-
def anchor_attributes
|
129
|
-
[%w
|
130
|
-
|
131
|
-
|
128
|
+
def anchor_attributes(presxml: false)
|
129
|
+
ret = [%w(review from), %w(review to), %w(callout target), %w(xref to),
|
130
|
+
%w(eref bibitemid), %w(citation bibitemid), %w(xref target),
|
131
|
+
%w(label for), %w(location target), %w(index to),
|
132
|
+
%w(termsource bibitemid), %w(admonition target)]
|
133
|
+
ret1 = [%w(fn target), %w(semx source), %w(fmt-title source),
|
134
|
+
%w(fmt-xref to), %w(fmt-xref target), %w(fmt-eref bibitemid),
|
135
|
+
%w(fmt-xref-label container), %w(fmt-fn-body target),
|
136
|
+
%w(fmt-review-start source), %w(fmt-review-start end),
|
137
|
+
%w(fmt-review-start target), %w(fmt-review-end source),
|
138
|
+
%w(fmt-review-end start), %w(fmt-review-end target)]
|
139
|
+
presxml ? ret + ret1 : ret
|
132
140
|
end
|
133
141
|
|
134
142
|
# convert definition list term/value pair into Nokogiri XML attribute
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metanorma-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.10.3
|
4
|
+
version: 1.11.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: asciidoctor
|
@@ -321,6 +321,7 @@ files:
|
|
321
321
|
- README.adoc
|
322
322
|
- lib/metanorma-utils.rb
|
323
323
|
- lib/sterile/sterile.rb
|
324
|
+
- lib/utils/cjk.rb
|
324
325
|
- lib/utils/hash_transform_keys.rb
|
325
326
|
- lib/utils/image.rb
|
326
327
|
- lib/utils/linestatus.rb
|