metanorma-utils 1.10.3 → 1.11.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1529d7af0a046a40b3b079adbc36786882be1abce4b515b2ec644c9ade8fb6eb
4
- data.tar.gz: 756653fafb5a833206d2f60e7cbcc64c8c0f7dc2d94149655efbd06395694be7
3
+ metadata.gz: fb7eb442187f8f1129b1e5e0a6ae9e05f504522cb55c2a3f75b784fc803717cf
4
+ data.tar.gz: 6819e04fc69ba93d601ce0053905db9ab10cea6d1e4af39e51a6d3b283603c02
5
5
  SHA512:
6
- metadata.gz: f62f4879f665181f06d1102785dac3e8481cd4d15c9e982c481af56bd079a4440205a7d18549dd1b53ed0c8bc127119742904b088e145ee4f0710f5745b129da
7
- data.tar.gz: 8234bac72f1dd5f432af91442c3802a2c03f3ba7f928ef89c2feba5c5a6838176fd2d271f2beb63a70a0c613b24d94a5b6816e594e40c7851070f1125b336716
6
+ metadata.gz: f21d1d2c7431b3de60715d50848d1a9ae6314c0dce3918a4db1d020e81928fff965148c8de4865186ab470e2242f775d35184ea212aa9c96f988a9ee32fc06bf
7
+ data.tar.gz: cc86b4e88d4cb126ac2d7824a996341db7194724ff95242bf17f4d2743a56552bb01486fb122e674901012cfe6a7f361acba60dd9504b98af7b8a8f7bbf3dbb8
data/lib/utils/cjk.rb ADDED
@@ -0,0 +1,110 @@
1
+ module Metanorma
2
+ module Utils
3
+ class << self
4
+ # Basic CJK scripts
5
+ HAN = "\\p{Han}".freeze
6
+ BOPOMOFO = "\\p{Bopomofo}".freeze
7
+ HANGUL = "\\p{Hangul}".freeze
8
+ HIRAGANA = "\\p{Hiragana}".freeze
9
+ KATAKANA = "\\p{Katakana}".freeze
10
+
11
+ # Script extensions - characters shared between scripts
12
+
13
+ # CJK Symbols and Punctuation (U+3000–U+303F)
14
+ # Used across all CJK scripts
15
+ CJK_SYMBOLS = "[\\u3000-\\u303F]".freeze
16
+
17
+ # CJK Punctuation (subset of CJK Symbols commonly used)
18
+ CJK_PUNCTUATION = "[\\u3001-\\u3003\\u3008-\\u3011\\u3014-\\u301F]".freeze
19
+
20
+ # Halfwidth and Fullwidth Forms (U+FF00–U+FFEF)
21
+ # Used in all CJK contexts
22
+ CJK_HALFWIDTH_FULLWIDTH = "[\\uFF00-\\uFFEF]".freeze
23
+
24
+ # CJK Compatibility Forms (U+FE30–U+FE4F)
25
+ # Primarily used with Han but relevant for all CJK
26
+ CJK_COMPAT = "[\\uFE30-\\uFE4F]".freeze
27
+
28
+ # Vertical Forms (U+FE10–U+FE1F)
29
+ # Used in vertical text layout for all CJK
30
+ CJK_VERTICAL = "[\\uFE10-\\uFE1F]".freeze
31
+
32
+ # Small Form Variants (U+FE50–U+FE6F)
33
+ # Used in all CJK contexts
34
+ CJK_SMALL_FORMS = "[\\uFE50-\\uFE6F]".freeze
35
+
36
+ # Ideographic Description Characters (U+2FF0–U+2FFF)
37
+ # Used with Han script
38
+ HAN_IDC = "[\\u2FF0-\\u2FFF]".freeze
39
+
40
+ # Kanbun (U+3190–U+319F)
41
+ # Used with Han script for Japanese
42
+ KANBUN = "[\\u3190-\\u319F]".freeze
43
+
44
+ # CJK Compatibility (U+3300–U+33FF)
45
+ # Used with Han script
46
+ CJK_COMPAT_IDEOGRAPHS = "[\\u3300-\\u33FF]".freeze
47
+
48
+ # CJK Compatibility Ideographs (U+F900–U+FAFF)
49
+ HAN_COMPAT_IDEOGRAPHS = "[\\uF900-\\uFAFF]".freeze
50
+
51
+ # Script extensions by primary script
52
+ HAN_EXTENSIONS = [
53
+ HAN,
54
+ CJK_SYMBOLS,
55
+ CJK_PUNCTUATION,
56
+ CJK_HALFWIDTH_FULLWIDTH,
57
+ CJK_COMPAT,
58
+ CJK_VERTICAL,
59
+ CJK_SMALL_FORMS,
60
+ HAN_IDC,
61
+ KANBUN,
62
+ CJK_COMPAT_IDEOGRAPHS,
63
+ HAN_COMPAT_IDEOGRAPHS
64
+ ].join("|").freeze
65
+
66
+ HANGUL_EXTENSIONS = [
67
+ HANGUL,
68
+ CJK_SYMBOLS,
69
+ CJK_PUNCTUATION,
70
+ CJK_HALFWIDTH_FULLWIDTH,
71
+ CJK_VERTICAL,
72
+ CJK_SMALL_FORMS
73
+ ].join("|").freeze
74
+
75
+ HIRAGANA_EXTENSIONS = [
76
+ HIRAGANA,
77
+ CJK_SYMBOLS,
78
+ CJK_PUNCTUATION,
79
+ CJK_HALFWIDTH_FULLWIDTH,
80
+ CJK_VERTICAL,
81
+ CJK_SMALL_FORMS
82
+ ].join("|").freeze
83
+
84
+ KATAKANA_EXTENSIONS = [
85
+ KATAKANA,
86
+ CJK_SYMBOLS,
87
+ CJK_PUNCTUATION,
88
+ CJK_HALFWIDTH_FULLWIDTH,
89
+ CJK_VERTICAL,
90
+ CJK_SMALL_FORMS
91
+ ].join("|").freeze
92
+
93
+ BOPOMOFO_EXTENSIONS = [
94
+ BOPOMOFO,
95
+ CJK_SYMBOLS,
96
+ CJK_PUNCTUATION,
97
+ CJK_HALFWIDTH_FULLWIDTH
98
+ ].join("|").freeze
99
+
100
+ # Combined CJK pattern including all script extensions
101
+ CJK = [
102
+ HAN_EXTENSIONS,
103
+ HANGUL_EXTENSIONS,
104
+ HIRAGANA_EXTENSIONS,
105
+ KATAKANA_EXTENSIONS,
106
+ BOPOMOFO_EXTENSIONS
107
+ ].join("|").freeze
108
+ end
109
+ end
110
+ end
data/lib/utils/log.rb CHANGED
@@ -86,7 +86,7 @@ module Metanorma
86
86
  while !node.nil? && node["id"].nil? && node.respond_to?(:parent)
87
87
  node = node.parent
88
88
  end
89
- node.respond_to?(:parent) ? "ID #{node['id']}" : ""
89
+ node.respond_to?(:parent) ? "ID #{node['anchor'] || node['id']}" : ""
90
90
  elsif node.is_a? String then node
91
91
  elsif node.respond_to?(:lineno) && !node.lineno.nil? &&
92
92
  !node.lineno.empty?
@@ -224,7 +224,7 @@ module Metanorma
224
224
  /^ID /.match?(loc) or return [loc, nil]
225
225
  loc.sub!(/^ID /, "")
226
226
  loc = @mapid[loc] while @mapid[loc]
227
- url = "#{@htmlfilename}##{loc}"
227
+ url = "#{@htmlfilename}##{to_ncname loc}"
228
228
  [loc, url]
229
229
  end
230
230
 
data/lib/utils/main.rb CHANGED
@@ -5,6 +5,7 @@ require "htmlentities"
5
5
  require "nokogiri"
6
6
  require "csv"
7
7
  require_relative "../sterile/sterile"
8
+ require_relative "cjk"
8
9
 
9
10
  module Metanorma
10
11
  module Utils
@@ -35,9 +36,6 @@ module Metanorma
35
36
  docfile.nil? ? "./" : "#{Pathname.new(docfile).parent}/"
36
37
  end
37
38
 
38
- CJK = "\\p{Han}|\\p{Bopomofo}|\\p{Hangul}|\\p{Hiragana}|\\p{Katakana}"
39
- .freeze
40
-
41
39
  # TODO needs internationalisation of quote
42
40
  def smartformat(text)
43
41
  ret = HTMLEntities.new.decode(
data/lib/utils/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module Metanorma
2
2
  module Utils
3
- VERSION = "1.10.3".freeze
3
+ VERSION = "1.11.2".freeze
4
4
  end
5
5
  end
data/lib/utils/xml.rb CHANGED
@@ -69,7 +69,7 @@ module Metanorma
69
69
  cjk2 = /#{CJK}/o.match?(nextfirst)
70
70
  text1 = /[^\p{Z}\p{C}]/.match?(last)
71
71
  text2 = /[^\p{Z}\p{C}]/.match?(nextfirst)
72
- (cjk1 && (cjk2 || !text2)) and next
72
+ cjk1 && (cjk2 || !text2) and next
73
73
  !text1 && cjk2 and next
74
74
  ret[i] += " "
75
75
  end
@@ -125,10 +125,18 @@ module Metanorma
125
125
  end
126
126
 
127
127
  # all element/attribute pairs that are ID anchors in Metanorma
128
- def anchor_attributes
129
- [%w[* id], %w[* bibitemid], %w[review from],
130
- %w[review to], %w[index to], %w[xref target],
131
- %w[callout target], %w[location target]]
128
+ def anchor_attributes(presxml: false)
129
+ ret = [%w(review from), %w(review to), %w(callout target), %w(xref to),
130
+ %w(eref bibitemid), %w(citation bibitemid), %w(xref target),
131
+ %w(label for), %w(location target), %w(index to),
132
+ %w(termsource bibitemid), %w(admonition target)]
133
+ ret1 = [%w(fn target), %w(semx source), %w(fmt-title source),
134
+ %w(fmt-xref to), %w(fmt-xref target), %w(fmt-eref bibitemid),
135
+ %w(fmt-xref-label container), %w(fmt-fn-body target),
136
+ %w(fmt-review-start source), %w(fmt-review-start end),
137
+ %w(fmt-review-start target), %w(fmt-review-end source),
138
+ %w(fmt-review-end start), %w(fmt-review-end target)]
139
+ presxml ? ret + ret1 : ret
132
140
  end
133
141
 
134
142
  # convert definition list term/value pair into Nokogiri XML attribute
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metanorma-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.3
4
+ version: 1.11.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-04-14 00:00:00.000000000 Z
11
+ date: 2025-05-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: asciidoctor
@@ -321,6 +321,7 @@ files:
321
321
  - README.adoc
322
322
  - lib/metanorma-utils.rb
323
323
  - lib/sterile/sterile.rb
324
+ - lib/utils/cjk.rb
324
325
  - lib/utils/hash_transform_keys.rb
325
326
  - lib/utils/image.rb
326
327
  - lib/utils/linestatus.rb