metanorma-utils 1.11.0 → 1.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d20b8cee0b06766e6832c5751087c03147e751b034a6b9c4c5f503cc72035f3
4
- data.tar.gz: 9fce9d155961bf671181f497f6922e2e04ab2b758fa96f3db496bcbab620eefb
3
+ metadata.gz: 34aeaad34071231372ad169fc5b3af498af9fbd9e37ae516f7965ec7d04d367a
4
+ data.tar.gz: 17b4f64083b4438cae7bf3aee84a969fc2040e4cd80b025eedd11efd5d1ecddc
5
5
  SHA512:
6
- metadata.gz: 2a383dfa442ef5b6915a226a0865b9ec6f65aaf60a404f8a0e70c7e8d2482f8ea4e08bd4172c628db359711882e9a7f3df0e1ef66b1e99683f326cc300aa294e
7
- data.tar.gz: 030a5f340137c076344da6d7172fa1092bb24f2d1c6985fe0683b64ce875c708687107ac3f2e3aa4cbb4876a0c3f34a9ea2874688f92cbcab8d2cf394fb378e7
6
+ metadata.gz: bf8bb532bc23c04acda2d67626caab768b254bc678ee2266abd94b6bdc410aebd05bfab73d978af6e25f2a808ddaa987200e0fcd73009170df72194f4e265bb0
7
+ data.tar.gz: e06eac78d98e20cc5eb271244f9fc877b7829d4dbd598142b385fa2b426ce06311a74b015469687a813e20fb67a66be29296046ab1fe7630bbcaa0b8d38a3c0d
data/lib/utils/cjk.rb ADDED
@@ -0,0 +1,110 @@
1
module Metanorma
  module Utils
    # Regular-expression source fragments for recognising CJK text.
    #
    # Every constant below is a frozen String holding regexp source (a
    # Unicode script property or a bracketed code point range), not a
    # compiled Regexp. Because they are declared inside `class << self`,
    # the constants live on the singleton class of Metanorma::Utils.
    class << self
      # Basic CJK scripts, expressed as Unicode script properties.
      HAN = "\\p{Han}".freeze
      BOPOMOFO = "\\p{Bopomofo}".freeze
      HANGUL = "\\p{Hangul}".freeze
      HIRAGANA = "\\p{Hiragana}".freeze
      KATAKANA = "\\p{Katakana}".freeze

      # Script extensions: characters shared between several CJK scripts,
      # expressed as explicit code point ranges.

      # CJK Symbols and Punctuation block (U+3000–U+303F).
      CJK_SYMBOLS = "[\\u3000-\\u303F]".freeze

      # Commonly used punctuation; every range here lies inside CJK_SYMBOLS.
      CJK_PUNCTUATION = "[\\u3001-\\u3003\\u3008-\\u3011\\u3014-\\u301F]".freeze

      # Halfwidth and Fullwidth Forms block (U+FF00–U+FFEF).
      CJK_HALFWIDTH_FULLWIDTH = "[\\uFF00-\\uFFEF]".freeze

      # CJK Compatibility Forms block (U+FE30–U+FE4F).
      CJK_COMPAT = "[\\uFE30-\\uFE4F]".freeze

      # Vertical Forms block (U+FE10–U+FE1F), used in vertical text layout.
      CJK_VERTICAL = "[\\uFE10-\\uFE1F]".freeze

      # Small Form Variants block (U+FE50–U+FE6F).
      CJK_SMALL_FORMS = "[\\uFE50-\\uFE6F]".freeze

      # Ideographic Description Characters block (U+2FF0–U+2FFF).
      HAN_IDC = "[\\u2FF0-\\u2FFF]".freeze

      # Kanbun block (U+3190–U+319F): Japanese annotation marks for Han text.
      KANBUN = "[\\u3190-\\u319F]".freeze

      # CJK Compatibility block (U+3300–U+33FF).
      # NOTE: despite the constant name, this range is the "CJK Compatibility"
      # block, not ideographs; the ideographs are HAN_COMPAT_IDEOGRAPHS below.
      # The name is kept as-is so existing references keep working.
      CJK_COMPAT_IDEOGRAPHS = "[\\u3300-\\u33FF]".freeze

      # CJK Compatibility Ideographs block (U+F900–U+FAFF).
      HAN_COMPAT_IDEOGRAPHS = "[\\uF900-\\uFAFF]".freeze

      # Per-script alternations: the script itself plus the shared ranges
      # that are relevant to it.
      HAN_EXTENSIONS = [
        HAN,
        CJK_SYMBOLS,
        CJK_PUNCTUATION,
        CJK_HALFWIDTH_FULLWIDTH,
        CJK_COMPAT,
        CJK_VERTICAL,
        CJK_SMALL_FORMS,
        HAN_IDC,
        KANBUN,
        CJK_COMPAT_IDEOGRAPHS,
        HAN_COMPAT_IDEOGRAPHS
      ].join("|").freeze

      HANGUL_EXTENSIONS = [
        HANGUL,
        CJK_SYMBOLS,
        CJK_PUNCTUATION,
        CJK_HALFWIDTH_FULLWIDTH,
        CJK_VERTICAL,
        CJK_SMALL_FORMS
      ].join("|").freeze

      HIRAGANA_EXTENSIONS = [
        HIRAGANA,
        CJK_SYMBOLS,
        CJK_PUNCTUATION,
        CJK_HALFWIDTH_FULLWIDTH,
        CJK_VERTICAL,
        CJK_SMALL_FORMS
      ].join("|").freeze

      KATAKANA_EXTENSIONS = [
        KATAKANA,
        CJK_SYMBOLS,
        CJK_PUNCTUATION,
        CJK_HALFWIDTH_FULLWIDTH,
        CJK_VERTICAL,
        CJK_SMALL_FORMS
      ].join("|").freeze

      BOPOMOFO_EXTENSIONS = [
        BOPOMOFO,
        CJK_SYMBOLS,
        CJK_PUNCTUATION,
        CJK_HALFWIDTH_FULLWIDTH
      ].join("|").freeze

      # Combined CJK pattern covering every script and its extensions.
      # The per-script alternations repeat the shared ranges, so the branches
      # are split back out and de-duplicated (first occurrence wins): the set
      # of matched characters is unchanged, but the alternation no longer
      # carries identical branches several times over. Splitting on "|" is
      # safe because no individual fragment above contains that character.
      CJK = [
        HAN_EXTENSIONS,
        HANGUL_EXTENSIONS,
        HIRAGANA_EXTENSIONS,
        KATAKANA_EXTENSIONS,
        BOPOMOFO_EXTENSIONS
      ].flat_map { |alternation| alternation.split("|") }
        .uniq.join("|").freeze
    end
  end
end
data/lib/utils/main.rb CHANGED
@@ -5,6 +5,7 @@ require "htmlentities"
5
5
  require "nokogiri"
6
6
  require "csv"
7
7
  require_relative "../sterile/sterile"
8
+ require_relative "cjk"
8
9
 
9
10
  module Metanorma
10
11
  module Utils
@@ -35,9 +36,6 @@ module Metanorma
35
36
  docfile.nil? ? "./" : "#{Pathname.new(docfile).parent}/"
36
37
  end
37
38
 
38
- CJK = "\\p{Han}|\\p{Bopomofo}|\\p{Hangul}|\\p{Hiragana}|\\p{Katakana}"
39
- .freeze
40
-
41
39
  # TODO needs internationalisation of quote
42
40
  def smartformat(text)
43
41
  ret = HTMLEntities.new.decode(
data/lib/utils/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module Metanorma
2
2
  module Utils
3
- VERSION = "1.11.0".freeze
3
+ VERSION = "1.11.1".freeze
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metanorma-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.0
4
+ version: 1.11.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-05-08 00:00:00.000000000 Z
11
+ date: 2025-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: asciidoctor
@@ -321,6 +321,7 @@ files:
321
321
  - README.adoc
322
322
  - lib/metanorma-utils.rb
323
323
  - lib/sterile/sterile.rb
324
+ - lib/utils/cjk.rb
324
325
  - lib/utils/hash_transform_keys.rb
325
326
  - lib/utils/image.rb
326
327
  - lib/utils/linestatus.rb