metanorma-utils 1.11.0 → 1.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of metanorma-utils might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/lib/utils/cjk.rb +110 -0
- data/lib/utils/main.rb +1 -3
- data/lib/utils/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 34aeaad34071231372ad169fc5b3af498af9fbd9e37ae516f7965ec7d04d367a
|
4
|
+
data.tar.gz: 17b4f64083b4438cae7bf3aee84a969fc2040e4cd80b025eedd11efd5d1ecddc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf8bb532bc23c04acda2d67626caab768b254bc678ee2266abd94b6bdc410aebd05bfab73d978af6e25f2a808ddaa987200e0fcd73009170df72194f4e265bb0
|
7
|
+
data.tar.gz: e06eac78d98e20cc5eb271244f9fc877b7829d4dbd598142b385fa2b426ce06311a74b015469687a813e20fb67a66be29296046ab1fe7630bbcaa0b8d38a3c0d
|
data/lib/utils/cjk.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
module Metanorma
|
2
|
+
module Utils
|
3
|
+
class << self
|
4
|
+
# Basic CJK scripts
|
5
|
+
HAN = "\\p{Han}".freeze
|
6
|
+
BOPOMOFO = "\\p{Bopomofo}".freeze
|
7
|
+
HANGUL = "\\p{Hangul}".freeze
|
8
|
+
HIRAGANA = "\\p{Hiragana}".freeze
|
9
|
+
KATAKANA = "\\p{Katakana}".freeze
|
10
|
+
|
11
|
+
# Script extensions - characters shared between scripts
|
12
|
+
|
13
|
+
# CJK Symbols and Punctuation (U+3000–U+303F)
|
14
|
+
# Used across all CJK scripts
|
15
|
+
CJK_SYMBOLS = "[\\u3000-\\u303F]".freeze
|
16
|
+
|
17
|
+
# CJK Punctuation (subset of CJK Symbols commonly used)
|
18
|
+
CJK_PUNCTUATION = "[\\u3001-\\u3003\\u3008-\\u3011\\u3014-\\u301F]".freeze
|
19
|
+
|
20
|
+
# Halfwidth and Fullwidth Forms (U+FF00–U+FFEF)
|
21
|
+
# Used in all CJK contexts
|
22
|
+
CJK_HALFWIDTH_FULLWIDTH = "[\\uFF00-\\uFFEF]".freeze
|
23
|
+
|
24
|
+
# CJK Compatibility Forms (U+FE30–U+FE4F)
|
25
|
+
# Primarily used with Han but relevant for all CJK
|
26
|
+
CJK_COMPAT = "[\\uFE30-\\uFE4F]".freeze
|
27
|
+
|
28
|
+
# Vertical Forms (U+FE10–U+FE1F)
|
29
|
+
# Used in vertical text layout for all CJK
|
30
|
+
CJK_VERTICAL = "[\\uFE10-\\uFE1F]".freeze
|
31
|
+
|
32
|
+
# Small Form Variants (U+FE50–U+FE6F)
|
33
|
+
# Used in all CJK contexts
|
34
|
+
CJK_SMALL_FORMS = "[\\uFE50-\\uFE6F]".freeze
|
35
|
+
|
36
|
+
# Ideographic Description Characters (U+2FF0–U+2FFF)
|
37
|
+
# Used with Han script
|
38
|
+
HAN_IDC = "[\\u2FF0-\\u2FFF]".freeze
|
39
|
+
|
40
|
+
# Kanbun (U+3190–U+319F)
|
41
|
+
# Used with Han script for Japanese
|
42
|
+
KANBUN = "[\\u3190-\\u319F]".freeze
|
43
|
+
|
44
|
+
# CJK Compatibility (U+3300–U+33FF)
|
45
|
+
# Used with Han script
|
46
|
+
CJK_COMPAT_IDEOGRAPHS = "[\\u3300-\\u33FF]".freeze
|
47
|
+
|
48
|
+
# CJK Compatibility Ideographs (U+F900–U+FAFF)
|
49
|
+
HAN_COMPAT_IDEOGRAPHS = "[\\uF900-\\uFAFF]".freeze
|
50
|
+
|
51
|
+
# Script extensions by primary script
|
52
|
+
HAN_EXTENSIONS = [
|
53
|
+
HAN,
|
54
|
+
CJK_SYMBOLS,
|
55
|
+
CJK_PUNCTUATION,
|
56
|
+
CJK_HALFWIDTH_FULLWIDTH,
|
57
|
+
CJK_COMPAT,
|
58
|
+
CJK_VERTICAL,
|
59
|
+
CJK_SMALL_FORMS,
|
60
|
+
HAN_IDC,
|
61
|
+
KANBUN,
|
62
|
+
CJK_COMPAT_IDEOGRAPHS,
|
63
|
+
HAN_COMPAT_IDEOGRAPHS
|
64
|
+
].join("|").freeze
|
65
|
+
|
66
|
+
HANGUL_EXTENSIONS = [
|
67
|
+
HANGUL,
|
68
|
+
CJK_SYMBOLS,
|
69
|
+
CJK_PUNCTUATION,
|
70
|
+
CJK_HALFWIDTH_FULLWIDTH,
|
71
|
+
CJK_VERTICAL,
|
72
|
+
CJK_SMALL_FORMS
|
73
|
+
].join("|").freeze
|
74
|
+
|
75
|
+
HIRAGANA_EXTENSIONS = [
|
76
|
+
HIRAGANA,
|
77
|
+
CJK_SYMBOLS,
|
78
|
+
CJK_PUNCTUATION,
|
79
|
+
CJK_HALFWIDTH_FULLWIDTH,
|
80
|
+
CJK_VERTICAL,
|
81
|
+
CJK_SMALL_FORMS
|
82
|
+
].join("|").freeze
|
83
|
+
|
84
|
+
KATAKANA_EXTENSIONS = [
|
85
|
+
KATAKANA,
|
86
|
+
CJK_SYMBOLS,
|
87
|
+
CJK_PUNCTUATION,
|
88
|
+
CJK_HALFWIDTH_FULLWIDTH,
|
89
|
+
CJK_VERTICAL,
|
90
|
+
CJK_SMALL_FORMS
|
91
|
+
].join("|").freeze
|
92
|
+
|
93
|
+
BOPOMOFO_EXTENSIONS = [
|
94
|
+
BOPOMOFO,
|
95
|
+
CJK_SYMBOLS,
|
96
|
+
CJK_PUNCTUATION,
|
97
|
+
CJK_HALFWIDTH_FULLWIDTH
|
98
|
+
].join("|").freeze
|
99
|
+
|
100
|
+
# Combined CJK pattern including all script extensions
|
101
|
+
CJK = [
|
102
|
+
HAN_EXTENSIONS,
|
103
|
+
HANGUL_EXTENSIONS,
|
104
|
+
HIRAGANA_EXTENSIONS,
|
105
|
+
KATAKANA_EXTENSIONS,
|
106
|
+
BOPOMOFO_EXTENSIONS
|
107
|
+
].join("|").freeze
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
data/lib/utils/main.rb
CHANGED
@@ -5,6 +5,7 @@ require "htmlentities"
|
|
5
5
|
require "nokogiri"
|
6
6
|
require "csv"
|
7
7
|
require_relative "../sterile/sterile"
|
8
|
+
require_relative "cjk"
|
8
9
|
|
9
10
|
module Metanorma
|
10
11
|
module Utils
|
@@ -35,9 +36,6 @@ module Metanorma
|
|
35
36
|
docfile.nil? ? "./" : "#{Pathname.new(docfile).parent}/"
|
36
37
|
end
|
37
38
|
|
38
|
-
CJK = "\\p{Han}|\\p{Bopomofo}|\\p{Hangul}|\\p{Hiragana}|\\p{Katakana}"
|
39
|
-
.freeze
|
40
|
-
|
41
39
|
# TODO needs internationalisation of quote
|
42
40
|
def smartformat(text)
|
43
41
|
ret = HTMLEntities.new.decode(
|
data/lib/utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metanorma-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.11.0
|
4
|
+
version: 1.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-05-
|
11
|
+
date: 2025-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: asciidoctor
|
@@ -321,6 +321,7 @@ files:
|
|
321
321
|
- README.adoc
|
322
322
|
- lib/metanorma-utils.rb
|
323
323
|
- lib/sterile/sterile.rb
|
324
|
+
- lib/utils/cjk.rb
|
324
325
|
- lib/utils/hash_transform_keys.rb
|
325
326
|
- lib/utils/image.rb
|
326
327
|
- lib/utils/linestatus.rb
|