isodoc-i18n 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/isodoc-i18n.gemspec +1 -0
- data/lib/isodoc/i18n/version.rb +1 -1
- data/lib/isodoc/l10n.rb +24 -11
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 490a22f13264a470afa34644c450651239bb43409aa8e579bae72229d165fe38
|
4
|
+
data.tar.gz: 9be5c331d23f732e37e2588ff3269e867231f56b6b074417258f5e01fa55aeba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 024f04dedc8bdef757f52d1ae35b69af155d786bb5e1075b6be8c11e2a2039b0bf9ee4e39a0bbca260e3edee85aafb70360240286a38ce12cb41fef88db70e02
|
7
|
+
data.tar.gz: 6952b1cf007e02b5e7fcb9ca3507cda68e22e27facee436fafc41daaa65b6b5eed3b5e0b9fbbde57d8c10361ab55dcc870a25b92a0f624597b02bb3a912c7586
|
data/isodoc-i18n.gemspec
CHANGED
@@ -37,5 +37,6 @@ Gem::Specification.new do |spec|
|
|
37
37
|
spec.add_development_dependency "simplecov", "~> 0.15"
|
38
38
|
spec.add_development_dependency "timecop", "~> 0.9"
|
39
39
|
spec.add_development_dependency "webmock"
|
40
|
+
spec.add_development_dependency "xml-c14n"
|
40
41
|
# spec.metadata["rubygems_mfa_required"] = "true"
|
41
42
|
end
|
data/lib/isodoc/i18n/version.rb
CHANGED
data/lib/isodoc/l10n.rb
CHANGED
@@ -5,9 +5,8 @@ module IsoDoc
|
|
5
5
|
end
|
6
6
|
|
7
7
|
# function localising spaces and punctuation.
|
8
|
-
# Not clear if period needs to be localised for zh
|
9
8
|
def l10n(text, lang = @lang, script = @script, locale = @locale)
|
10
|
-
|
9
|
+
%w(zh ja ko).include?(lang) and text = l10n_zh(text, script)
|
11
10
|
lang == "fr" && text = l10n_fr(text, locale || "FR")
|
12
11
|
bidiwrap(text, lang, script)
|
13
12
|
end
|
@@ -30,11 +29,11 @@ module IsoDoc
|
|
30
29
|
.default_script(@lang))]
|
31
30
|
end
|
32
31
|
|
32
|
+
# CJK
|
33
33
|
def l10n_zh(text, script = "Hans")
|
34
34
|
xml = Nokogiri::XML::DocumentFragment.parse(text)
|
35
35
|
xml.traverse do |n|
|
36
|
-
|
37
|
-
|
36
|
+
n.text? or next
|
38
37
|
n.replace(l10_zh1(cleanup_entities(n.text, is_xml: false), script))
|
39
38
|
end
|
40
39
|
xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
|
@@ -56,19 +55,32 @@ module IsoDoc
|
|
56
55
|
|
57
56
|
# note: we can't differentiate comma from enumeration comma 、
|
58
57
|
def l10_zh1(text, _script)
|
59
|
-
l10n_zh_remove_space(l10n_zh_punct(text))
|
58
|
+
l10n_zh_dash(l10n_zh_remove_space(l10n_zh_punct(text)))
|
60
59
|
end
|
61
60
|
|
61
|
+
# CJK punct if (^|CJK).($|CJK)
|
62
62
|
def l10n_zh_punct(text)
|
63
|
-
["::", ",,", "..", "))", "]]", "
|
64
|
-
text = text.gsub(
|
65
|
-
|
66
|
-
|
67
|
-
|
63
|
+
["::", ",,", "..", "))", "]]", ";;", "??", "!!", "((", "[["].each do |m|
|
64
|
+
text = text.gsub(/(?<=#{ZH_CHAR}|^) # CJK character, or start of string
|
65
|
+
(\s*) # Latin spaces optional
|
66
|
+
#{Regexp.quote(m[0])} # Latin punctuation we want to convert to CJK
|
67
|
+
(?= \s* # followed (lookahead) by ignorable Latin spaces
|
68
|
+
[:,.()\[\];?!-]* # Latin punctuation which we will also convert to CJK
|
69
|
+
(#{ZH_CHAR}|$) # CJK character, or end of string
|
70
|
+
) /x, "\\1#{m[1]}")
|
68
71
|
end
|
69
72
|
text
|
70
73
|
end
|
71
74
|
|
75
|
+
def l10n_zh_dash(text)
|
76
|
+
text.gsub(/(?<=#{ZH_CHAR}|^) # CJK character, or start of string
|
77
|
+
(\d*) # optional digits
|
78
|
+
– # en-dash
|
79
|
+
(\d*) # optional digits
|
80
|
+
(#{ZH_CHAR}|$) # CJK character, or end of string
|
81
|
+
/xo, "\\1~\\2\\3")
|
82
|
+
end
|
83
|
+
|
72
84
|
def l10n_zh_remove_space(text)
|
73
85
|
text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
|
74
86
|
.gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
|
@@ -102,7 +114,8 @@ module IsoDoc
|
|
102
114
|
|
103
115
|
def interleave_space_cjk?(text)
|
104
116
|
text.size == 2 or return
|
105
|
-
["\u2014\u2014", "\u2025\u2025", "\u2026\u2026",
|
117
|
+
["\u2014\u2014", "\u2025\u2025", "\u2026\u2026",
|
118
|
+
"\u22ef\u22ef"].include?(text) ||
|
106
119
|
/\d\d|\p{Latin}\p{Latin}|[[:space:]]/.match?(text) ||
|
107
120
|
/^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/.match?(text) ||
|
108
121
|
/[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/.match?(text) ||
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isodoc-i18n
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-10-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -206,6 +206,20 @@ dependencies:
|
|
206
206
|
- - ">="
|
207
207
|
- !ruby/object:Gem::Version
|
208
208
|
version: '0'
|
209
|
+
- !ruby/object:Gem::Dependency
|
210
|
+
name: xml-c14n
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - ">="
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '0'
|
216
|
+
type: :development
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - ">="
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '0'
|
209
223
|
description: 'Internationalisation for Metanorma rendering
|
210
224
|
|
211
225
|
'
|
@@ -249,7 +263,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
249
263
|
- !ruby/object:Gem::Version
|
250
264
|
version: '0'
|
251
265
|
requirements: []
|
252
|
-
rubygems_version: 3.3.
|
266
|
+
rubygems_version: 3.3.27
|
253
267
|
signing_key:
|
254
268
|
specification_version: 4
|
255
269
|
summary: isodoc-i18n
|