isodoc-i18n 1.2.1 → 1.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/isodoc-i18n.gemspec +1 -0
- data/lib/isodoc/i18n/version.rb +1 -1
- data/lib/isodoc/l10n.rb +24 -11
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 490a22f13264a470afa34644c450651239bb43409aa8e579bae72229d165fe38
|
4
|
+
data.tar.gz: 9be5c331d23f732e37e2588ff3269e867231f56b6b074417258f5e01fa55aeba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 024f04dedc8bdef757f52d1ae35b69af155d786bb5e1075b6be8c11e2a2039b0bf9ee4e39a0bbca260e3edee85aafb70360240286a38ce12cb41fef88db70e02
|
7
|
+
data.tar.gz: 6952b1cf007e02b5e7fcb9ca3507cda68e22e27facee436fafc41daaa65b6b5eed3b5e0b9fbbde57d8c10361ab55dcc870a25b92a0f624597b02bb3a912c7586
|
data/isodoc-i18n.gemspec
CHANGED
@@ -37,5 +37,6 @@ Gem::Specification.new do |spec|
|
|
37
37
|
spec.add_development_dependency "simplecov", "~> 0.15"
|
38
38
|
spec.add_development_dependency "timecop", "~> 0.9"
|
39
39
|
spec.add_development_dependency "webmock"
|
40
|
+
spec.add_development_dependency "xml-c14n"
|
40
41
|
# spec.metadata["rubygems_mfa_required"] = "true"
|
41
42
|
end
|
data/lib/isodoc/i18n/version.rb
CHANGED
data/lib/isodoc/l10n.rb
CHANGED
@@ -5,9 +5,8 @@ module IsoDoc
|
|
5
5
|
end
|
6
6
|
|
7
7
|
# function localising spaces and punctuation.
|
8
|
-
# Not clear if period needs to be localised for zh
|
9
8
|
def l10n(text, lang = @lang, script = @script, locale = @locale)
|
10
|
-
|
9
|
+
%w(zh ja ko).include?(lang) and text = l10n_zh(text, script)
|
11
10
|
lang == "fr" && text = l10n_fr(text, locale || "FR")
|
12
11
|
bidiwrap(text, lang, script)
|
13
12
|
end
|
@@ -30,11 +29,11 @@ module IsoDoc
|
|
30
29
|
.default_script(@lang))]
|
31
30
|
end
|
32
31
|
|
32
|
+
# CJK
|
33
33
|
def l10n_zh(text, script = "Hans")
|
34
34
|
xml = Nokogiri::XML::DocumentFragment.parse(text)
|
35
35
|
xml.traverse do |n|
|
36
|
-
|
37
|
-
|
36
|
+
n.text? or next
|
38
37
|
n.replace(l10_zh1(cleanup_entities(n.text, is_xml: false), script))
|
39
38
|
end
|
40
39
|
xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
|
@@ -56,19 +55,32 @@ module IsoDoc
|
|
56
55
|
|
57
56
|
# note: we can't differentiate comma from enumeration comma 、
|
58
57
|
def l10_zh1(text, _script)
|
59
|
-
l10n_zh_remove_space(l10n_zh_punct(text))
|
58
|
+
l10n_zh_dash(l10n_zh_remove_space(l10n_zh_punct(text)))
|
60
59
|
end
|
61
60
|
|
61
|
+
# CJK punct if (^|CJK).($|CJK)
|
62
62
|
def l10n_zh_punct(text)
|
63
|
-
["::", ",,", "..", "))", "]]", "
|
64
|
-
text = text.gsub(
|
65
|
-
|
66
|
-
|
67
|
-
|
63
|
+
["::", ",,", "..", "))", "]]", ";;", "??", "!!", "((", "[["].each do |m|
|
64
|
+
text = text.gsub(/(?<=#{ZH_CHAR}|^) # CJK character, or start of string
|
65
|
+
(\s*) # Latin spaces optional
|
66
|
+
#{Regexp.quote(m[0])} # Latin punctuation we want to convert to CJK
|
67
|
+
(?= \s* # followed (lookahead) by ignorable Latin spaces
|
68
|
+
[:,.()\[\];?!-]* # Latin punctuation which we will also convert to CJK
|
69
|
+
(#{ZH_CHAR}|$) # CJK character, or end of string
|
70
|
+
) /x, "\\1#{m[1]}")
|
68
71
|
end
|
69
72
|
text
|
70
73
|
end
|
71
74
|
|
75
|
+
def l10n_zh_dash(text)
|
76
|
+
text.gsub(/(?<=#{ZH_CHAR}|^) # CJK character, or start of string
|
77
|
+
(\d*) # optional digits
|
78
|
+
– # en-dash
|
79
|
+
(\d*) # optional digits
|
80
|
+
(#{ZH_CHAR}|$) # CJK character, or end of string
|
81
|
+
/xo, "\\1~\\2\\3")
|
82
|
+
end
|
83
|
+
|
72
84
|
def l10n_zh_remove_space(text)
|
73
85
|
text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
|
74
86
|
.gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
|
@@ -102,7 +114,8 @@ module IsoDoc
|
|
102
114
|
|
103
115
|
def interleave_space_cjk?(text)
|
104
116
|
text.size == 2 or return
|
105
|
-
["\u2014\u2014", "\u2025\u2025", "\u2026\u2026",
|
117
|
+
["\u2014\u2014", "\u2025\u2025", "\u2026\u2026",
|
118
|
+
"\u22ef\u22ef"].include?(text) ||
|
106
119
|
/\d\d|\p{Latin}\p{Latin}|[[:space:]]/.match?(text) ||
|
107
120
|
/^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/.match?(text) ||
|
108
121
|
/[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/.match?(text) ||
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isodoc-i18n
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-10-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -206,6 +206,20 @@ dependencies:
|
|
206
206
|
- - ">="
|
207
207
|
- !ruby/object:Gem::Version
|
208
208
|
version: '0'
|
209
|
+
- !ruby/object:Gem::Dependency
|
210
|
+
name: xml-c14n
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - ">="
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '0'
|
216
|
+
type: :development
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - ">="
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '0'
|
209
223
|
description: 'Internationalisation for Metanorma rendering
|
210
224
|
|
211
225
|
'
|
@@ -249,7 +263,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
249
263
|
- !ruby/object:Gem::Version
|
250
264
|
version: '0'
|
251
265
|
requirements: []
|
252
|
-
rubygems_version: 3.3.
|
266
|
+
rubygems_version: 3.3.27
|
253
267
|
signing_key:
|
254
268
|
specification_version: 4
|
255
269
|
summary: isodoc-i18n
|