isodoc-i18n 1.1.10 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -3
- data/isodoc-i18n.gemspec +2 -1
- data/lib/isodoc/i18n/version.rb +1 -1
- data/lib/isodoc/i18n.rb +19 -113
- data/lib/isodoc/l10n.rb +113 -0
- data/lib/isodoc/liquid/liquid.rb +38 -0
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7552e0a62364e47c4f8ea7b2810e1ba5d11e39d2a3711792c826412b1aef661f
|
4
|
+
data.tar.gz: 29d4eef4f2a07bcf6ad1de7ddc229cf7ff9b0ffa8e55c81652d2174165cf2214
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 220fddc821f5f00d4f1eecca1f7583b0236254bb0a5141c50d7583f4c725be2b4aca949ffc4f89ed390c01850f2912d86b9562959773efabf1a3df4838e8ec60
|
7
|
+
data.tar.gz: 59373ef3a3688bf4f382da1e2a5df55b3b7ae1615385bc04504faf9421e9c5e66333ed1859243f0e8a86d4f42724692ab1cd9b8f9f0bd62d4c4e70cceb7e93cd
|
data/Gemfile
CHANGED
data/isodoc-i18n.gemspec
CHANGED
@@ -23,7 +23,8 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
|
24
24
|
|
25
25
|
spec.add_dependency "htmlentities", "~> 4.3.4"
|
26
|
-
spec.add_dependency "
|
26
|
+
spec.add_dependency "liquid", "~> 5"
|
27
|
+
spec.add_dependency "metanorma-utils", ">= 1.7.0"
|
27
28
|
spec.add_dependency "twitter_cldr"
|
28
29
|
|
29
30
|
spec.add_development_dependency "debug"
|
data/lib/isodoc/i18n/version.rb
CHANGED
data/lib/isodoc/i18n.rb
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
require "htmlentities"
|
2
2
|
require "twitter_cldr"
|
3
|
-
require_relative "i18n/version"
|
4
3
|
require_relative "i18n-yaml"
|
5
4
|
require_relative "date"
|
5
|
+
require_relative "l10n"
|
6
|
+
require_relative "liquid/liquid"
|
7
|
+
require "liquid"
|
8
|
+
require_relative "i18n/version"
|
6
9
|
|
7
10
|
module IsoDoc
|
8
11
|
class I18n
|
12
|
+
attr_accessor :labels
|
13
|
+
|
9
14
|
def initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil)
|
10
15
|
@lang = lang
|
11
16
|
@script = script
|
@@ -14,6 +19,12 @@ module IsoDoc
|
|
14
19
|
@cal_en = TwitterCldr::Shared::Calendar.new(:en)
|
15
20
|
@c = HTMLEntities.new
|
16
21
|
init_labels(i18nyaml, i18nhash)
|
22
|
+
liquid_init
|
23
|
+
end
|
24
|
+
|
25
|
+
def liquid_init
|
26
|
+
::IsoDoc::I18n::Liquid.set(self)
|
27
|
+
::Liquid::Template.register_filter(::IsoDoc::I18n::Liquid)
|
17
28
|
end
|
18
29
|
|
19
30
|
def calendar_data
|
@@ -26,119 +37,15 @@ module IsoDoc
|
|
26
37
|
@labels = load_yaml(@lang, @script, i18nyaml, i18nhash)
|
27
38
|
@labels["language"] = @lang
|
28
39
|
@labels["script"] = @script
|
29
|
-
@labels.
|
40
|
+
@labels.each_key do |k|
|
30
41
|
self.class.send(:define_method, k.downcase) { get[k] }
|
31
42
|
end
|
32
43
|
end
|
33
44
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
# function localising spaces and punctuation.
|
39
|
-
# Not clear if period needs to be localised for zh
|
40
|
-
def l10n(text, lang = @lang, script = @script, locale = @locale)
|
41
|
-
lang == "zh" and text = l10n_zh(text, script)
|
42
|
-
lang == "fr" && text = l10n_fr(text, locale || "FR")
|
43
|
-
bidiwrap(text, lang, script)
|
44
|
-
end
|
45
|
-
|
46
|
-
def bidiwrap(text, lang, script)
|
47
|
-
my_script, my_rtl, outer_rtl = bidiwrap_vars(lang, script)
|
48
|
-
if my_rtl && !outer_rtl
|
49
|
-
mark = %w(Arab Aran).include?(my_script) ? "؜" : "‏"
|
50
|
-
"#{mark}#{text}#{mark}"
|
51
|
-
elsif !my_rtl && outer_rtl then "‎#{text}‎"
|
52
|
-
else text
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def bidiwrap_vars(lang, script)
|
57
|
-
my_script = script || Metanorma::Utils.default_script(lang)
|
58
|
-
[my_script,
|
59
|
-
Metanorma::Utils.rtl_script?(my_script),
|
60
|
-
Metanorma::Utils.rtl_script?(@script || Metanorma::Utils
|
61
|
-
.default_script(@lang))]
|
62
|
-
end
|
63
|
-
|
64
|
-
def l10n_zh(text, script = "Hans")
|
65
|
-
xml = Nokogiri::XML::DocumentFragment.parse(text)
|
66
|
-
xml.traverse do |n|
|
67
|
-
next unless n.text?
|
68
|
-
|
69
|
-
n.replace(l10_zh1(cleanup_entities(n.text, is_xml: false), script))
|
70
|
-
end
|
71
|
-
xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
|
72
|
-
.gsub(/<\?[^>]+>/, "")
|
73
|
-
end
|
74
|
-
|
75
|
-
def l10n_fr(text, locale)
|
76
|
-
xml = Nokogiri::XML::DocumentFragment.parse(text)
|
77
|
-
xml.traverse do |n|
|
78
|
-
next unless n.text?
|
79
|
-
|
80
|
-
n.replace(l10n_fr1(cleanup_entities(n.text, is_xml: false), locale))
|
81
|
-
end
|
82
|
-
xml.to_xml(encoding: "UTF-8")
|
83
|
-
end
|
84
|
-
|
85
|
-
ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|" \
|
86
|
-
"\\p{In Halfwidth And Fullwidth Forms}".freeze
|
87
|
-
|
88
|
-
# note: we can't differentiate comma from enumeration comma 、
|
89
|
-
def l10_zh1(text, _script)
|
90
|
-
l10n_zh_remove_space(l10n_zh_punct(text))
|
91
|
-
end
|
92
|
-
|
93
|
-
def l10n_zh_punct(text)
|
94
|
-
[":：", ",，", ".．", ")）", "]］", ":：", ";；", "?？", "!！", "–～"].each do |m|
|
95
|
-
text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
|
96
|
-
end
|
97
|
-
["((", "[["].each do |m|
|
98
|
-
text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
|
99
|
-
end
|
100
|
-
text
|
101
|
-
end
|
102
|
-
|
103
|
-
def l10n_zh_remove_space(text)
|
104
|
-
text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
|
105
|
-
.gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
|
106
|
-
.gsub(/(?<=#{ZH_CHAR}) (?=\d)/o, "")
|
107
|
-
.gsub(/(?<=#{ZH_CHAR}) (?=[A-Za-z](#{ZH_CHAR}|$))/o, "")
|
108
|
-
end
|
109
|
-
|
110
|
-
def l10n_fr1(text, locale)
|
111
|
-
text = text.gsub(/(?<=\p{Alnum})([»›;?!])(?=\s)/, "\u202f\\1")
|
112
|
-
text = text.gsub(/(?<=\p{Alnum})([»›;?!])$/, "\u202f\\1")
|
113
|
-
text = text.gsub(/^([»›;?!])/, "\u202f\\1")
|
114
|
-
text = text.gsub(/([«‹])/, "\\1\u202f")
|
115
|
-
colonsp = locale == "CH" ? "\u202f" : "\u00a0"
|
116
|
-
text = text.gsub(/(?<=\p{Alnum})(:)(?=\s)/, "#{colonsp}\\1")
|
117
|
-
text = text.gsub(/(?<=\p{Alnum})(:)$/, "#{colonsp}\\1")
|
118
|
-
text.gsub(/^(:\s)/, "#{colonsp}\\1")
|
119
|
-
end
|
120
|
-
|
121
|
-
def self.cjk_extend(text)
|
122
|
-
cjk_extend(text)
|
123
|
-
end
|
124
|
-
|
125
|
-
def cjk_extend(title)
|
126
|
-
@c.decode(title).chars.map.with_index do |n, i|
|
127
|
-
if i.zero? || !interleave_space_cjk?(title[i - 1] + title[i])
|
128
|
-
n
|
129
|
-
else "\u3000#{n}"
|
130
|
-
end
|
131
|
-
end.join
|
132
|
-
end
|
133
|
-
|
134
|
-
def interleave_space_cjk?(text)
|
135
|
-
text.size == 2 or return
|
136
|
-
["\u2014\u2014", "\u2025\u2025", "\u2026\u2026", "\u22ef\u22ef"].include?(text) ||
|
137
|
-
/\d\d|\p{Latin}\p{Latin}|[[:space:]]/.match?(text) ||
|
138
|
-
/^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/.match?(text) ||
|
139
|
-
/[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/.match?(text) ||
|
140
|
-
/[\u3002.\u3001,\u30fb:;\u2010\u301c\u30a0\u2013!?\u203c\u2047\u2048\u2049]/.match?(text) and return false
|
141
|
-
true
|
45
|
+
# populate with variables, Liquid, inflections, ordinals/spellout
|
46
|
+
def populate(keys, vars = {})
|
47
|
+
::Liquid::Template.parse(@labels.dig(*Array(keys)))
|
48
|
+
.render(vars.merge("labels" => @labels))
|
142
49
|
end
|
143
50
|
|
144
51
|
def boolean_conj(list, conn)
|
@@ -190,7 +97,7 @@ module IsoDoc
|
|
190
97
|
INFLECTIONS = {
|
191
98
|
number: "sg",
|
192
99
|
case: "nom",
|
193
|
-
gender: "
|
100
|
+
gender: "m",
|
194
101
|
person: "3rd",
|
195
102
|
voice: "act",
|
196
103
|
mood: "ind",
|
@@ -201,7 +108,7 @@ module IsoDoc
|
|
201
108
|
|
202
109
|
def ordinal_key(term)
|
203
110
|
@labels["ordinal_keys"].each_with_object([]) do |k, m|
|
204
|
-
m << (term[k] || INFLECTIONS[k.to_sym])
|
111
|
+
m << (term[k.to_s] || INFLECTIONS[k.to_sym])
|
205
112
|
end.join(".")
|
206
113
|
end
|
207
114
|
|
@@ -216,7 +123,6 @@ module IsoDoc
|
|
216
123
|
def inflect(word, options)
|
217
124
|
i = @labels.dig("inflection", word) or return word
|
218
125
|
i.is_a? String and return i
|
219
|
-
|
220
126
|
INFLECTION_ORDER.each do |x|
|
221
127
|
infl = options[x] || INFLECTIONS[x]
|
222
128
|
i = i[infl] if i[infl]
|
data/lib/isodoc/l10n.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
module IsoDoc
|
2
|
+
class I18n
|
3
|
+
def self.l10n(text, lang = @lang, script = @script, locale = @locale)
|
4
|
+
l10n(text, lang, script, locale)
|
5
|
+
end
|
6
|
+
|
7
|
+
# function localising spaces and punctuation.
|
8
|
+
# Not clear if period needs to be localised for zh
|
9
|
+
def l10n(text, lang = @lang, script = @script, locale = @locale)
|
10
|
+
lang == "zh" and text = l10n_zh(text, script)
|
11
|
+
lang == "fr" && text = l10n_fr(text, locale || "FR")
|
12
|
+
bidiwrap(text, lang, script)
|
13
|
+
end
|
14
|
+
|
15
|
+
def bidiwrap(text, lang, script)
|
16
|
+
my_script, my_rtl, outer_rtl = bidiwrap_vars(lang, script)
|
17
|
+
if my_rtl && !outer_rtl
|
18
|
+
mark = %w(Arab Aran).include?(my_script) ? "؜" : "‏"
|
19
|
+
"#{mark}#{text}#{mark}"
|
20
|
+
elsif !my_rtl && outer_rtl then "‎#{text}‎"
|
21
|
+
else text
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def bidiwrap_vars(lang, script)
|
26
|
+
my_script = script || Metanorma::Utils.default_script(lang)
|
27
|
+
[my_script,
|
28
|
+
Metanorma::Utils.rtl_script?(my_script),
|
29
|
+
Metanorma::Utils.rtl_script?(@script || Metanorma::Utils
|
30
|
+
.default_script(@lang))]
|
31
|
+
end
|
32
|
+
|
33
|
+
def l10n_zh(text, script = "Hans")
|
34
|
+
xml = Nokogiri::XML::DocumentFragment.parse(text)
|
35
|
+
xml.traverse do |n|
|
36
|
+
next unless n.text?
|
37
|
+
|
38
|
+
n.replace(l10_zh1(cleanup_entities(n.text, is_xml: false), script))
|
39
|
+
end
|
40
|
+
xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
|
41
|
+
.gsub(/<\?[^>]+>/, "")
|
42
|
+
end
|
43
|
+
|
44
|
+
def l10n_fr(text, locale)
|
45
|
+
xml = Nokogiri::XML::DocumentFragment.parse(text)
|
46
|
+
xml.traverse do |n|
|
47
|
+
next unless n.text?
|
48
|
+
|
49
|
+
n.replace(l10n_fr1(cleanup_entities(n.text, is_xml: false), locale))
|
50
|
+
end
|
51
|
+
xml.to_xml(encoding: "UTF-8")
|
52
|
+
end
|
53
|
+
|
54
|
+
ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|" \
|
55
|
+
"\\p{In Halfwidth And Fullwidth Forms}".freeze
|
56
|
+
|
57
|
+
# note: we can't differentiate comma from enumeration comma 、
|
58
|
+
def l10_zh1(text, _script)
|
59
|
+
l10n_zh_remove_space(l10n_zh_punct(text))
|
60
|
+
end
|
61
|
+
|
62
|
+
def l10n_zh_punct(text)
|
63
|
+
[":：", ",，", ".．", ")）", "]］", ":：", ";；", "?？", "!！", "–～"].each do |m|
|
64
|
+
text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
|
65
|
+
end
|
66
|
+
["((", "[["].each do |m|
|
67
|
+
text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
|
68
|
+
end
|
69
|
+
text
|
70
|
+
end
|
71
|
+
|
72
|
+
def l10n_zh_remove_space(text)
|
73
|
+
text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
|
74
|
+
.gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
|
75
|
+
.gsub(/(?<=#{ZH_CHAR}) (?=\d)/o, "")
|
76
|
+
.gsub(/(?<=#{ZH_CHAR}) (?=[A-Za-z](#{ZH_CHAR}|$))/o, "")
|
77
|
+
end
|
78
|
+
|
79
|
+
def l10n_fr1(text, locale)
|
80
|
+
text = text.gsub(/(?<=\p{Alnum})([»›;?!])(?=\s)/, "\u202f\\1")
|
81
|
+
text = text.gsub(/(?<=\p{Alnum})([»›;?!])$/, "\u202f\\1")
|
82
|
+
text = text.gsub(/^([»›;?!])/, "\u202f\\1")
|
83
|
+
text = text.gsub(/([«‹])/, "\\1\u202f")
|
84
|
+
colonsp = locale == "CH" ? "\u202f" : "\u00a0"
|
85
|
+
text = text.gsub(/(?<=\p{Alnum})(:)(?=\s)/, "#{colonsp}\\1")
|
86
|
+
text = text.gsub(/(?<=\p{Alnum})(:)$/, "#{colonsp}\\1")
|
87
|
+
text.gsub(/^(:\s)/, "#{colonsp}\\1")
|
88
|
+
end
|
89
|
+
|
90
|
+
def self.cjk_extend(text)
|
91
|
+
cjk_extend(text)
|
92
|
+
end
|
93
|
+
|
94
|
+
def cjk_extend(title)
|
95
|
+
@c.decode(title).chars.map.with_index do |n, i|
|
96
|
+
if i.zero? || !interleave_space_cjk?(title[i - 1] + title[i])
|
97
|
+
n
|
98
|
+
else "\u3000#{n}"
|
99
|
+
end
|
100
|
+
end.join
|
101
|
+
end
|
102
|
+
|
103
|
+
def interleave_space_cjk?(text)
|
104
|
+
text.size == 2 or return
|
105
|
+
["\u2014\u2014", "\u2025\u2025", "\u2026\u2026", "\u22ef\u22ef"].include?(text) ||
|
106
|
+
/\d\d|\p{Latin}\p{Latin}|[[:space:]]/.match?(text) ||
|
107
|
+
/^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/.match?(text) ||
|
108
|
+
/[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/.match?(text) ||
|
109
|
+
/[\u3002.\u3001,\u30fb:;\u2010\u301c\u30a0\u2013!?\u203c\u2047\u2048\u2049]/.match?(text) and return false
|
110
|
+
true
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
data/lib/isodoc/liquid/liquid.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
module IsoDoc
|
2
|
+
class I18n
|
3
|
+
module Liquid
|
4
|
+
def self.set(i18n)
|
5
|
+
@@i18n = i18n
|
6
|
+
end
|
7
|
+
|
8
|
+
def parse_hash(str, symbol: true)
|
9
|
+
str.split(/,\s*/).each_with_object({}) do |e, m|
|
10
|
+
k, v = e.split(/:/, 2)
|
11
|
+
m[symbol ? k.to_sym : k.to_s] = v
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
# word | inflect: nom,sg,neut
|
16
|
+
def inflect(word, options)
|
17
|
+
@@i18n.inflect(word, parse_hash(options))
|
18
|
+
end
|
19
|
+
|
20
|
+
# num | ordinal_num: label, inflection
|
21
|
+
# e.g. "3" | ordinal: "edition", "case:acc"
|
22
|
+
# combines inflection information from label and inflection
|
23
|
+
def ordinal_num(num, label = "", infl = "")
|
24
|
+
l = @@i18n.labels[label]
|
25
|
+
h = @@i18n.labels["inflection"]&.dig(l, "grammar") || {}
|
26
|
+
grammar = h.merge(parse_hash(infl, symbol: false))
|
27
|
+
@@i18n.inflect_ordinal(num.to_i, grammar, "OrdinalRules")
|
28
|
+
end
|
29
|
+
|
30
|
+
def ordinal_word(num, label = "", infl = "")
|
31
|
+
l = @@i18n.labels[label]
|
32
|
+
h = @@i18n.labels["inflection"]&.dig(l, "grammar") || {}
|
33
|
+
grammar = h.merge(parse_hash(infl, symbol: false))
|
34
|
+
@@i18n.inflect_ordinal(num.to_i, grammar, "SpelloutRules")
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isodoc-i18n
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.10
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -24,20 +24,34 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 4.3.4
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: liquid
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: metanorma-utils
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - ">="
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.
|
47
|
+
version: 1.7.0
|
34
48
|
type: :runtime
|
35
49
|
prerelease: false
|
36
50
|
version_requirements: !ruby/object:Gem::Requirement
|
37
51
|
requirements:
|
38
52
|
- - ">="
|
39
53
|
- !ruby/object:Gem::Version
|
40
|
-
version: 1.
|
54
|
+
version: 1.7.0
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: twitter_cldr
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -214,6 +228,8 @@ files:
|
|
214
228
|
- lib/isodoc/i18n-yaml.rb
|
215
229
|
- lib/isodoc/i18n.rb
|
216
230
|
- lib/isodoc/i18n/version.rb
|
231
|
+
- lib/isodoc/l10n.rb
|
232
|
+
- lib/isodoc/liquid/liquid.rb
|
217
233
|
homepage: https://github.com/metanorma/isodoc-i18n
|
218
234
|
licenses:
|
219
235
|
- BSD-2-Clause
|