isodoc-i18n 1.1.10 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -3
- data/isodoc-i18n.gemspec +2 -1
- data/lib/isodoc/i18n/version.rb +1 -1
- data/lib/isodoc/i18n.rb +19 -113
- data/lib/isodoc/l10n.rb +113 -0
- data/lib/isodoc/liquid/liquid.rb +38 -0
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7552e0a62364e47c4f8ea7b2810e1ba5d11e39d2a3711792c826412b1aef661f
|
4
|
+
data.tar.gz: 29d4eef4f2a07bcf6ad1de7ddc229cf7ff9b0ffa8e55c81652d2174165cf2214
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 220fddc821f5f00d4f1eecca1f7583b0236254bb0a5141c50d7583f4c725be2b4aca949ffc4f89ed390c01850f2912d86b9562959773efabf1a3df4838e8ec60
|
7
|
+
data.tar.gz: 59373ef3a3688bf4f382da1e2a5df55b3b7ae1615385bc04504faf9421e9c5e66333ed1859243f0e8a86d4f42724692ab1cd9b8f9f0bd62d4c4e70cceb7e93cd
|
data/Gemfile
CHANGED
data/isodoc-i18n.gemspec
CHANGED
@@ -23,7 +23,8 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
|
24
24
|
|
25
25
|
spec.add_dependency "htmlentities", "~> 4.3.4"
|
26
|
-
spec.add_dependency "
|
26
|
+
spec.add_dependency "liquid", "~> 5"
|
27
|
+
spec.add_dependency "metanorma-utils", ">= 1.7.0"
|
27
28
|
spec.add_dependency "twitter_cldr"
|
28
29
|
|
29
30
|
spec.add_development_dependency "debug"
|
data/lib/isodoc/i18n/version.rb
CHANGED
data/lib/isodoc/i18n.rb
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
require "htmlentities"
|
2
2
|
require "twitter_cldr"
|
3
|
-
require_relative "i18n/version"
|
4
3
|
require_relative "i18n-yaml"
|
5
4
|
require_relative "date"
|
5
|
+
require_relative "l10n"
|
6
|
+
require_relative "liquid/liquid"
|
7
|
+
require "liquid"
|
8
|
+
require_relative "i18n/version"
|
6
9
|
|
7
10
|
module IsoDoc
|
8
11
|
class I18n
|
12
|
+
attr_accessor :labels
|
13
|
+
|
9
14
|
def initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil)
|
10
15
|
@lang = lang
|
11
16
|
@script = script
|
@@ -14,6 +19,12 @@ module IsoDoc
|
|
14
19
|
@cal_en = TwitterCldr::Shared::Calendar.new(:en)
|
15
20
|
@c = HTMLEntities.new
|
16
21
|
init_labels(i18nyaml, i18nhash)
|
22
|
+
liquid_init
|
23
|
+
end
|
24
|
+
|
25
|
+
# Make this I18n instance the one used by the isodoc Liquid filters, and
# register those filters with Liquid.
# NOTE(review): both calls act on process-wide state, so the most recently
# initialised I18n instance is the one the filters will see.
def liquid_init
  filters = ::IsoDoc::I18n::Liquid
  filters.set(self)
  ::Liquid::Template.register_filter(filters)
end
|
18
29
|
|
19
30
|
def calendar_data
|
@@ -26,119 +37,15 @@ module IsoDoc
|
|
26
37
|
@labels = load_yaml(@lang, @script, i18nyaml, i18nhash)
|
27
38
|
@labels["language"] = @lang
|
28
39
|
@labels["script"] = @script
|
29
|
-
@labels.
|
40
|
+
@labels.each_key do |k|
|
30
41
|
self.class.send(:define_method, k.downcase) { get[k] }
|
31
42
|
end
|
32
43
|
end
|
33
44
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
# function localising spaces and punctuation.
|
39
|
-
# Not clear if period needs to be localised for zh
|
40
|
-
def l10n(text, lang = @lang, script = @script, locale = @locale)
|
41
|
-
lang == "zh" and text = l10n_zh(text, script)
|
42
|
-
lang == "fr" && text = l10n_fr(text, locale || "FR")
|
43
|
-
bidiwrap(text, lang, script)
|
44
|
-
end
|
45
|
-
|
46
|
-
def bidiwrap(text, lang, script)
|
47
|
-
my_script, my_rtl, outer_rtl = bidiwrap_vars(lang, script)
|
48
|
-
if my_rtl && !outer_rtl
|
49
|
-
mark = %w(Arab Aran).include?(my_script) ? "؜" : "‏"
|
50
|
-
"#{mark}#{text}#{mark}"
|
51
|
-
elsif !my_rtl && outer_rtl then "‎#{text}‎"
|
52
|
-
else text
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def bidiwrap_vars(lang, script)
|
57
|
-
my_script = script || Metanorma::Utils.default_script(lang)
|
58
|
-
[my_script,
|
59
|
-
Metanorma::Utils.rtl_script?(my_script),
|
60
|
-
Metanorma::Utils.rtl_script?(@script || Metanorma::Utils
|
61
|
-
.default_script(@lang))]
|
62
|
-
end
|
63
|
-
|
64
|
-
def l10n_zh(text, script = "Hans")
|
65
|
-
xml = Nokogiri::XML::DocumentFragment.parse(text)
|
66
|
-
xml.traverse do |n|
|
67
|
-
next unless n.text?
|
68
|
-
|
69
|
-
n.replace(l10_zh1(cleanup_entities(n.text, is_xml: false), script))
|
70
|
-
end
|
71
|
-
xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
|
72
|
-
.gsub(/<\?[^>]+>/, "")
|
73
|
-
end
|
74
|
-
|
75
|
-
def l10n_fr(text, locale)
|
76
|
-
xml = Nokogiri::XML::DocumentFragment.parse(text)
|
77
|
-
xml.traverse do |n|
|
78
|
-
next unless n.text?
|
79
|
-
|
80
|
-
n.replace(l10n_fr1(cleanup_entities(n.text, is_xml: false), locale))
|
81
|
-
end
|
82
|
-
xml.to_xml(encoding: "UTF-8")
|
83
|
-
end
|
84
|
-
|
85
|
-
ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|" \
|
86
|
-
"\\p{In Halfwidth And Fullwidth Forms}".freeze
|
87
|
-
|
88
|
-
# note: we can't differentiate comma from enumeration comma 、
|
89
|
-
def l10_zh1(text, _script)
|
90
|
-
l10n_zh_remove_space(l10n_zh_punct(text))
|
91
|
-
end
|
92
|
-
|
93
|
-
def l10n_zh_punct(text)
|
94
|
-
["::", ",,", "..", "))", "]]", "::", ";;", "??", "!!", "–~"].each do |m|
|
95
|
-
text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
|
96
|
-
end
|
97
|
-
["((", "[["].each do |m|
|
98
|
-
text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
|
99
|
-
end
|
100
|
-
text
|
101
|
-
end
|
102
|
-
|
103
|
-
def l10n_zh_remove_space(text)
|
104
|
-
text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
|
105
|
-
.gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
|
106
|
-
.gsub(/(?<=#{ZH_CHAR}) (?=\d)/o, "")
|
107
|
-
.gsub(/(?<=#{ZH_CHAR}) (?=[A-Za-z](#{ZH_CHAR}|$))/o, "")
|
108
|
-
end
|
109
|
-
|
110
|
-
def l10n_fr1(text, locale)
|
111
|
-
text = text.gsub(/(?<=\p{Alnum})([»›;?!])(?=\s)/, "\u202f\\1")
|
112
|
-
text = text.gsub(/(?<=\p{Alnum})([»›;?!])$/, "\u202f\\1")
|
113
|
-
text = text.gsub(/^([»›;?!])/, "\u202f\\1")
|
114
|
-
text = text.gsub(/([«‹])/, "\\1\u202f")
|
115
|
-
colonsp = locale == "CH" ? "\u202f" : "\u00a0"
|
116
|
-
text = text.gsub(/(?<=\p{Alnum})(:)(?=\s)/, "#{colonsp}\\1")
|
117
|
-
text = text.gsub(/(?<=\p{Alnum})(:)$/, "#{colonsp}\\1")
|
118
|
-
text.gsub(/^(:\s)/, "#{colonsp}\\1")
|
119
|
-
end
|
120
|
-
|
121
|
-
def self.cjk_extend(text)
|
122
|
-
cjk_extend(text)
|
123
|
-
end
|
124
|
-
|
125
|
-
def cjk_extend(title)
|
126
|
-
@c.decode(title).chars.map.with_index do |n, i|
|
127
|
-
if i.zero? || !interleave_space_cjk?(title[i - 1] + title[i])
|
128
|
-
n
|
129
|
-
else "\u3000#{n}"
|
130
|
-
end
|
131
|
-
end.join
|
132
|
-
end
|
133
|
-
|
134
|
-
def interleave_space_cjk?(text)
|
135
|
-
text.size == 2 or return
|
136
|
-
["\u2014\u2014", "\u2025\u2025", "\u2026\u2026", "\u22ef\u22ef"].include?(text) ||
|
137
|
-
/\d\d|\p{Latin}\p{Latin}|[[:space:]]/.match?(text) ||
|
138
|
-
/^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/.match?(text) ||
|
139
|
-
/[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/.match?(text) ||
|
140
|
-
/[\u3002.\u3001,\u30fb:;\u2010\u301c\u30a0\u2013!?\u203c\u2047\u2048\u2049]/.match?(text) and return false
|
141
|
-
true
|
45
|
+
# Populate a label with variables: the label is treated as a Liquid
# template, so it can use Liquid syntax and the registered filters
# (inflections, ordinals/spellout).
#
# keys: a single key, or an array of keys forming a path into @labels.
# vars: template variables; the full label set is additionally exposed to
#       the template under "labels".
# Returns the rendered string.
def populate(keys, vars = {})
  path = Array(keys)
  template = ::Liquid::Template.parse(@labels.dig(*path))
  template.render(vars.merge("labels" => @labels))
end
|
143
50
|
|
144
51
|
def boolean_conj(list, conn)
|
@@ -190,7 +97,7 @@ module IsoDoc
|
|
190
97
|
INFLECTIONS = {
|
191
98
|
number: "sg",
|
192
99
|
case: "nom",
|
193
|
-
gender: "
|
100
|
+
gender: "m",
|
194
101
|
person: "3rd",
|
195
102
|
voice: "act",
|
196
103
|
mood: "ind",
|
@@ -201,7 +108,7 @@ module IsoDoc
|
|
201
108
|
|
202
109
|
# Build the dot-separated lookup key for an ordinal rule.
# For every grammatical category listed under the "ordinal_keys" label, use
# the value given in +term+ (looked up by string key), falling back to the
# default in INFLECTIONS.
def ordinal_key(term)
  parts = @labels["ordinal_keys"].map do |category|
    term[category.to_s] || INFLECTIONS[category.to_sym]
  end
  parts.join(".")
end
|
207
114
|
|
@@ -216,7 +123,6 @@ module IsoDoc
|
|
216
123
|
def inflect(word, options)
|
217
124
|
i = @labels.dig("inflection", word) or return word
|
218
125
|
i.is_a? String and return i
|
219
|
-
|
220
126
|
INFLECTION_ORDER.each do |x|
|
221
127
|
infl = options[x] || INFLECTIONS[x]
|
222
128
|
i = i[infl] if i[infl]
|
data/lib/isodoc/l10n.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
module IsoDoc
  # Localisation of spacing, punctuation and text direction for I18n.
  class I18n
    # Regexp source (not a Regexp) matching one Chinese-script character;
    # interpolated into the patterns of l10n_zh_remove_space.
    ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|" \
              "\\p{In Halfwidth And Fullwidth Forms}".freeze

    # Western punctuation => full-width counterpart used for Chinese text.
    # NOTE(review): the reviewed copy of this file listed identical ASCII
    # pairs ("::", ",,", ...), which made the substitution a no-op; the
    # full-width targets below (ASCII code point + 0xFEE0, and en dash =>
    # full-width tilde) are a reconstruction. Confirm against upstream.
    ZH_PUNCT = {
      ":" => "\uFF1A", "," => "\uFF0C", "." => "\uFF0E", ")" => "\uFF09",
      "]" => "\uFF3D", ";" => "\uFF1B", "?" => "\uFF1F", "!" => "\uFF01",
      "\u2013" => "\uFF5E", "(" => "\uFF08", "[" => "\uFF3B"
    }.freeze
    ZH_PUNCT_RE = Regexp.union(ZH_PUNCT.keys).freeze

    # Directional marks used by bidiwrap, written as escapes because the
    # characters themselves are invisible.
    BIDI_ALM = "\u061C".freeze # Arabic letter mark
    BIDI_RLM = "\u200F".freeze # right-to-left mark
    BIDI_LRM = "\u200E".freeze # left-to-right mark

    # Two-character sequences that must never be split by cjk_extend.
    CJK_DOUBLED_PUNCT =
      ["\u2014\u2014", "\u2025\u2025", "\u2026\u2026", "\u22ef\u22ef"].freeze

    # A pair matching any of these gets no ideographic space between its
    # two characters: digit/Latin runs and whitespace, an opening bracket or
    # quote in first position, a closing one in last position, or separator
    # punctuation anywhere.
    CJK_NO_SPACE_PATTERNS = [
      /\d\d|\p{Latin}\p{Latin}|[[:space:]]/,
      /^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/,
      /[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/,
      /[\u3002.\u3001,\u30fb:;\u2010\u301c\u30a0\u2013!?\u203c\u2047\u2048\u2049]/,
    ].freeze

    # Class-level convenience for #l10n. The previous body called itself
    # and never terminated; it now delegates to a label-free instance whose
    # own language/script are the ones passed in, so the text is treated as
    # sitting in its own language context.
    def self.l10n(text, lang = @lang, script = @script, locale = @locale)
      stateless(lang, script, locale).l10n(text, lang, script, locale)
    end

    # Class-level convenience for #cjk_extend (previously self-recursive).
    def self.cjk_extend(text)
      stateless.cjk_extend(text)
    end

    # Build an instance carrying only the state the localisation methods
    # read (@lang, @script, @locale, @c). Uses allocate, so initialize is
    # skipped: no labels are loaded and no Liquid filters are registered.
    def self.stateless(lang = nil, script = nil, locale = nil)
      allocate.tap do |i18n|
        i18n.instance_variable_set(:@lang, lang)
        i18n.instance_variable_set(:@script, script)
        i18n.instance_variable_set(:@locale, locale)
        i18n.instance_variable_set(:@c, HTMLEntities.new)
      end
    end
    private_class_method :stateless

    # function localising spaces and punctuation.
    # Not clear if period needs to be localised for zh
    #
    # text: string, possibly containing XML markup.
    # lang/script/locale default to those of this instance.
    # Returns the localised text, wrapped in directional marks when its
    # direction differs from that of the instance.
    def l10n(text, lang = @lang, script = @script, locale = @locale)
      case lang
      when "zh" then text = l10n_zh(text, script)
      when "fr" then text = l10n_fr(text, locale || "FR")
      end
      bidiwrap(text, lang, script)
    end

    # Surround text with directional marks when the direction of
    # lang/script differs from that of the instance's own language/script;
    # return it unchanged otherwise.
    def bidiwrap(text, lang, script)
      my_script, my_rtl, outer_rtl = bidiwrap_vars(lang, script)
      if my_rtl && !outer_rtl
        mark = %w(Arab Aran).include?(my_script) ? BIDI_ALM : BIDI_RLM
        "#{mark}#{text}#{mark}"
      elsif !my_rtl && outer_rtl
        "#{BIDI_LRM}#{text}#{BIDI_LRM}"
      else
        text
      end
    end

    # Returns [script of the text, whether that script is right-to-left,
    # whether the instance's own script is right-to-left]. A missing script
    # is replaced by Metanorma::Utils' default for the language.
    def bidiwrap_vars(lang, script)
      my_script = script || Metanorma::Utils.default_script(lang)
      outer_script = @script || Metanorma::Utils.default_script(@lang)
      [my_script,
       Metanorma::Utils.rtl_script?(my_script),
       Metanorma::Utils.rtl_script?(outer_script)]
    end

    # Localise the text nodes of an XML fragment for Chinese. <b> tags and
    # processing instructions are removed from the serialised result.
    def l10n_zh(text, script = "Hans")
      l10n_text_nodes(text) { |t| l10_zh1(t, script) }
        .gsub(/<b>/, "").gsub("</b>", "")
        .gsub(/<\?[^>]+>/, "")
    end

    # Localise the text nodes of an XML fragment for French.
    def l10n_fr(text, locale)
      l10n_text_nodes(text) { |t| l10n_fr1(t, locale) }
    end

    # note: we can't differentiate comma from enumeration comma 、
    def l10_zh1(text, _script)
      l10n_zh_remove_space(l10n_zh_punct(text))
    end

    # Replace Western punctuation with its full-width form (see ZH_PUNCT).
    def l10n_zh_punct(text)
      text.gsub(ZH_PUNCT_RE, ZH_PUNCT)
    end

    # Remove a single space between two Chinese characters, between a digit
    # and a Chinese character (either order), and between a Chinese
    # character and a lone Latin letter that is itself followed by a Chinese
    # character or the end of a line.
    def l10n_zh_remove_space(text)
      text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
        .gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
        .gsub(/(?<=#{ZH_CHAR}) (?=\d)/o, "")
        .gsub(/(?<=#{ZH_CHAR}) (?=[A-Za-z](#{ZH_CHAR}|$))/o, "")
    end

    # French spacing: a narrow no-break space (U+202F) goes before closing
    # guillemets and ; ? ! and after opening guillemets; before a colon goes
    # U+202F for locale "CH" and a no-break space (U+00A0) otherwise.
    def l10n_fr1(text, locale)
      text = text.gsub(/(?<=\p{Alnum})([»›;?!])(?=\s)/, "\u202f\\1")
      text = text.gsub(/(?<=\p{Alnum})([»›;?!])$/, "\u202f\\1")
      text = text.gsub(/^([»›;?!])/, "\u202f\\1")
      text = text.gsub(/([«‹])/, "\\1\u202f")
      colonsp = locale == "CH" ? "\u202f" : "\u00a0"
      text = text.gsub(/(?<=\p{Alnum})(:)(?=\s)/, "#{colonsp}\\1")
      text = text.gsub(/(?<=\p{Alnum})(:)$/, "#{colonsp}\\1")
      text.gsub(/^(:\s)/, "#{colonsp}\\1")
    end

    # Spread a CJK title by inserting an ideographic space (U+3000) between
    # adjacent characters wherever interleave_space_cjk? allows it.
    # Entities are decoded first, and pairs are taken from the decoded text;
    # previously they were read from the undecoded input, which misaligned
    # them whenever the input contained an entity.
    def cjk_extend(title)
      chars = @c.decode(title).chars
      chars.each_with_index.map do |char, i|
        if i.positive? && interleave_space_cjk?(chars[i - 1] + char)
          "\u3000#{char}"
        else
          char
        end
      end.join
    end

    # Whether an ideographic space may be inserted between the two
    # characters of text. Always false unless text is exactly two
    # characters long.
    def interleave_space_cjk?(text)
      return false unless text.size == 2
      return false if CJK_DOUBLED_PUNCT.include?(text)

      CJK_NO_SPACE_PATTERNS.none? { |pattern| pattern.match?(text) }
    end

    private

    # Parse text as an XML fragment, replace every text node with the
    # block's result for that node's entity-cleaned text, and return the
    # fragment serialised as UTF-8.
    def l10n_text_nodes(text)
      xml = Nokogiri::XML::DocumentFragment.parse(text)
      xml.traverse do |n|
        next unless n.text?

        n.replace(yield(cleanup_entities(n.text, is_xml: false)))
      end
      xml.to_xml(encoding: "UTF-8")
    end
  end
end
|
data/lib/isodoc/liquid/liquid.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
module IsoDoc
  class I18n
    # Liquid filters backed by an I18n instance. The public instance methods
    # of this module are what I18n#liquid_init registers with Liquid.
    module Liquid
      # Set the I18n instance all filters delegate to.
      # NOTE(review): this is a single module-wide slot, so the most recent
      # call wins for every template rendered afterwards.
      def self.set(i18n)
        @@i18n = i18n
      end

      # Parse "key:value, key:value" into a Hash.
      #
      # str: option string; nil is treated as empty. Whitespace around keys
      #      and values is ignored, and entries without a key are skipped.
      #      An entry without a colon yields a nil value.
      # symbol: true for Symbol keys (default), false for String keys.
      def parse_hash(str, symbol: true)
        str.to_s.split(/,\s*/).each_with_object({}) do |entry, acc|
          key, value = entry.split(":", 2).map(&:strip)
          next if key.nil? || key.empty?

          acc[symbol ? key.to_sym : key] = value
        end
      end

      # word | inflect: "number:pl,case:acc"
      # Inflect word through the I18n instance, with the options parsed by
      # parse_hash into Symbol keys.
      def inflect(word, options)
        @@i18n.inflect(word, parse_hash(options))
      end

      # num | ordinal_num: label, inflection
      # e.g. "3" | ordinal_num: "edition", "case:acc"
      # combines inflection information from label and inflection
      def ordinal_num(num, label = "", infl = "")
        @@i18n.inflect_ordinal(num.to_i, ordinal_grammar(label, infl),
                               "OrdinalRules")
      end

      # num | ordinal_word: label, inflection
      # Same as ordinal_num, but using the "SpelloutRules" rule set.
      def ordinal_word(num, label = "", infl = "")
        @@i18n.inflect_ordinal(num.to_i, ordinal_grammar(label, infl),
                               "SpelloutRules")
      end

      private

      # Grammar for an ordinal: the "grammar" entry recorded under
      # labels["inflection"] for the term that label resolves to, overridden
      # by the explicit infl string. Any level that is missing or is not a
      # Hash (an inflection entry may be a plain String) contributes nothing
      # instead of raising. Kept private so that it is not itself exposed as
      # a filter.
      def ordinal_grammar(label, infl)
        term = @@i18n.labels[label]
        inflections = @@i18n.labels["inflection"]
        entry = inflections.is_a?(Hash) ? inflections[term] : nil
        base = entry.is_a?(Hash) ? entry["grammar"] : nil
        base = {} unless base.is_a?(Hash)
        base.merge(parse_hash(infl, symbol: false))
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isodoc-i18n
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -24,20 +24,34 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 4.3.4
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: liquid
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: metanorma-utils
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - ">="
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.
|
47
|
+
version: 1.7.0
|
34
48
|
type: :runtime
|
35
49
|
prerelease: false
|
36
50
|
version_requirements: !ruby/object:Gem::Requirement
|
37
51
|
requirements:
|
38
52
|
- - ">="
|
39
53
|
- !ruby/object:Gem::Version
|
40
|
-
version: 1.
|
54
|
+
version: 1.7.0
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: twitter_cldr
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -214,6 +228,8 @@ files:
|
|
214
228
|
- lib/isodoc/i18n-yaml.rb
|
215
229
|
- lib/isodoc/i18n.rb
|
216
230
|
- lib/isodoc/i18n/version.rb
|
231
|
+
- lib/isodoc/l10n.rb
|
232
|
+
- lib/isodoc/liquid/liquid.rb
|
217
233
|
homepage: https://github.com/metanorma/isodoc-i18n
|
218
234
|
licenses:
|
219
235
|
- BSD-2-Clause
|