isodoc-i18n 1.1.1 → 1.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/isodoc/i18n.rb +39 -14
- data/lib/isodoc/version.rb +1 -1
- data/spec/assets/new.yaml +46 -0
- data/spec/isodoc/base_spec.rb +23 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 573397fe43411b36723793fd50893ce1fd5eda84ca84f8e00ba608f6e50f3cd5
|
4
|
+
data.tar.gz: 3117720a218de8fe171ee8c4d3237c97408a87beac521d4bb13d0d1021d329a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 865a347ccd8f837f4a88f950179d375a79db880ca61de6880d545163ad5e8ca19488410f7dc0a03cf8205a3d948256b58dae49909a442427ffb409635ed7d9c5
|
7
|
+
data.tar.gz: a0a3ba98de7b47388e81f9bbdc8dac86e95f9c91aaca0290b1952d9bf7b9f98d23b5725bc70c75f8018c65d61bbaaa629fe65ee2edb868f6cb1dab1223afa8a3
|
data/lib/isodoc/i18n.rb
CHANGED
@@ -5,10 +5,13 @@ require "twitter_cldr"
|
|
5
5
|
|
6
6
|
module IsoDoc
|
7
7
|
class I18n
|
8
|
+
Hash.include Metanorma::Utils::Hash
|
9
|
+
|
8
10
|
def load_yaml(lang, script, i18nyaml = nil, i18nhash = nil)
|
9
11
|
ret = load_yaml1(lang, script)
|
10
|
-
|
11
|
-
|
12
|
+
i18nyaml and
|
13
|
+
return normalise_hash(ret.deep_merge(YAML.load_file(i18nyaml)))
|
14
|
+
i18nhash and return normalise_hash(ret.deep_merge(i18nhash))
|
12
15
|
|
13
16
|
normalise_hash(ret)
|
14
17
|
end
|
@@ -98,26 +101,27 @@ module IsoDoc
|
|
98
101
|
end
|
99
102
|
|
100
103
|
def l10n_zh(text)
|
101
|
-
xml = Nokogiri::
|
104
|
+
xml = Nokogiri::XML::DocumentFragment.parse(text)
|
102
105
|
xml.traverse do |n|
|
103
106
|
next unless n.text?
|
104
107
|
|
105
108
|
n.replace(cleanup_entities(l10_zh1(n.text), is_xml: false))
|
106
109
|
end
|
107
|
-
xml.to_xml.gsub(/<b>/, "").gsub("</b>", "")
|
110
|
+
xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
|
111
|
+
.gsub(/<\?[^>]+>/, "")
|
108
112
|
end
|
109
113
|
|
110
114
|
def l10n_fr(text, locale)
|
111
|
-
xml = Nokogiri::
|
115
|
+
xml = Nokogiri::XML::DocumentFragment.parse(text)
|
112
116
|
xml.traverse do |n|
|
113
117
|
next unless n.text?
|
114
118
|
|
115
119
|
n.replace(cleanup_entities(l10n_fr1(n.text, locale), is_xml: false))
|
116
120
|
end
|
117
|
-
xml.to_xml
|
121
|
+
xml.to_xml(encoding: "UTF-8")
|
118
122
|
end
|
119
123
|
|
120
|
-
ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|"\
|
124
|
+
ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|" \
|
121
125
|
"\\p{In Halfwidth And Fullwidth Forms}".freeze
|
122
126
|
|
123
127
|
# note: we can't differentiate comma from enumeration comma 、
|
@@ -171,11 +175,11 @@ module IsoDoc
|
|
171
175
|
c = HTMLEntities.new
|
172
176
|
if is_xml
|
173
177
|
text.split(/([<>])/).each_slice(4).map do |a|
|
174
|
-
a[0] = c.
|
178
|
+
a[0] = c.decode(a[0])
|
175
179
|
a
|
176
180
|
end.join
|
177
181
|
else
|
178
|
-
c.
|
182
|
+
c.decode(text)
|
179
183
|
end
|
180
184
|
end
|
181
185
|
|
@@ -195,13 +199,21 @@ module IsoDoc
|
|
195
199
|
num.localize(:en)
|
196
200
|
end
|
197
201
|
|
202
|
+
INFLECTIONS = {
|
203
|
+
number: "sg",
|
204
|
+
case: "nom",
|
205
|
+
gender: "masc",
|
206
|
+
person: "3rd",
|
207
|
+
voice: "act",
|
208
|
+
mood: "ind",
|
209
|
+
tense: "pres",
|
210
|
+
}.freeze
|
211
|
+
|
212
|
+
INFLECTION_ORDER = %i(voice mood tense number case gender person).freeze
|
213
|
+
|
198
214
|
def ordinal_key(term)
|
199
215
|
@labels["ordinal_keys"].each_with_object([]) do |k, m|
|
200
|
-
m <<
|
201
|
-
when "gender" then term["gender"]
|
202
|
-
when "number" then term["number"] || "sg"
|
203
|
-
when "case" then term["case"] || "nom"
|
204
|
-
end
|
216
|
+
m << (term[k] || INFLECTIONS[k.to_sym])
|
205
217
|
end.join(".")
|
206
218
|
end
|
207
219
|
|
@@ -211,5 +223,18 @@ module IsoDoc
|
|
211
223
|
else @lang.to_sym
|
212
224
|
end
|
213
225
|
end
|
226
|
+
|
227
|
+
# can skip category if not present
|
228
|
+
def inflect(word, options)
|
229
|
+
i = @labels.dig("inflection", word) or return word
|
230
|
+
i.is_a? String and return i
|
231
|
+
|
232
|
+
INFLECTION_ORDER.each do |x|
|
233
|
+
infl = options[x] || INFLECTIONS[x]
|
234
|
+
i = i[infl] if i[infl]
|
235
|
+
i.is_a? String and return i
|
236
|
+
end
|
237
|
+
word
|
238
|
+
end
|
214
239
|
end
|
215
240
|
end
|
data/lib/isodoc/version.rb
CHANGED
data/spec/assets/new.yaml
CHANGED
@@ -5,3 +5,49 @@ arr:
|
|
5
5
|
- arr1
|
6
6
|
- arr2
|
7
7
|
text: "text2"
|
8
|
+
inflection:
|
9
|
+
Fred:
|
10
|
+
sg: Fred
|
11
|
+
pl: Freds
|
12
|
+
Man:
|
13
|
+
dat: viri
|
14
|
+
acc: virem
|
15
|
+
Woman:
|
16
|
+
sg:
|
17
|
+
nom: mulier
|
18
|
+
gen: mulieris
|
19
|
+
pl:
|
20
|
+
nom: mulieres
|
21
|
+
gen: mulierum
|
22
|
+
Good:
|
23
|
+
sg:
|
24
|
+
nom:
|
25
|
+
masc: bonus
|
26
|
+
fem: bona
|
27
|
+
neut: bonum
|
28
|
+
gen:
|
29
|
+
masc: boni
|
30
|
+
fem: bonae
|
31
|
+
neut: boni
|
32
|
+
pl:
|
33
|
+
nom:
|
34
|
+
masc: boni
|
35
|
+
fem: bonae
|
36
|
+
neut: bona
|
37
|
+
gen:
|
38
|
+
masc: bonorum
|
39
|
+
fem: bonarum
|
40
|
+
neut: bonorum
|
41
|
+
Walk:
|
42
|
+
act:
|
43
|
+
ind:
|
44
|
+
pres:
|
45
|
+
sg:
|
46
|
+
1st: ambulo
|
47
|
+
2nd: ambulas
|
48
|
+
subj:
|
49
|
+
pres:
|
50
|
+
pl:
|
51
|
+
1st: ambulemus
|
52
|
+
2nd: ambuletis
|
53
|
+
|
data/spec/isodoc/base_spec.rb
CHANGED
@@ -90,18 +90,18 @@ RSpec.describe IsoDoc::I18n do
|
|
90
90
|
e = HTMLEntities.new
|
91
91
|
c = IsoDoc::I18n.new("fr", "Latn")
|
92
92
|
expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
|
93
|
-
.to be_equivalent_to "Code ; « code » "\
|
93
|
+
.to be_equivalent_to "Code ; « code » " \
|
94
94
|
"and : code !"
|
95
95
|
expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
|
96
|
-
.to be_equivalent_to "Code ; « code » "\
|
96
|
+
.to be_equivalent_to "Code ; « code » " \
|
97
97
|
"and : code !"
|
98
98
|
c = IsoDoc::I18n.new("fr", "Latn", locale: "FR")
|
99
99
|
expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
|
100
|
-
.to be_equivalent_to "Code ; « code » "\
|
100
|
+
.to be_equivalent_to "Code ; « code » " \
|
101
101
|
"and : code !"
|
102
102
|
c = IsoDoc::I18n.new("fr", "Latn", locale: "CH")
|
103
103
|
expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
|
104
|
-
.to be_equivalent_to "Code ; « code » "\
|
104
|
+
.to be_equivalent_to "Code ; « code » " \
|
105
105
|
"and : code !"
|
106
106
|
expect(e.encode(c.l10n("http://xyz a;b"), :hexadecimal))
|
107
107
|
.to be_equivalent_to "http://xyz a;b"
|
@@ -136,4 +136,23 @@ RSpec.describe IsoDoc::I18n do
|
|
136
136
|
expect(c.inflect_ordinal(5, term, "SpelloutRules"))
|
137
137
|
.to eq "fifth"
|
138
138
|
end
|
139
|
+
|
140
|
+
it "does inflections" do
|
141
|
+
c = IsoDoc::I18n.new("en", "Latn", i18nyaml: "spec/assets/new.yaml")
|
142
|
+
expect(c.inflect("John", number: "sg")).to eq "John"
|
143
|
+
expect(c.inflect("Fred", number: "sg")).to eq "Fred"
|
144
|
+
expect(c.inflect("Fred", number: "pl")).to eq "Freds"
|
145
|
+
expect(c.inflect("Fred", number: "du")).to eq "Fred"
|
146
|
+
expect(c.inflect("Fred", tense: "pres")).to eq "Fred"
|
147
|
+
expect(c.inflect("Man", case: "dat")).to eq "viri"
|
148
|
+
expect(c.inflect("Man", number: "sg", case: "dat")).to eq "viri"
|
149
|
+
expect(c.inflect("Man", number: "pl", case: "acc")).to eq "virem"
|
150
|
+
expect(c.inflect("Woman", number: "pl", case: "gen")).to eq "mulierum"
|
151
|
+
expect(c.inflect("Good", number: "pl", case: "gen")).to eq "bonorum"
|
152
|
+
expect(c.inflect("Good", number: "pl", case: "gen", gender: "fem"))
|
153
|
+
.to eq "bonarum"
|
154
|
+
expect(c.inflect("Walk", person: "2nd")).to eq "ambulas"
|
155
|
+
expect(c.inflect("Walk", person: "2nd", number: "pl", mood: "subj"))
|
156
|
+
.to eq "ambuletis"
|
157
|
+
end
|
139
158
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isodoc-i18n
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|