isodoc-i18n 1.1.1 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8f00540677aae6f53c0c517b376529ce6eb5094c6cebd1d1a9b31ce20e346266
4
- data.tar.gz: 69dd44b3aedc560540c5de52f05f0ae11b1332edccb4565817845648f23a9631
3
+ metadata.gz: 573397fe43411b36723793fd50893ce1fd5eda84ca84f8e00ba608f6e50f3cd5
4
+ data.tar.gz: 3117720a218de8fe171ee8c4d3237c97408a87beac521d4bb13d0d1021d329a9
5
5
  SHA512:
6
- metadata.gz: 8b3a53fe9058b2cba6f929938e61147adcb6eaa0782705ca2097cf448f63a76500abbe4b4d3513b9d0cfca04456547d8127e278ef7a521ec3b1cff6de8302f58
7
- data.tar.gz: 24ff8195e4172769e91fc4f879fe6835b4f24170041e16198ab3a405625f07ed56bce3a702604b5cb25102b4d6f21ce87d5d92889122cd524a4694549ba8d0c8
6
+ metadata.gz: 865a347ccd8f837f4a88f950179d375a79db880ca61de6880d545163ad5e8ca19488410f7dc0a03cf8205a3d948256b58dae49909a442427ffb409635ed7d9c5
7
+ data.tar.gz: a0a3ba98de7b47388e81f9bbdc8dac86e95f9c91aaca0290b1952d9bf7b9f98d23b5725bc70c75f8018c65d61bbaaa629fe65ee2edb868f6cb1dab1223afa8a3
data/lib/isodoc/i18n.rb CHANGED
@@ -5,10 +5,13 @@ require "twitter_cldr"
5
5
 
6
6
  module IsoDoc
7
7
  class I18n
8
+ Hash.include Metanorma::Utils::Hash
9
+
8
10
  def load_yaml(lang, script, i18nyaml = nil, i18nhash = nil)
9
11
  ret = load_yaml1(lang, script)
10
- return normalise_hash(ret.merge(YAML.load_file(i18nyaml))) if i18nyaml
11
- return normalise_hash(ret.merge(i18nhash)) if i18nhash
12
+ i18nyaml and
13
+ return normalise_hash(ret.deep_merge(YAML.load_file(i18nyaml)))
14
+ i18nhash and return normalise_hash(ret.deep_merge(i18nhash))
12
15
 
13
16
  normalise_hash(ret)
14
17
  end
@@ -98,26 +101,27 @@ module IsoDoc
98
101
  end
99
102
 
100
103
  def l10n_zh(text)
101
- xml = Nokogiri::HTML::DocumentFragment.parse(text)
104
+ xml = Nokogiri::XML::DocumentFragment.parse(text)
102
105
  xml.traverse do |n|
103
106
  next unless n.text?
104
107
 
105
108
  n.replace(cleanup_entities(l10_zh1(n.text), is_xml: false))
106
109
  end
107
- xml.to_xml.gsub(/<b>/, "").gsub("</b>", "").gsub(/<\?[^>]+>/, "")
110
+ xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
111
+ .gsub(/<\?[^>]+>/, "")
108
112
  end
109
113
 
110
114
  def l10n_fr(text, locale)
111
- xml = Nokogiri::HTML::DocumentFragment.parse(text)
115
+ xml = Nokogiri::XML::DocumentFragment.parse(text)
112
116
  xml.traverse do |n|
113
117
  next unless n.text?
114
118
 
115
119
  n.replace(cleanup_entities(l10n_fr1(n.text, locale), is_xml: false))
116
120
  end
117
- xml.to_xml
121
+ xml.to_xml(encoding: "UTF-8")
118
122
  end
119
123
 
120
- ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|"\
124
+ ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|" \
121
125
  "\\p{In Halfwidth And Fullwidth Forms}".freeze
122
126
 
123
127
  # note: we can't differentiate comma from enumeration comma 、
@@ -171,11 +175,11 @@ module IsoDoc
171
175
  c = HTMLEntities.new
172
176
  if is_xml
173
177
  text.split(/([<>])/).each_slice(4).map do |a|
174
- a[0] = c.encode(c.decode(a[0]), :hexadecimal)
178
+ a[0] = c.decode(a[0])
175
179
  a
176
180
  end.join
177
181
  else
178
- c.encode(c.decode(text), :hexadecimal)
182
+ c.decode(text)
179
183
  end
180
184
  end
181
185
 
@@ -195,13 +199,21 @@ module IsoDoc
195
199
  num.localize(:en)
196
200
  end
197
201
 
202
+ INFLECTIONS = {
203
+ number: "sg",
204
+ case: "nom",
205
+ gender: "masc",
206
+ person: "3rd",
207
+ voice: "act",
208
+ mood: "ind",
209
+ tense: "pres",
210
+ }.freeze
211
+
212
+ INFLECTION_ORDER = %i(voice mood tense number case gender person).freeze
213
+
198
214
  def ordinal_key(term)
199
215
  @labels["ordinal_keys"].each_with_object([]) do |k, m|
200
- m << case k
201
- when "gender" then term["gender"]
202
- when "number" then term["number"] || "sg"
203
- when "case" then term["case"] || "nom"
204
- end
216
+ m << (term[k] || INFLECTIONS[k.to_sym])
205
217
  end.join(".")
206
218
  end
207
219
 
@@ -211,5 +223,18 @@ module IsoDoc
211
223
  else @lang.to_sym
212
224
  end
213
225
  end
226
+
227
+ # can skip category if not present
228
+ def inflect(word, options)
229
+ i = @labels.dig("inflection", word) or return word
230
+ i.is_a? String and return i
231
+
232
+ INFLECTION_ORDER.each do |x|
233
+ infl = options[x] || INFLECTIONS[x]
234
+ i = i[infl] if i[infl]
235
+ i.is_a? String and return i
236
+ end
237
+ word
238
+ end
214
239
  end
215
240
  end
@@ -1,5 +1,5 @@
1
1
  module IsoDoc
2
2
  class I18n
3
- VERSION = "1.1.1".freeze
3
+ VERSION = "1.1.3".freeze
4
4
  end
5
5
  end
data/spec/assets/new.yaml CHANGED
@@ -5,3 +5,49 @@ arr:
5
5
  - arr1
6
6
  - arr2
7
7
  text: "&#x74;ext2"
8
+ inflection:
9
+ Fred:
10
+ sg: Fred
11
+ pl: Freds
12
+ Man:
13
+ dat: viri
14
+ acc: virem
15
+ Woman:
16
+ sg:
17
+ nom: mulier
18
+ gen: mulieris
19
+ pl:
20
+ nom: mulieres
21
+ gen: mulierum
22
+ Good:
23
+ sg:
24
+ nom:
25
+ masc: bonus
26
+ fem: bona
27
+ neut: bonum
28
+ gen:
29
+ masc: boni
30
+ fem: bonae
31
+ neut: boni
32
+ pl:
33
+ nom:
34
+ masc: boni
35
+ fem: bonae
36
+ neut: bona
37
+ gen:
38
+ masc: bonorum
39
+ fem: bonarum
40
+ neut: bonorum
41
+ Walk:
42
+ act:
43
+ ind:
44
+ pres:
45
+ sg:
46
+ 1st: ambulo
47
+ 2nd: ambulas
48
+ subj:
49
+ pres:
50
+ pl:
51
+ 1st: ambulemus
52
+ 2nd: ambuletis
53
+
@@ -90,18 +90,18 @@ RSpec.describe IsoDoc::I18n do
90
90
  e = HTMLEntities.new
91
91
  c = IsoDoc::I18n.new("fr", "Latn")
92
92
  expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
93
- .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
93
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; " \
94
94
  "and&#xa0;: code&#x202f;!"
95
95
  expect(e.encode(c.l10n("Code; &#xab;code&#xbb; and: code!"), :hexadecimal))
96
- .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
96
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; " \
97
97
  "and&#xa0;: code&#x202f;!"
98
98
  c = IsoDoc::I18n.new("fr", "Latn", locale: "FR")
99
99
  expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
100
- .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
100
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; " \
101
101
  "and&#xa0;: code&#x202f;!"
102
102
  c = IsoDoc::I18n.new("fr", "Latn", locale: "CH")
103
103
  expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
104
- .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
104
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; " \
105
105
  "and&#x202f;: code&#x202f;!"
106
106
  expect(e.encode(c.l10n("http://xyz a;b"), :hexadecimal))
107
107
  .to be_equivalent_to "http://xyz a;b"
@@ -136,4 +136,23 @@ RSpec.describe IsoDoc::I18n do
136
136
  expect(c.inflect_ordinal(5, term, "SpelloutRules"))
137
137
  .to eq "fifth"
138
138
  end
139
+
140
+ it "does inflections" do
141
+ c = IsoDoc::I18n.new("en", "Latn", i18nyaml: "spec/assets/new.yaml")
142
+ expect(c.inflect("John", number: "sg")).to eq "John"
143
+ expect(c.inflect("Fred", number: "sg")).to eq "Fred"
144
+ expect(c.inflect("Fred", number: "pl")).to eq "Freds"
145
+ expect(c.inflect("Fred", number: "du")).to eq "Fred"
146
+ expect(c.inflect("Fred", tense: "pres")).to eq "Fred"
147
+ expect(c.inflect("Man", case: "dat")).to eq "viri"
148
+ expect(c.inflect("Man", number: "sg", case: "dat")).to eq "viri"
149
+ expect(c.inflect("Man", number: "pl", case: "acc")).to eq "virem"
150
+ expect(c.inflect("Woman", number: "pl", case: "gen")).to eq "mulierum"
151
+ expect(c.inflect("Good", number: "pl", case: "gen")).to eq "bonorum"
152
+ expect(c.inflect("Good", number: "pl", case: "gen", gender: "fem"))
153
+ .to eq "bonarum"
154
+ expect(c.inflect("Walk", person: "2nd")).to eq "ambulas"
155
+ expect(c.inflect("Walk", person: "2nd", number: "pl", mood: "subj"))
156
+ .to eq "ambuletis"
157
+ end
139
158
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isodoc-i18n
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-27 00:00:00.000000000 Z
11
+ date: 2022-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities