isodoc-i18n 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8f00540677aae6f53c0c517b376529ce6eb5094c6cebd1d1a9b31ce20e346266
4
- data.tar.gz: 69dd44b3aedc560540c5de52f05f0ae11b1332edccb4565817845648f23a9631
3
+ metadata.gz: 573397fe43411b36723793fd50893ce1fd5eda84ca84f8e00ba608f6e50f3cd5
4
+ data.tar.gz: 3117720a218de8fe171ee8c4d3237c97408a87beac521d4bb13d0d1021d329a9
5
5
  SHA512:
6
- metadata.gz: 8b3a53fe9058b2cba6f929938e61147adcb6eaa0782705ca2097cf448f63a76500abbe4b4d3513b9d0cfca04456547d8127e278ef7a521ec3b1cff6de8302f58
7
- data.tar.gz: 24ff8195e4172769e91fc4f879fe6835b4f24170041e16198ab3a405625f07ed56bce3a702604b5cb25102b4d6f21ce87d5d92889122cd524a4694549ba8d0c8
6
+ metadata.gz: 865a347ccd8f837f4a88f950179d375a79db880ca61de6880d545163ad5e8ca19488410f7dc0a03cf8205a3d948256b58dae49909a442427ffb409635ed7d9c5
7
+ data.tar.gz: a0a3ba98de7b47388e81f9bbdc8dac86e95f9c91aaca0290b1952d9bf7b9f98d23b5725bc70c75f8018c65d61bbaaa629fe65ee2edb868f6cb1dab1223afa8a3
data/lib/isodoc/i18n.rb CHANGED
@@ -5,10 +5,13 @@ require "twitter_cldr"
5
5
 
6
6
  module IsoDoc
7
7
  class I18n
8
+ Hash.include Metanorma::Utils::Hash
9
+
8
10
  def load_yaml(lang, script, i18nyaml = nil, i18nhash = nil)
9
11
  ret = load_yaml1(lang, script)
10
- return normalise_hash(ret.merge(YAML.load_file(i18nyaml))) if i18nyaml
11
- return normalise_hash(ret.merge(i18nhash)) if i18nhash
12
+ i18nyaml and
13
+ return normalise_hash(ret.deep_merge(YAML.load_file(i18nyaml)))
14
+ i18nhash and return normalise_hash(ret.deep_merge(i18nhash))
12
15
 
13
16
  normalise_hash(ret)
14
17
  end
@@ -98,26 +101,27 @@ module IsoDoc
98
101
  end
99
102
 
100
103
  def l10n_zh(text)
101
- xml = Nokogiri::HTML::DocumentFragment.parse(text)
104
+ xml = Nokogiri::XML::DocumentFragment.parse(text)
102
105
  xml.traverse do |n|
103
106
  next unless n.text?
104
107
 
105
108
  n.replace(cleanup_entities(l10_zh1(n.text), is_xml: false))
106
109
  end
107
- xml.to_xml.gsub(/<b>/, "").gsub("</b>", "").gsub(/<\?[^>]+>/, "")
110
+ xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
111
+ .gsub(/<\?[^>]+>/, "")
108
112
  end
109
113
 
110
114
  def l10n_fr(text, locale)
111
- xml = Nokogiri::HTML::DocumentFragment.parse(text)
115
+ xml = Nokogiri::XML::DocumentFragment.parse(text)
112
116
  xml.traverse do |n|
113
117
  next unless n.text?
114
118
 
115
119
  n.replace(cleanup_entities(l10n_fr1(n.text, locale), is_xml: false))
116
120
  end
117
- xml.to_xml
121
+ xml.to_xml(encoding: "UTF-8")
118
122
  end
119
123
 
120
- ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|"\
124
+ ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|" \
121
125
  "\\p{In Halfwidth And Fullwidth Forms}".freeze
122
126
 
123
127
  # note: we can't differentiate comma from enumeration comma 、
@@ -171,11 +175,11 @@ module IsoDoc
171
175
  c = HTMLEntities.new
172
176
  if is_xml
173
177
  text.split(/([<>])/).each_slice(4).map do |a|
174
- a[0] = c.encode(c.decode(a[0]), :hexadecimal)
178
+ a[0] = c.decode(a[0])
175
179
  a
176
180
  end.join
177
181
  else
178
- c.encode(c.decode(text), :hexadecimal)
182
+ c.decode(text)
179
183
  end
180
184
  end
181
185
 
@@ -195,13 +199,21 @@ module IsoDoc
195
199
  num.localize(:en)
196
200
  end
197
201
 
202
+ INFLECTIONS = {
203
+ number: "sg",
204
+ case: "nom",
205
+ gender: "masc",
206
+ person: "3rd",
207
+ voice: "act",
208
+ mood: "ind",
209
+ tense: "pres",
210
+ }.freeze
211
+
212
+ INFLECTION_ORDER = %i(voice mood tense number case gender person).freeze
213
+
198
214
  def ordinal_key(term)
199
215
  @labels["ordinal_keys"].each_with_object([]) do |k, m|
200
- m << case k
201
- when "gender" then term["gender"]
202
- when "number" then term["number"] || "sg"
203
- when "case" then term["case"] || "nom"
204
- end
216
+ m << (term[k] || INFLECTIONS[k.to_sym])
205
217
  end.join(".")
206
218
  end
207
219
 
@@ -211,5 +223,18 @@ module IsoDoc
211
223
  else @lang.to_sym
212
224
  end
213
225
  end
226
+
227
+ # can skip category if not present
228
+ def inflect(word, options)
229
+ i = @labels.dig("inflection", word) or return word
230
+ i.is_a? String and return i
231
+
232
+ INFLECTION_ORDER.each do |x|
233
+ infl = options[x] || INFLECTIONS[x]
234
+ i = i[infl] if i[infl]
235
+ i.is_a? String and return i
236
+ end
237
+ word
238
+ end
214
239
  end
215
240
  end
@@ -1,5 +1,5 @@
1
1
  module IsoDoc
2
2
  class I18n
3
- VERSION = "1.1.1".freeze
3
+ VERSION = "1.1.3".freeze
4
4
  end
5
5
  end
data/spec/assets/new.yaml CHANGED
@@ -5,3 +5,49 @@ arr:
5
5
  - arr1
6
6
  - arr2
7
7
  text: "&#x74;ext2"
8
+ inflection:
9
+ Fred:
10
+ sg: Fred
11
+ pl: Freds
12
+ Man:
13
+ dat: viri
14
+ acc: virem
15
+ Woman:
16
+ sg:
17
+ nom: mulier
18
+ gen: mulieris
19
+ pl:
20
+ nom: mulieres
21
+ gen: mulierum
22
+ Good:
23
+ sg:
24
+ nom:
25
+ masc: bonus
26
+ fem: bona
27
+ neut: bonum
28
+ gen:
29
+ masc: boni
30
+ fem: bonae
31
+ neut: boni
32
+ pl:
33
+ nom:
34
+ masc: boni
35
+ fem: bonae
36
+ neut: bona
37
+ gen:
38
+ masc: bonorum
39
+ fem: bonarum
40
+ neut: bonorum
41
+ Walk:
42
+ act:
43
+ ind:
44
+ pres:
45
+ sg:
46
+ 1st: ambulo
47
+ 2nd: ambulas
48
+ subj:
49
+ pres:
50
+ pl:
51
+ 1st: ambulemus
52
+ 2nd: ambuletis
53
+
@@ -90,18 +90,18 @@ RSpec.describe IsoDoc::I18n do
90
90
  e = HTMLEntities.new
91
91
  c = IsoDoc::I18n.new("fr", "Latn")
92
92
  expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
93
- .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
93
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; " \
94
94
  "and&#xa0;: code&#x202f;!"
95
95
  expect(e.encode(c.l10n("Code; &#xab;code&#xbb; and: code!"), :hexadecimal))
96
- .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
96
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; " \
97
97
  "and&#xa0;: code&#x202f;!"
98
98
  c = IsoDoc::I18n.new("fr", "Latn", locale: "FR")
99
99
  expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
100
- .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
100
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; " \
101
101
  "and&#xa0;: code&#x202f;!"
102
102
  c = IsoDoc::I18n.new("fr", "Latn", locale: "CH")
103
103
  expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
104
- .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
104
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; " \
105
105
  "and&#x202f;: code&#x202f;!"
106
106
  expect(e.encode(c.l10n("http://xyz a;b"), :hexadecimal))
107
107
  .to be_equivalent_to "http://xyz a;b"
@@ -136,4 +136,23 @@ RSpec.describe IsoDoc::I18n do
136
136
  expect(c.inflect_ordinal(5, term, "SpelloutRules"))
137
137
  .to eq "fifth"
138
138
  end
139
+
140
+ it "does inflections" do
141
+ c = IsoDoc::I18n.new("en", "Latn", i18nyaml: "spec/assets/new.yaml")
142
+ expect(c.inflect("John", number: "sg")).to eq "John"
143
+ expect(c.inflect("Fred", number: "sg")).to eq "Fred"
144
+ expect(c.inflect("Fred", number: "pl")).to eq "Freds"
145
+ expect(c.inflect("Fred", number: "du")).to eq "Fred"
146
+ expect(c.inflect("Fred", tense: "pres")).to eq "Fred"
147
+ expect(c.inflect("Man", case: "dat")).to eq "viri"
148
+ expect(c.inflect("Man", number: "sg", case: "dat")).to eq "viri"
149
+ expect(c.inflect("Man", number: "pl", case: "acc")).to eq "virem"
150
+ expect(c.inflect("Woman", number: "pl", case: "gen")).to eq "mulierum"
151
+ expect(c.inflect("Good", number: "pl", case: "gen")).to eq "bonorum"
152
+ expect(c.inflect("Good", number: "pl", case: "gen", gender: "fem"))
153
+ .to eq "bonarum"
154
+ expect(c.inflect("Walk", person: "2nd")).to eq "ambulas"
155
+ expect(c.inflect("Walk", person: "2nd", number: "pl", mood: "subj"))
156
+ .to eq "ambuletis"
157
+ end
139
158
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isodoc-i18n
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-27 00:00:00.000000000 Z
11
+ date: 2022-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities