isodoc-i18n 1.1.10 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 17fcd1159b3550278a757d9bf50e2fde08d83b1c2be92b14f3ad6789dc509f20
4
- data.tar.gz: fce8b97a8ac2f5df8754eb62b5994a74a5983098c00c259a5d751da9b30a3f11
3
+ metadata.gz: 7552e0a62364e47c4f8ea7b2810e1ba5d11e39d2a3711792c826412b1aef661f
4
+ data.tar.gz: 29d4eef4f2a07bcf6ad1de7ddc229cf7ff9b0ffa8e55c81652d2174165cf2214
5
5
  SHA512:
6
- metadata.gz: 18cc15dbfc165f6b0428ecb40a804065f1493c7c3fb77dd98a944f6613c8cf18c93920c559b5354eea818f4623d43e6be7dd1c58979556739bd4935811e0f935
7
- data.tar.gz: 8d1e430afb9e49a7a82868c2987266fac810c0b4bf4e30df2e67965ed3462e5dcaa289199fa9fc29e9bbe306cca427ee6b7c712821c1ec424a1f20dcf548aee3
6
+ metadata.gz: 220fddc821f5f00d4f1eecca1f7583b0236254bb0a5141c50d7583f4c725be2b4aca949ffc4f89ed390c01850f2912d86b9562959773efabf1a3df4838e8ec60
7
+ data.tar.gz: 59373ef3a3688bf4f382da1e2a5df55b3b7ae1615385bc04504faf9421e9c5e66333ed1859243f0e8a86d4f42724692ab1cd9b8f9f0bd62d4c4e70cceb7e93cd
data/Gemfile CHANGED
@@ -6,6 +6,4 @@ git_source(:github) { |repo| "https://github.com/#{repo}" }
6
6
 
7
7
  gemspec
8
8
 
9
- if File.exist? 'Gemfile.devel'
10
- eval File.read('Gemfile.devel'), nil, 'Gemfile.devel' # rubocop:disable Security/Eval
11
- end
9
+ eval_gemfile("Gemfile.devel") rescue nil
data/isodoc-i18n.gemspec CHANGED
@@ -23,7 +23,8 @@ Gem::Specification.new do |spec|
23
23
  spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
24
24
 
25
25
  spec.add_dependency "htmlentities", "~> 4.3.4"
26
- spec.add_dependency "metanorma-utils", ">= 1.5.0"
26
+ spec.add_dependency "liquid", "~> 5"
27
+ spec.add_dependency "metanorma-utils", ">= 1.7.0"
27
28
  spec.add_dependency "twitter_cldr"
28
29
 
29
30
  spec.add_development_dependency "debug"
data/lib/isodoc/i18n/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module IsoDoc
2
2
  class I18n
3
- VERSION = "1.1.10".freeze
3
+ VERSION = "1.2.1".freeze
4
4
  end
5
5
  end
data/lib/isodoc/i18n.rb CHANGED
@@ -1,11 +1,16 @@
1
1
  require "htmlentities"
2
2
  require "twitter_cldr"
3
- require_relative "i18n/version"
4
3
  require_relative "i18n-yaml"
5
4
  require_relative "date"
5
+ require_relative "l10n"
6
+ require_relative "liquid/liquid"
7
+ require "liquid"
8
+ require_relative "i18n/version"
6
9
 
7
10
  module IsoDoc
8
11
  class I18n
12
+ attr_accessor :labels
13
+
9
14
  def initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil)
10
15
  @lang = lang
11
16
  @script = script
@@ -14,6 +19,12 @@ module IsoDoc
14
19
  @cal_en = TwitterCldr::Shared::Calendar.new(:en)
15
20
  @c = HTMLEntities.new
16
21
  init_labels(i18nyaml, i18nhash)
22
+ liquid_init
23
+ end
24
+
25
+ def liquid_init
26
+ ::IsoDoc::I18n::Liquid.set(self)
27
+ ::Liquid::Template.register_filter(::IsoDoc::I18n::Liquid)
17
28
  end
18
29
 
19
30
  def calendar_data
@@ -26,119 +37,15 @@ module IsoDoc
26
37
  @labels = load_yaml(@lang, @script, i18nyaml, i18nhash)
27
38
  @labels["language"] = @lang
28
39
  @labels["script"] = @script
29
- @labels.each do |k, _v|
40
+ @labels.each_key do |k|
30
41
  self.class.send(:define_method, k.downcase) { get[k] }
31
42
  end
32
43
  end
33
44
 
34
- def self.l10n(text, lang = @lang, script = @script, locale = @locale)
35
- l10n(text, lang, script, locale)
36
- end
37
-
38
- # function localising spaces and punctuation.
39
- # Not clear if period needs to be localised for zh
40
- def l10n(text, lang = @lang, script = @script, locale = @locale)
41
- lang == "zh" and text = l10n_zh(text, script)
42
- lang == "fr" && text = l10n_fr(text, locale || "FR")
43
- bidiwrap(text, lang, script)
44
- end
45
-
46
- def bidiwrap(text, lang, script)
47
- my_script, my_rtl, outer_rtl = bidiwrap_vars(lang, script)
48
- if my_rtl && !outer_rtl
49
- mark = %w(Arab Aran).include?(my_script) ? "؜" : "‏"
50
- "#{mark}#{text}#{mark}"
51
- elsif !my_rtl && outer_rtl then "‎#{text}‎"
52
- else text
53
- end
54
- end
55
-
56
- def bidiwrap_vars(lang, script)
57
- my_script = script || Metanorma::Utils.default_script(lang)
58
- [my_script,
59
- Metanorma::Utils.rtl_script?(my_script),
60
- Metanorma::Utils.rtl_script?(@script || Metanorma::Utils
61
- .default_script(@lang))]
62
- end
63
-
64
- def l10n_zh(text, script = "Hans")
65
- xml = Nokogiri::XML::DocumentFragment.parse(text)
66
- xml.traverse do |n|
67
- next unless n.text?
68
-
69
- n.replace(l10_zh1(cleanup_entities(n.text, is_xml: false), script))
70
- end
71
- xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
72
- .gsub(/<\?[^>]+>/, "")
73
- end
74
-
75
- def l10n_fr(text, locale)
76
- xml = Nokogiri::XML::DocumentFragment.parse(text)
77
- xml.traverse do |n|
78
- next unless n.text?
79
-
80
- n.replace(l10n_fr1(cleanup_entities(n.text, is_xml: false), locale))
81
- end
82
- xml.to_xml(encoding: "UTF-8")
83
- end
84
-
85
- ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|" \
86
- "\\p{In Halfwidth And Fullwidth Forms}".freeze
87
-
88
- # note: we can't differentiate comma from enumeration comma 、
89
- def l10_zh1(text, _script)
90
- l10n_zh_remove_space(l10n_zh_punct(text))
91
- end
92
-
93
- def l10n_zh_punct(text)
94
- ["::", ",,", "..", "))", "]]", "::", ";;", "??", "!!", "–~"].each do |m|
95
- text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
96
- end
97
- ["((", "[["].each do |m|
98
- text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
99
- end
100
- text
101
- end
102
-
103
- def l10n_zh_remove_space(text)
104
- text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
105
- .gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
106
- .gsub(/(?<=#{ZH_CHAR}) (?=\d)/o, "")
107
- .gsub(/(?<=#{ZH_CHAR}) (?=[A-Za-z](#{ZH_CHAR}|$))/o, "")
108
- end
109
-
110
- def l10n_fr1(text, locale)
111
- text = text.gsub(/(?<=\p{Alnum})([»›;?!])(?=\s)/, "\u202f\\1")
112
- text = text.gsub(/(?<=\p{Alnum})([»›;?!])$/, "\u202f\\1")
113
- text = text.gsub(/^([»›;?!])/, "\u202f\\1")
114
- text = text.gsub(/([«‹])/, "\\1\u202f")
115
- colonsp = locale == "CH" ? "\u202f" : "\u00a0"
116
- text = text.gsub(/(?<=\p{Alnum})(:)(?=\s)/, "#{colonsp}\\1")
117
- text = text.gsub(/(?<=\p{Alnum})(:)$/, "#{colonsp}\\1")
118
- text.gsub(/^(:\s)/, "#{colonsp}\\1")
119
- end
120
-
121
- def self.cjk_extend(text)
122
- cjk_extend(text)
123
- end
124
-
125
- def cjk_extend(title)
126
- @c.decode(title).chars.map.with_index do |n, i|
127
- if i.zero? || !interleave_space_cjk?(title[i - 1] + title[i])
128
- n
129
- else "\u3000#{n}"
130
- end
131
- end.join
132
- end
133
-
134
- def interleave_space_cjk?(text)
135
- text.size == 2 or return
136
- ["\u2014\u2014", "\u2025\u2025", "\u2026\u2026", "\u22ef\u22ef"].include?(text) ||
137
- /\d\d|\p{Latin}\p{Latin}|[[:space:]]/.match?(text) ||
138
- /^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/.match?(text) ||
139
- /[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/.match?(text) ||
140
- /[\u3002.\u3001,\u30fb:;\u2010\u301c\u30a0\u2013!?\u203c\u2047\u2048\u2049]/.match?(text) and return false
141
- true
45
+ # populate with variables, Liquid, inflections, ordinals/spellout
46
+ def populate(keys, vars = {})
47
+ ::Liquid::Template.parse(@labels.dig(*Array(keys)))
48
+ .render(vars.merge("labels" => @labels))
142
49
  end
143
50
 
144
51
  def boolean_conj(list, conn)
@@ -190,7 +97,7 @@ module IsoDoc
190
97
  INFLECTIONS = {
191
98
  number: "sg",
192
99
  case: "nom",
193
- gender: "masc",
100
+ gender: "m",
194
101
  person: "3rd",
195
102
  voice: "act",
196
103
  mood: "ind",
@@ -201,7 +108,7 @@ module IsoDoc
201
108
 
202
109
  def ordinal_key(term)
203
110
  @labels["ordinal_keys"].each_with_object([]) do |k, m|
204
- m << (term[k] || INFLECTIONS[k.to_sym])
111
+ m << (term[k.to_s] || INFLECTIONS[k.to_sym])
205
112
  end.join(".")
206
113
  end
207
114
 
@@ -216,7 +123,6 @@ module IsoDoc
216
123
  def inflect(word, options)
217
124
  i = @labels.dig("inflection", word) or return word
218
125
  i.is_a? String and return i
219
-
220
126
  INFLECTION_ORDER.each do |x|
221
127
  infl = options[x] || INFLECTIONS[x]
222
128
  i = i[infl] if i[infl]
data/lib/isodoc/l10n.rb ADDED
@@ -0,0 +1,113 @@
1
+ module IsoDoc
2
+ class I18n
3
+ def self.l10n(text, lang = @lang, script = @script, locale = @locale)
4
+ l10n(text, lang, script, locale)
5
+ end
6
+
7
+ # function localising spaces and punctuation.
8
+ # Not clear if period needs to be localised for zh
9
+ def l10n(text, lang = @lang, script = @script, locale = @locale)
10
+ lang == "zh" and text = l10n_zh(text, script)
11
+ lang == "fr" && text = l10n_fr(text, locale || "FR")
12
+ bidiwrap(text, lang, script)
13
+ end
14
+
15
+ def bidiwrap(text, lang, script)
16
+ my_script, my_rtl, outer_rtl = bidiwrap_vars(lang, script)
17
+ if my_rtl && !outer_rtl
18
+ mark = %w(Arab Aran).include?(my_script) ? "&#x61c;" : "&#x200f;"
19
+ "#{mark}#{text}#{mark}"
20
+ elsif !my_rtl && outer_rtl then "&#x200e;#{text}&#x200e;"
21
+ else text
22
+ end
23
+ end
24
+
25
+ def bidiwrap_vars(lang, script)
26
+ my_script = script || Metanorma::Utils.default_script(lang)
27
+ [my_script,
28
+ Metanorma::Utils.rtl_script?(my_script),
29
+ Metanorma::Utils.rtl_script?(@script || Metanorma::Utils
30
+ .default_script(@lang))]
31
+ end
32
+
33
+ def l10n_zh(text, script = "Hans")
34
+ xml = Nokogiri::XML::DocumentFragment.parse(text)
35
+ xml.traverse do |n|
36
+ next unless n.text?
37
+
38
+ n.replace(l10_zh1(cleanup_entities(n.text, is_xml: false), script))
39
+ end
40
+ xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
41
+ .gsub(/<\?[^>]+>/, "")
42
+ end
43
+
44
+ def l10n_fr(text, locale)
45
+ xml = Nokogiri::XML::DocumentFragment.parse(text)
46
+ xml.traverse do |n|
47
+ next unless n.text?
48
+
49
+ n.replace(l10n_fr1(cleanup_entities(n.text, is_xml: false), locale))
50
+ end
51
+ xml.to_xml(encoding: "UTF-8")
52
+ end
53
+
54
+ ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|" \
55
+ "\\p{In Halfwidth And Fullwidth Forms}".freeze
56
+
57
+ # note: we can't differentiate comma from enumeration comma 、
58
+ def l10_zh1(text, _script)
59
+ l10n_zh_remove_space(l10n_zh_punct(text))
60
+ end
61
+
62
+ def l10n_zh_punct(text)
63
+ ["::", ",,", "..", "))", "]]", "::", ";;", "??", "!!", "–~"].each do |m|
64
+ text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
65
+ end
66
+ ["((", "[["].each do |m|
67
+ text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
68
+ end
69
+ text
70
+ end
71
+
72
+ def l10n_zh_remove_space(text)
73
+ text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
74
+ .gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
75
+ .gsub(/(?<=#{ZH_CHAR}) (?=\d)/o, "")
76
+ .gsub(/(?<=#{ZH_CHAR}) (?=[A-Za-z](#{ZH_CHAR}|$))/o, "")
77
+ end
78
+
79
+ def l10n_fr1(text, locale)
80
+ text = text.gsub(/(?<=\p{Alnum})([»›;?!])(?=\s)/, "\u202f\\1")
81
+ text = text.gsub(/(?<=\p{Alnum})([»›;?!])$/, "\u202f\\1")
82
+ text = text.gsub(/^([»›;?!])/, "\u202f\\1")
83
+ text = text.gsub(/([«‹])/, "\\1\u202f")
84
+ colonsp = locale == "CH" ? "\u202f" : "\u00a0"
85
+ text = text.gsub(/(?<=\p{Alnum})(:)(?=\s)/, "#{colonsp}\\1")
86
+ text = text.gsub(/(?<=\p{Alnum})(:)$/, "#{colonsp}\\1")
87
+ text.gsub(/^(:\s)/, "#{colonsp}\\1")
88
+ end
89
+
90
+ def self.cjk_extend(text)
91
+ cjk_extend(text)
92
+ end
93
+
94
+ def cjk_extend(title)
95
+ @c.decode(title).chars.map.with_index do |n, i|
96
+ if i.zero? || !interleave_space_cjk?(title[i - 1] + title[i])
97
+ n
98
+ else "\u3000#{n}"
99
+ end
100
+ end.join
101
+ end
102
+
103
+ def interleave_space_cjk?(text)
104
+ text.size == 2 or return
105
+ ["\u2014\u2014", "\u2025\u2025", "\u2026\u2026", "\u22ef\u22ef"].include?(text) ||
106
+ /\d\d|\p{Latin}\p{Latin}|[[:space:]]/.match?(text) ||
107
+ /^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/.match?(text) ||
108
+ /[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/.match?(text) ||
109
+ /[\u3002.\u3001,\u30fb:;\u2010\u301c\u30a0\u2013!?\u203c\u2047\u2048\u2049]/.match?(text) and return false
110
+ true
111
+ end
112
+ end
113
+ end
data/lib/isodoc/liquid/liquid.rb ADDED
@@ -0,0 +1,38 @@
1
+ module IsoDoc
2
+ class I18n
3
+ module Liquid
4
+ def self.set(i18n)
5
+ @@i18n = i18n
6
+ end
7
+
8
+ def parse_hash(str, symbol: true)
9
+ str.split(/,\s*/).each_with_object({}) do |e, m|
10
+ k, v = e.split(/:/, 2)
11
+ m[symbol ? k.to_sym : k.to_s] = v
12
+ end
13
+ end
14
+
15
+ # word | inflect: nom,sg,neut
16
+ def inflect(word, options)
17
+ @@i18n.inflect(word, parse_hash(options))
18
+ end
19
+
20
+ # num | ordinal_num: label, inflection
21
+ # e.g. "3" | ordinal: "edition", "case:acc"
22
+ # combines inflection information from label and inflection
23
+ def ordinal_num(num, label = "", infl = "")
24
+ l = @@i18n.labels[label]
25
+ h = @@i18n.labels["inflection"]&.dig(l, "grammar") || {}
26
+ grammar = h.merge(parse_hash(infl, symbol: false))
27
+ @@i18n.inflect_ordinal(num.to_i, grammar, "OrdinalRules")
28
+ end
29
+
30
+ def ordinal_word(num, label = "", infl = "")
31
+ l = @@i18n.labels[label]
32
+ h = @@i18n.labels["inflection"]&.dig(l, "grammar") || {}
33
+ grammar = h.merge(parse_hash(infl, symbol: false))
34
+ @@i18n.inflect_ordinal(num.to_i, grammar, "SpelloutRules")
35
+ end
36
+ end
37
+ end
38
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isodoc-i18n
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.10
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-04 00:00:00.000000000 Z
11
+ date: 2024-02-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -24,20 +24,34 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 4.3.4
27
+ - !ruby/object:Gem::Dependency
28
+ name: liquid
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: metanorma-utils
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
45
  - - ">="
32
46
  - !ruby/object:Gem::Version
33
- version: 1.5.0
47
+ version: 1.7.0
34
48
  type: :runtime
35
49
  prerelease: false
36
50
  version_requirements: !ruby/object:Gem::Requirement
37
51
  requirements:
38
52
  - - ">="
39
53
  - !ruby/object:Gem::Version
40
- version: 1.5.0
54
+ version: 1.7.0
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: twitter_cldr
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -214,6 +228,8 @@ files:
214
228
  - lib/isodoc/i18n-yaml.rb
215
229
  - lib/isodoc/i18n.rb
216
230
  - lib/isodoc/i18n/version.rb
231
+ - lib/isodoc/l10n.rb
232
+ - lib/isodoc/liquid/liquid.rb
217
233
  homepage: https://github.com/metanorma/isodoc-i18n
218
234
  licenses:
219
235
  - BSD-2-Clause