isodoc-i18n 1.0.7 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ce8cb0d922cdc28fe16fc3605db3d205fe66c12d0224282f6597d9510d44ee6d
4
- data.tar.gz: 6fe8fc95e446fdc6b57eb9172d6f4d0665fd2f48ed0c29877819c3d8398805dc
3
+ metadata.gz: 71a7b4e2dcb8b00587cc92988c85a99c411444fb8698f8bd067c92a4f90751e2
4
+ data.tar.gz: 6304cf921c57b555ff259b9c1dfad4d7306e726c7f56f08291e4959a188a6181
5
5
  SHA512:
6
- metadata.gz: 75991361380709948d84f9707bb999e3637f634433ebd83ee3f0b24e783381131326fa824e1897f8900a6cf8c30522a56589887fc93def7458480eadaaa60c21
7
- data.tar.gz: 0e1a2149f1a06190962dda186978f01677d35ef25caf711dd058753741dca79b74c3de98cb04cbe0be4d1ba770b5f649589d7fd0b9f12e6dafb04b58372fa7fe
6
+ metadata.gz: 9de07fcd21fdfd8ba2ab4d758f76952cbfe84075caad216543cd0a6fa2bcf409fab8e71bc71309aa98ed50342727be9a46c5ffc2625c27f987ede80c65e29b18
7
+ data.tar.gz: fff4809002eef23c8d550baed4fb1185f134b99fb7a61733d3413045134b11b1e18eaab54a8787e0e4817601ff9051d67ffc1efc97cfaa589a73320188e70d98
data/lib/isodoc/i18n.rb CHANGED
@@ -54,9 +54,10 @@ module IsoDoc
54
54
  @labels[key] = val
55
55
  end
56
56
 
57
- def initialize(lang, script, i18nyaml: nil, i18nhash: nil)
57
+ def initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil)
58
58
  @lang = lang
59
59
  @script = script
60
+ @locale = locale
60
61
  y = load_yaml(lang, script, i18nyaml, i18nhash)
61
62
  @labels = y
62
63
  @labels["language"] = @lang
@@ -66,17 +67,16 @@ module IsoDoc
66
67
  end
67
68
  end
68
69
 
69
- def self.l10n(text, lang = @lang, script = @script)
70
- l10n(text, lang, script)
70
+ def self.l10n(text, lang = @lang, script = @script, locale = @locale)
71
+ l10n(text, lang, script, locale)
71
72
  end
72
73
 
73
- # TODO: move to localization file
74
74
  # function localising spaces and punctuation.
75
75
  # Not clear if period needs to be localised for zh
76
- def l10n(text, lang = @lang, script = @script)
77
- if lang == "zh" && script == "Hans" then l10n_zh(text)
78
- else bidiwrap(text, lang, script)
79
- end
76
+ def l10n(text, lang = @lang, script = @script, locale = @locale)
77
+ lang == "zh" && script == "Hans" and text = l10n_zh(text)
78
+ lang == "fr" && text = l10n_fr(text, locale || "FR")
79
+ bidiwrap(text, lang, script)
80
80
  end
81
81
 
82
82
  def bidiwrap(text, lang, script)
@@ -107,23 +107,51 @@ module IsoDoc
107
107
  xml.to_xml.gsub(/<b>/, "").gsub("</b>", "").gsub(/<\?[^>]+>/, "")
108
108
  end
109
109
 
110
+ def l10n_fr(text, locale)
111
+ xml = Nokogiri::HTML::DocumentFragment.parse(text)
112
+ xml.traverse do |n|
113
+ next unless n.text?
114
+
115
+ n.replace(cleanup_entities(l10n_fr1(n.text, locale), is_xml: false))
116
+ end
117
+ xml.to_xml
118
+ end
119
+
110
120
  ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|"\
111
121
  "\\p{In Halfwidth And Fullwidth Forms}".freeze
112
122
 
113
123
  # note: we can't differentiate comma from enumeration comma 、
114
124
  def l10_zh1(text)
125
+ l10n_zh_remove_space(l10n_zh_punct(text))
126
+ end
127
+
128
+ def l10n_zh_punct(text)
115
129
  ["::", ",,", ".。", "))", "]】", "::", ";;", "??", "!!"].each do |m|
116
130
  text = text.gsub(/(?<=#{ZH_CHAR})#{Regexp.quote m[0]}/, m[1])
131
+ text = text.gsub(/^#{Regexp.quote m[0]}/, m[1])
117
132
  end
118
133
  ["((", "[【"].each do |m|
119
134
  text = text.gsub(/#{Regexp.quote m[0]}(?=#{ZH_CHAR})/, m[1])
120
135
  end
136
+ text
137
+ end
138
+
139
+ def l10n_zh_remove_space(text)
121
140
  text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
122
141
  .gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
123
142
  .gsub(/(?<=#{ZH_CHAR}) (?=\d)/o, "")
124
143
  .gsub(/(?<=#{ZH_CHAR}) (?=[A-Za-z](#{ZH_CHAR}|$))/o, "")
125
144
  end
126
145
 
146
+ def l10n_fr1(text, locale)
147
+ text = text.gsub(/(?<=\p{Alnum})([»›;?!])/, "\u202f\\1")
148
+ text = text.gsub(/^([»›;?!])/, "\u202f\\1")
149
+ text = text.gsub(/([«‹])/, "\\1\u202f")
150
+ colonsp = locale == "CH" ? "\u202f" : "\u00a0"
151
+ text = text.gsub(/(?<=\p{Alnum})(:)/, "#{colonsp}\\1")
152
+ text.gsub(/^(:)/, "#{colonsp}\\1")
153
+ end
154
+
127
155
  def boolean_conj(list, conn)
128
156
  case list.size
129
157
  when 0 then ""
@@ -1,5 +1,5 @@
1
1
  module IsoDoc
2
2
  class I18n
3
- VERSION = "1.0.7".freeze
3
+ VERSION = "1.1.0".freeze
4
4
  end
5
5
  end
@@ -38,7 +38,8 @@ RSpec.describe IsoDoc::I18n do
38
38
  end
39
39
 
40
40
  it "loads language hash overrides" do
41
- c = IsoDoc::I18n.new("en", "Latn", i18nhash: YAML.load_file("spec/assets/new.yaml"))
41
+ c = IsoDoc::I18n.new("en", "Latn",
42
+ i18nhash: YAML.load_file("spec/assets/new.yaml"))
42
43
  expect(c.text).to eq "text2"
43
44
  expect(c.at).to eq "at"
44
45
  expect(c.hash.to_s).to be_equivalent_to '{"key1"=>"val1", "key2"=>"val2"}'
@@ -85,6 +86,25 @@ RSpec.describe IsoDoc::I18n do
85
86
  .to be_equivalent_to "&#x61c;Code (hello, world.)&#x61c;"
86
87
  end
87
88
 
89
+ it "does French localisation" do
90
+ e = HTMLEntities.new
91
+ c = IsoDoc::I18n.new("fr", "Latn")
92
+ expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
93
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
94
+ "and&#xa0;: code&#x202f;!"
95
+ expect(e.encode(c.l10n("Code; &#xab;code&#xbb; and: code!"), :hexadecimal))
96
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
97
+ "and&#xa0;: code&#x202f;!"
98
+ c = IsoDoc::I18n.new("fr", "Latn", locale: "FR")
99
+ expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
100
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
101
+ "and&#xa0;: code&#x202f;!"
102
+ c = IsoDoc::I18n.new("fr", "Latn", locale: "CH")
103
+ expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
104
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
105
+ "and&#x202f;: code&#x202f;!"
106
+ end
107
+
88
108
  it "does boolean conjunctions" do
89
109
  c = IsoDoc::I18n.new("en", "Latn")
90
110
  expect(c.boolean_conj([], "and")).to eq ""
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isodoc-i18n
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.7
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-05 00:00:00.000000000 Z
11
+ date: 2022-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities