isodoc-i18n 1.0.7 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ce8cb0d922cdc28fe16fc3605db3d205fe66c12d0224282f6597d9510d44ee6d
4
- data.tar.gz: 6fe8fc95e446fdc6b57eb9172d6f4d0665fd2f48ed0c29877819c3d8398805dc
3
+ metadata.gz: 8f00540677aae6f53c0c517b376529ce6eb5094c6cebd1d1a9b31ce20e346266
4
+ data.tar.gz: 69dd44b3aedc560540c5de52f05f0ae11b1332edccb4565817845648f23a9631
5
5
  SHA512:
6
- metadata.gz: 75991361380709948d84f9707bb999e3637f634433ebd83ee3f0b24e783381131326fa824e1897f8900a6cf8c30522a56589887fc93def7458480eadaaa60c21
7
- data.tar.gz: 0e1a2149f1a06190962dda186978f01677d35ef25caf711dd058753741dca79b74c3de98cb04cbe0be4d1ba770b5f649589d7fd0b9f12e6dafb04b58372fa7fe
6
+ metadata.gz: 8b3a53fe9058b2cba6f929938e61147adcb6eaa0782705ca2097cf448f63a76500abbe4b4d3513b9d0cfca04456547d8127e278ef7a521ec3b1cff6de8302f58
7
+ data.tar.gz: 24ff8195e4172769e91fc4f879fe6835b4f24170041e16198ab3a405625f07ed56bce3a702604b5cb25102b4d6f21ce87d5d92889122cd524a4694549ba8d0c8
data/lib/isodoc/i18n.rb CHANGED
@@ -54,9 +54,10 @@ module IsoDoc
54
54
  @labels[key] = val
55
55
  end
56
56
 
57
- def initialize(lang, script, i18nyaml: nil, i18nhash: nil)
57
+ def initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil)
58
58
  @lang = lang
59
59
  @script = script
60
+ @locale = locale
60
61
  y = load_yaml(lang, script, i18nyaml, i18nhash)
61
62
  @labels = y
62
63
  @labels["language"] = @lang
@@ -66,17 +67,16 @@ module IsoDoc
66
67
  end
67
68
  end
68
69
 
69
- def self.l10n(text, lang = @lang, script = @script)
70
- l10n(text, lang, script)
70
+ def self.l10n(text, lang = @lang, script = @script, locale = @locale)
71
+ l10n(text, lang, script, locale)
71
72
  end
72
73
 
73
- # TODO: move to localization file
74
74
  # function localising spaces and punctuation.
75
75
  # Not clear if period needs to be localised for zh
76
- def l10n(text, lang = @lang, script = @script)
77
- if lang == "zh" && script == "Hans" then l10n_zh(text)
78
- else bidiwrap(text, lang, script)
79
- end
76
+ def l10n(text, lang = @lang, script = @script, locale = @locale)
77
+ lang == "zh" && script == "Hans" and text = l10n_zh(text)
78
+ lang == "fr" && text = l10n_fr(text, locale || "FR")
79
+ bidiwrap(text, lang, script)
80
80
  end
81
81
 
82
82
  def bidiwrap(text, lang, script)
@@ -107,23 +107,53 @@ module IsoDoc
107
107
  xml.to_xml.gsub(/<b>/, "").gsub("</b>", "").gsub(/<\?[^>]+>/, "")
108
108
  end
109
109
 
110
+ def l10n_fr(text, locale)
111
+ xml = Nokogiri::HTML::DocumentFragment.parse(text)
112
+ xml.traverse do |n|
113
+ next unless n.text?
114
+
115
+ n.replace(cleanup_entities(l10n_fr1(n.text, locale), is_xml: false))
116
+ end
117
+ xml.to_xml
118
+ end
119
+
110
120
  ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|"\
111
121
  "\\p{In Halfwidth And Fullwidth Forms}".freeze
112
122
 
113
123
  # note: we can't differentiate comma from enumeration comma 、
114
124
  def l10_zh1(text)
125
+ l10n_zh_remove_space(l10n_zh_punct(text))
126
+ end
127
+
128
+ def l10n_zh_punct(text)
115
129
  [":：", ",，", ".。", ")）", "]】", ":：", ";；", "?？", "!！"].each do |m|
116
130
  text = text.gsub(/(?<=#{ZH_CHAR})#{Regexp.quote m[0]}/, m[1])
131
+ text = text.gsub(/^#{Regexp.quote m[0]}/, m[1])
117
132
  end
118
133
  ["(（", "[【"].each do |m|
119
134
  text = text.gsub(/#{Regexp.quote m[0]}(?=#{ZH_CHAR})/, m[1])
120
135
  end
136
+ text
137
+ end
138
+
139
+ def l10n_zh_remove_space(text)
121
140
  text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
122
141
  .gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
123
142
  .gsub(/(?<=#{ZH_CHAR}) (?=\d)/o, "")
124
143
  .gsub(/(?<=#{ZH_CHAR}) (?=[A-Za-z](#{ZH_CHAR}|$))/o, "")
125
144
  end
126
145
 
146
+ def l10n_fr1(text, locale)
147
+ text = text.gsub(/(?<=\p{Alnum})([»›;?!])(?=\s)/, "\u202f\\1")
148
+ text = text.gsub(/(?<=\p{Alnum})([»›;?!])$/, "\u202f\\1")
149
+ text = text.gsub(/^([»›;?!])/, "\u202f\\1")
150
+ text = text.gsub(/([«‹])/, "\\1\u202f")
151
+ colonsp = locale == "CH" ? "\u202f" : "\u00a0"
152
+ text = text.gsub(/(?<=\p{Alnum})(:)(?=\s)/, "#{colonsp}\\1")
153
+ text = text.gsub(/(?<=\p{Alnum})(:)$/, "#{colonsp}\\1")
154
+ text.gsub(/^(:\s)/, "#{colonsp}\\1")
155
+ end
156
+
127
157
  def boolean_conj(list, conn)
128
158
  case list.size
129
159
  when 0 then ""
@@ -1,5 +1,5 @@
1
1
  module IsoDoc
2
2
  class I18n
3
- VERSION = "1.0.7".freeze
3
+ VERSION = "1.1.1".freeze
4
4
  end
5
5
  end
@@ -38,7 +38,8 @@ RSpec.describe IsoDoc::I18n do
38
38
  end
39
39
 
40
40
  it "loads language hash overrides" do
41
- c = IsoDoc::I18n.new("en", "Latn", i18nhash: YAML.load_file("spec/assets/new.yaml"))
41
+ c = IsoDoc::I18n.new("en", "Latn",
42
+ i18nhash: YAML.load_file("spec/assets/new.yaml"))
42
43
  expect(c.text).to eq "text2"
43
44
  expect(c.at).to eq "at"
44
45
  expect(c.hash.to_s).to be_equivalent_to '{"key1"=>"val1", "key2"=>"val2"}'
@@ -85,6 +86,27 @@ RSpec.describe IsoDoc::I18n do
85
86
  .to be_equivalent_to "&#x61c;Code (hello, world.)&#x61c;"
86
87
  end
87
88
 
89
+ it "does French localisation" do
90
+ e = HTMLEntities.new
91
+ c = IsoDoc::I18n.new("fr", "Latn")
92
+ expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
93
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
94
+ "and&#xa0;: code&#x202f;!"
95
+ expect(e.encode(c.l10n("Code; &#xab;code&#xbb; and: code!"), :hexadecimal))
96
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
97
+ "and&#xa0;: code&#x202f;!"
98
+ c = IsoDoc::I18n.new("fr", "Latn", locale: "FR")
99
+ expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
100
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
101
+ "and&#xa0;: code&#x202f;!"
102
+ c = IsoDoc::I18n.new("fr", "Latn", locale: "CH")
103
+ expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
104
+ .to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
105
+ "and&#x202f;: code&#x202f;!"
106
+ expect(e.encode(c.l10n("http://xyz a;b"), :hexadecimal))
107
+ .to be_equivalent_to "http://xyz a;b"
108
+ end
109
+
88
110
  it "does boolean conjunctions" do
89
111
  c = IsoDoc::I18n.new("en", "Latn")
90
112
  expect(c.boolean_conj([], "and")).to eq ""
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isodoc-i18n
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.7
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-05 00:00:00.000000000 Z
11
+ date: 2022-09-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities