isodoc-i18n 1.2.1 → 1.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7552e0a62364e47c4f8ea7b2810e1ba5d11e39d2a3711792c826412b1aef661f
4
- data.tar.gz: 29d4eef4f2a07bcf6ad1de7ddc229cf7ff9b0ffa8e55c81652d2174165cf2214
3
+ metadata.gz: 490a22f13264a470afa34644c450651239bb43409aa8e579bae72229d165fe38
4
+ data.tar.gz: 9be5c331d23f732e37e2588ff3269e867231f56b6b074417258f5e01fa55aeba
5
5
  SHA512:
6
- metadata.gz: 220fddc821f5f00d4f1eecca1f7583b0236254bb0a5141c50d7583f4c725be2b4aca949ffc4f89ed390c01850f2912d86b9562959773efabf1a3df4838e8ec60
7
- data.tar.gz: 59373ef3a3688bf4f382da1e2a5df55b3b7ae1615385bc04504faf9421e9c5e66333ed1859243f0e8a86d4f42724692ab1cd9b8f9f0bd62d4c4e70cceb7e93cd
6
+ metadata.gz: 024f04dedc8bdef757f52d1ae35b69af155d786bb5e1075b6be8c11e2a2039b0bf9ee4e39a0bbca260e3edee85aafb70360240286a38ce12cb41fef88db70e02
7
+ data.tar.gz: 6952b1cf007e02b5e7fcb9ca3507cda68e22e27facee436fafc41daaa65b6b5eed3b5e0b9fbbde57d8c10361ab55dcc870a25b92a0f624597b02bb3a912c7586
data/isodoc-i18n.gemspec CHANGED
@@ -37,5 +37,6 @@ Gem::Specification.new do |spec|
37
37
  spec.add_development_dependency "simplecov", "~> 0.15"
38
38
  spec.add_development_dependency "timecop", "~> 0.9"
39
39
  spec.add_development_dependency "webmock"
40
+ spec.add_development_dependency "xml-c14n"
40
41
  # spec.metadata["rubygems_mfa_required"] = "true"
41
42
  end
@@ -1,5 +1,5 @@
1
1
  module IsoDoc
2
2
  class I18n
3
- VERSION = "1.2.1".freeze
3
+ VERSION = "1.2.2".freeze
4
4
  end
5
5
  end
data/lib/isodoc/l10n.rb CHANGED
@@ -5,9 +5,8 @@ module IsoDoc
5
5
  end
6
6
 
7
7
  # function localising spaces and punctuation.
8
- # Not clear if period needs to be localised for zh
9
8
  def l10n(text, lang = @lang, script = @script, locale = @locale)
10
- lang == "zh" and text = l10n_zh(text, script)
9
+ %w(zh ja ko).include?(lang) and text = l10n_zh(text, script)
11
10
  lang == "fr" && text = l10n_fr(text, locale || "FR")
12
11
  bidiwrap(text, lang, script)
13
12
  end
@@ -30,11 +29,11 @@ module IsoDoc
30
29
  .default_script(@lang))]
31
30
  end
32
31
 
32
+ # CJK
33
33
  def l10n_zh(text, script = "Hans")
34
34
  xml = Nokogiri::XML::DocumentFragment.parse(text)
35
35
  xml.traverse do |n|
36
- next unless n.text?
37
-
36
+ n.text? or next
38
37
  n.replace(l10_zh1(cleanup_entities(n.text, is_xml: false), script))
39
38
  end
40
39
  xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
@@ -56,19 +55,32 @@ module IsoDoc
56
55
 
57
56
  # note: we can't differentiate comma from enumeration comma 、
58
57
  def l10_zh1(text, _script)
59
- l10n_zh_remove_space(l10n_zh_punct(text))
58
+ l10n_zh_dash(l10n_zh_remove_space(l10n_zh_punct(text)))
60
59
  end
61
60
 
61
+ # CJK punct if (^|CJK).($|CJK)
62
62
  def l10n_zh_punct(text)
63
- ["::", ",,", "..", "))", "]]", "::", ";;", "??", "!!", "–~"].each do |m|
64
- text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
65
- end
66
- ["((", "[["].each do |m|
67
- text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
63
+ ["::", ",,", "..", "))", "]]", ";;", "??", "!!", "((", "[["].each do |m|
64
+ text = text.gsub(/(?<=#{ZH_CHAR}|^) # CJK character, or start of string
65
+ (\s*) # Latin spaces optional
66
+ #{Regexp.quote(m[0])} # Latin punctuation we want to convert to CJK
67
+ (?= \s* # followed (lookahead) by ignorable Latin spaces
68
+ [:,.()\[\];?!-]* # Latin punctuation which we will also convert to CJK
69
+ (#{ZH_CHAR}|$) # CJK character, or end of string
70
+ ) /x, "\\1#{m[1]}")
68
71
  end
69
72
  text
70
73
  end
71
74
 
75
+ def l10n_zh_dash(text)
76
+ text.gsub(/(?<=#{ZH_CHAR}|^) # CJK character, or start of string
77
+ (\d*) # optional digits
78
+ – # en-dash
79
+ (\d*) # optional digits
80
+ (#{ZH_CHAR}|$) # CJK character, or end of string
81
+ /xo, "\\1~\\2\\3")
82
+ end
83
+
72
84
  def l10n_zh_remove_space(text)
73
85
  text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
74
86
  .gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
@@ -102,7 +114,8 @@ module IsoDoc
102
114
 
103
115
  def interleave_space_cjk?(text)
104
116
  text.size == 2 or return
105
- ["\u2014\u2014", "\u2025\u2025", "\u2026\u2026", "\u22ef\u22ef"].include?(text) ||
117
+ ["\u2014\u2014", "\u2025\u2025", "\u2026\u2026",
118
+ "\u22ef\u22ef"].include?(text) ||
106
119
  /\d\d|\p{Latin}\p{Latin}|[[:space:]]/.match?(text) ||
107
120
  /^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/.match?(text) ||
108
121
  /[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/.match?(text) ||
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isodoc-i18n
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-02-19 00:00:00.000000000 Z
11
+ date: 2024-10-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -206,6 +206,20 @@ dependencies:
206
206
  - - ">="
207
207
  - !ruby/object:Gem::Version
208
208
  version: '0'
209
+ - !ruby/object:Gem::Dependency
210
+ name: xml-c14n
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - ">="
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ type: :development
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - ">="
221
+ - !ruby/object:Gem::Version
222
+ version: '0'
209
223
  description: 'Internationalisation for Metanorma rendering
210
224
 
211
225
  '
@@ -249,7 +263,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
249
263
  - !ruby/object:Gem::Version
250
264
  version: '0'
251
265
  requirements: []
252
- rubygems_version: 3.3.26
266
+ rubygems_version: 3.3.27
253
267
  signing_key:
254
268
  specification_version: 4
255
269
  summary: isodoc-i18n