isodoc-i18n 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7552e0a62364e47c4f8ea7b2810e1ba5d11e39d2a3711792c826412b1aef661f
4
- data.tar.gz: 29d4eef4f2a07bcf6ad1de7ddc229cf7ff9b0ffa8e55c81652d2174165cf2214
3
+ metadata.gz: 490a22f13264a470afa34644c450651239bb43409aa8e579bae72229d165fe38
4
+ data.tar.gz: 9be5c331d23f732e37e2588ff3269e867231f56b6b074417258f5e01fa55aeba
5
5
  SHA512:
6
- metadata.gz: 220fddc821f5f00d4f1eecca1f7583b0236254bb0a5141c50d7583f4c725be2b4aca949ffc4f89ed390c01850f2912d86b9562959773efabf1a3df4838e8ec60
7
- data.tar.gz: 59373ef3a3688bf4f382da1e2a5df55b3b7ae1615385bc04504faf9421e9c5e66333ed1859243f0e8a86d4f42724692ab1cd9b8f9f0bd62d4c4e70cceb7e93cd
6
+ metadata.gz: 024f04dedc8bdef757f52d1ae35b69af155d786bb5e1075b6be8c11e2a2039b0bf9ee4e39a0bbca260e3edee85aafb70360240286a38ce12cb41fef88db70e02
7
+ data.tar.gz: 6952b1cf007e02b5e7fcb9ca3507cda68e22e27facee436fafc41daaa65b6b5eed3b5e0b9fbbde57d8c10361ab55dcc870a25b92a0f624597b02bb3a912c7586
data/isodoc-i18n.gemspec CHANGED
@@ -37,5 +37,6 @@ Gem::Specification.new do |spec|
37
37
  spec.add_development_dependency "simplecov", "~> 0.15"
38
38
  spec.add_development_dependency "timecop", "~> 0.9"
39
39
  spec.add_development_dependency "webmock"
40
+ spec.add_development_dependency "xml-c14n"
40
41
  # spec.metadata["rubygems_mfa_required"] = "true"
41
42
  end
@@ -1,5 +1,5 @@
1
1
  module IsoDoc
2
2
  class I18n
3
- VERSION = "1.2.1".freeze
3
+ VERSION = "1.2.2".freeze
4
4
  end
5
5
  end
data/lib/isodoc/l10n.rb CHANGED
@@ -5,9 +5,8 @@ module IsoDoc
5
5
  end
6
6
 
7
7
  # function localising spaces and punctuation.
8
- # Not clear if period needs to be localised for zh
9
8
  def l10n(text, lang = @lang, script = @script, locale = @locale)
10
- lang == "zh" and text = l10n_zh(text, script)
9
+ %w(zh ja ko).include?(lang) and text = l10n_zh(text, script)
11
10
  lang == "fr" && text = l10n_fr(text, locale || "FR")
12
11
  bidiwrap(text, lang, script)
13
12
  end
@@ -30,11 +29,11 @@ module IsoDoc
30
29
  .default_script(@lang))]
31
30
  end
32
31
 
32
+ # CJK
33
33
  def l10n_zh(text, script = "Hans")
34
34
  xml = Nokogiri::XML::DocumentFragment.parse(text)
35
35
  xml.traverse do |n|
36
- next unless n.text?
37
-
36
+ n.text? or next
38
37
  n.replace(l10_zh1(cleanup_entities(n.text, is_xml: false), script))
39
38
  end
40
39
  xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
@@ -56,19 +55,32 @@ module IsoDoc
56
55
 
57
56
  # note: we can't differentiate comma from enumeration comma 、
58
57
  def l10_zh1(text, _script)
59
- l10n_zh_remove_space(l10n_zh_punct(text))
58
+ l10n_zh_dash(l10n_zh_remove_space(l10n_zh_punct(text)))
60
59
  end
61
60
 
61
+ # CJK punct if (^|CJK).($|CJK)
62
62
  def l10n_zh_punct(text)
63
- ["::", ",,", "..", "))", "]]", "::", ";;", "??", "!!", "–~"].each do |m|
64
- text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
65
- end
66
- ["((", "[["].each do |m|
67
- text = text.gsub(/#{Regexp.quote m[0]}/, m[1])
63
+ ["::", ",,", "..", "))", "]]", ";;", "??", "!!", "((", "[["].each do |m|
64
+ text = text.gsub(/(?<=#{ZH_CHAR}|^) # CJK character, or start of string
65
+ (\s*) # Latin spaces optional
66
+ #{Regexp.quote(m[0])} # Latin punctuation we want to convert to CJK
67
+ (?= \s* # followed (lookahead) by ignorable Latin spaces
68
+ [:,.()\[\];?!-]* # Latin punctuation which we will also convert to CJK
69
+ (#{ZH_CHAR}|$) # CJK character, or end of string
70
+ ) /x, "\\1#{m[1]}")
68
71
  end
69
72
  text
70
73
  end
71
74
 
75
+ def l10n_zh_dash(text)
76
+ text.gsub(/(?<=#{ZH_CHAR}|^) # CJK character, or start of string
77
+ (\d*) # optional digits
78
+ – # en-dash
79
+ (\d*) # optional digits
80
+ (#{ZH_CHAR}|$) # CJK character, or end of string
81
+ /xo, "\\1~\\2\\3")
82
+ end
83
+
72
84
  def l10n_zh_remove_space(text)
73
85
  text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
74
86
  .gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
@@ -102,7 +114,8 @@ module IsoDoc
102
114
 
103
115
  def interleave_space_cjk?(text)
104
116
  text.size == 2 or return
105
- ["\u2014\u2014", "\u2025\u2025", "\u2026\u2026", "\u22ef\u22ef"].include?(text) ||
117
+ ["\u2014\u2014", "\u2025\u2025", "\u2026\u2026",
118
+ "\u22ef\u22ef"].include?(text) ||
106
119
  /\d\d|\p{Latin}\p{Latin}|[[:space:]]/.match?(text) ||
107
120
  /^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/.match?(text) ||
108
121
  /[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/.match?(text) ||
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isodoc-i18n
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-02-19 00:00:00.000000000 Z
11
+ date: 2024-10-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -206,6 +206,20 @@ dependencies:
206
206
  - - ">="
207
207
  - !ruby/object:Gem::Version
208
208
  version: '0'
209
+ - !ruby/object:Gem::Dependency
210
+ name: xml-c14n
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - ">="
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ type: :development
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - ">="
221
+ - !ruby/object:Gem::Version
222
+ version: '0'
209
223
  description: 'Internationalisation for Metanorma rendering
210
224
 
211
225
  '
@@ -249,7 +263,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
249
263
  - !ruby/object:Gem::Version
250
264
  version: '0'
251
265
  requirements: []
252
- rubygems_version: 3.3.26
266
+ rubygems_version: 3.3.27
253
267
  signing_key:
254
268
  specification_version: 4
255
269
  summary: isodoc-i18n