metanorma-utils 1.10.2 → 1.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9f44541a6cf6b2c8a2ddb65412981c68516f37ebacda4bc7c1cec364c46a4530
4
- data.tar.gz: e775ca9ed734be1acdbd7e6275143fe61f43e4403510eb768e552d89cfc084e2
3
+ metadata.gz: fb7eb442187f8f1129b1e5e0a6ae9e05f504522cb55c2a3f75b784fc803717cf
4
+ data.tar.gz: 6819e04fc69ba93d601ce0053905db9ab10cea6d1e4af39e51a6d3b283603c02
5
5
  SHA512:
6
- metadata.gz: b38ccf0cf8e92485d94d6af2389d32f2cc6fa9582167ee96d0a3a2388b8e4336cad2df1f9257bb97430a71ab1d4a030291b05a078e246bdab63d1d7e6061f3e3
7
- data.tar.gz: 43a6945199cdc8f8303e97ebcce4e6c3508e517745247bbf362b7c334ed3e600223e3fd6cd0a8a98e06723d3efe158721f6ff4945c91a5ec3b965247c1a62e79
6
+ metadata.gz: f21d1d2c7431b3de60715d50848d1a9ae6314c0dce3918a4db1d020e81928fff965148c8de4865186ab470e2242f775d35184ea212aa9c96f988a9ee32fc06bf
7
+ data.tar.gz: cc86b4e88d4cb126ac2d7824a996341db7194724ff95242bf17f4d2743a56552bb01486fb122e674901012cfe6a7f361acba60dd9504b98af7b8a8f7bbf3dbb8
data/lib/utils/cjk.rb ADDED
@@ -0,0 +1,110 @@
1
+ module Metanorma
2
+ module Utils
3
+ class << self
4
+ # Basic CJK scripts
5
+ HAN = "\\p{Han}".freeze
6
+ BOPOMOFO = "\\p{Bopomofo}".freeze
7
+ HANGUL = "\\p{Hangul}".freeze
8
+ HIRAGANA = "\\p{Hiragana}".freeze
9
+ KATAKANA = "\\p{Katakana}".freeze
10
+
11
+ # Script extensions - characters shared between scripts
12
+
13
+ # CJK Symbols and Punctuation (U+3000–U+303F)
14
+ # Used across all CJK scripts
15
+ CJK_SYMBOLS = "[\\u3000-\\u303F]".freeze
16
+
17
+ # CJK Punctuation (subset of CJK Symbols commonly used)
18
+ CJK_PUNCTUATION = "[\\u3001-\\u3003\\u3008-\\u3011\\u3014-\\u301F]".freeze
19
+
20
+ # Halfwidth and Fullwidth Forms (U+FF00–U+FFEF)
21
+ # Used in all CJK contexts
22
+ CJK_HALFWIDTH_FULLWIDTH = "[\\uFF00-\\uFFEF]".freeze
23
+
24
+ # CJK Compatibility Forms (U+FE30–U+FE4F)
25
+ # Primarily used with Han but relevant for all CJK
26
+ CJK_COMPAT = "[\\uFE30-\\uFE4F]".freeze
27
+
28
+ # Vertical Forms (U+FE10–U+FE1F)
29
+ # Used in vertical text layout for all CJK
30
+ CJK_VERTICAL = "[\\uFE10-\\uFE1F]".freeze
31
+
32
+ # Small Form Variants (U+FE50–U+FE6F)
33
+ # Used in all CJK contexts
34
+ CJK_SMALL_FORMS = "[\\uFE50-\\uFE6F]".freeze
35
+
36
+ # Ideographic Description Characters (U+2FF0–U+2FFF)
37
+ # Used with Han script
38
+ HAN_IDC = "[\\u2FF0-\\u2FFF]".freeze
39
+
40
+ # Kanbun (U+3190–U+319F)
41
+ # Used with Han script for Japanese
42
+ KANBUN = "[\\u3190-\\u319F]".freeze
43
+
44
+ # CJK Compatibility (U+3300–U+33FF)
45
+ # Used with Han script
46
+ CJK_COMPAT_IDEOGRAPHS = "[\\u3300-\\u33FF]".freeze
47
+
48
+ # CJK Compatibility Ideographs (U+F900–U+FAFF)
49
+ HAN_COMPAT_IDEOGRAPHS = "[\\uF900-\\uFAFF]".freeze
50
+
51
+ # Script extensions by primary script
52
+ HAN_EXTENSIONS = [
53
+ HAN,
54
+ CJK_SYMBOLS,
55
+ CJK_PUNCTUATION,
56
+ CJK_HALFWIDTH_FULLWIDTH,
57
+ CJK_COMPAT,
58
+ CJK_VERTICAL,
59
+ CJK_SMALL_FORMS,
60
+ HAN_IDC,
61
+ KANBUN,
62
+ CJK_COMPAT_IDEOGRAPHS,
63
+ HAN_COMPAT_IDEOGRAPHS
64
+ ].join("|").freeze
65
+
66
+ HANGUL_EXTENSIONS = [
67
+ HANGUL,
68
+ CJK_SYMBOLS,
69
+ CJK_PUNCTUATION,
70
+ CJK_HALFWIDTH_FULLWIDTH,
71
+ CJK_VERTICAL,
72
+ CJK_SMALL_FORMS
73
+ ].join("|").freeze
74
+
75
+ HIRAGANA_EXTENSIONS = [
76
+ HIRAGANA,
77
+ CJK_SYMBOLS,
78
+ CJK_PUNCTUATION,
79
+ CJK_HALFWIDTH_FULLWIDTH,
80
+ CJK_VERTICAL,
81
+ CJK_SMALL_FORMS
82
+ ].join("|").freeze
83
+
84
+ KATAKANA_EXTENSIONS = [
85
+ KATAKANA,
86
+ CJK_SYMBOLS,
87
+ CJK_PUNCTUATION,
88
+ CJK_HALFWIDTH_FULLWIDTH,
89
+ CJK_VERTICAL,
90
+ CJK_SMALL_FORMS
91
+ ].join("|").freeze
92
+
93
+ BOPOMOFO_EXTENSIONS = [
94
+ BOPOMOFO,
95
+ CJK_SYMBOLS,
96
+ CJK_PUNCTUATION,
97
+ CJK_HALFWIDTH_FULLWIDTH
98
+ ].join("|").freeze
99
+
100
+ # Combined CJK pattern including all script extensions
101
+ CJK = [
102
+ HAN_EXTENSIONS,
103
+ HANGUL_EXTENSIONS,
104
+ HIRAGANA_EXTENSIONS,
105
+ KATAKANA_EXTENSIONS,
106
+ BOPOMOFO_EXTENSIONS
107
+ ].join("|").freeze
108
+ end
109
+ end
110
+ end
data/lib/utils/log.rb CHANGED
@@ -43,6 +43,14 @@ module Metanorma
43
43
  end
44
44
  end
45
45
 
46
+ def messages
47
+ @log.values.each_with_object([]) do |v, m|
48
+ v.each do |e|
49
+ m << e
50
+ end
51
+ end
52
+ end
53
+
46
54
  def suppress_log?(category, severity, msg)
47
55
  category == "Relaton" && /^Fetching /.match?(msg) ||
48
56
  @suppress_log[:severity] <= severity ||
@@ -78,7 +86,7 @@ module Metanorma
78
86
  while !node.nil? && node["id"].nil? && node.respond_to?(:parent)
79
87
  node = node.parent
80
88
  end
81
- node.respond_to?(:parent) ? "ID #{node['id']}" : ""
89
+ node.respond_to?(:parent) ? "ID #{node['anchor'] || node['id']}" : ""
82
90
  elsif node.is_a? String then node
83
91
  elsif node.respond_to?(:lineno) && !node.lineno.nil? &&
84
92
  !node.lineno.empty?
@@ -216,7 +224,7 @@ module Metanorma
216
224
  /^ID /.match?(loc) or return [loc, nil]
217
225
  loc.sub!(/^ID /, "")
218
226
  loc = @mapid[loc] while @mapid[loc]
219
- url = "#{@htmlfilename}##{loc}"
227
+ url = "#{@htmlfilename}##{to_ncname loc}"
220
228
  [loc, url]
221
229
  end
222
230
 
data/lib/utils/main.rb CHANGED
@@ -5,6 +5,7 @@ require "htmlentities"
5
5
  require "nokogiri"
6
6
  require "csv"
7
7
  require_relative "../sterile/sterile"
8
+ require_relative "cjk"
8
9
 
9
10
  module Metanorma
10
11
  module Utils
@@ -35,9 +36,6 @@ module Metanorma
35
36
  docfile.nil? ? "./" : "#{Pathname.new(docfile).parent}/"
36
37
  end
37
38
 
38
- CJK = "\\p{Han}|\\p{Bopomofo}|\\p{Hangul}|\\p{Hiragana}|\\p{Katakana}"
39
- .freeze
40
-
41
39
  # TODO needs internationalisation of quote
42
40
  def smartformat(text)
43
41
  ret = HTMLEntities.new.decode(
@@ -143,7 +141,7 @@ module Metanorma
143
141
  nopunct = LONGSTR_NOPUNCT)
144
142
  /^\s*$/.match?(text) and return text
145
143
  text.split(/(?=(?:\s|-))/).map do |w|
146
- if /^\s*$/.match(text) || (w.size < threshold) then w
144
+ if /^\s*$/.match(w) || (w.size < threshold) then w
147
145
  else
148
146
  w.scan(/.{,#{threshold}}/o).map.with_index do |w1, i|
149
147
  w1.size < threshold ? w1 : break_up_long_str1(w1, i + 1, nopunct)
data/lib/utils/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module Metanorma
2
2
  module Utils
3
- VERSION = "1.10.2".freeze
3
+ VERSION = "1.11.2".freeze
4
4
  end
5
5
  end
data/lib/utils/xml.rb CHANGED
@@ -69,7 +69,7 @@ module Metanorma
69
69
  cjk2 = /#{CJK}/o.match?(nextfirst)
70
70
  text1 = /[^\p{Z}\p{C}]/.match?(last)
71
71
  text2 = /[^\p{Z}\p{C}]/.match?(nextfirst)
72
- (cjk1 && (cjk2 || !text2)) and next
72
+ cjk1 && (cjk2 || !text2) and next
73
73
  !text1 && cjk2 and next
74
74
  ret[i] += " "
75
75
  end
@@ -125,10 +125,18 @@ module Metanorma
125
125
  end
126
126
 
127
127
  # all element/attribute pairs that are ID anchors in Metanorma
128
- def anchor_attributes
129
- [%w[* id], %w[* bibitemid], %w[review from],
130
- %w[review to], %w[index to], %w[xref target],
131
- %w[callout target], %w[location target]]
128
+ def anchor_attributes(presxml: false)
129
+ ret = [%w(review from), %w(review to), %w(callout target), %w(xref to),
130
+ %w(eref bibitemid), %w(citation bibitemid), %w(xref target),
131
+ %w(label for), %w(location target), %w(index to),
132
+ %w(termsource bibitemid), %w(admonition target)]
133
+ ret1 = [%w(fn target), %w(semx source), %w(fmt-title source),
134
+ %w(fmt-xref to), %w(fmt-xref target), %w(fmt-eref bibitemid),
135
+ %w(fmt-xref-label container), %w(fmt-fn-body target),
136
+ %w(fmt-review-start source), %w(fmt-review-start end),
137
+ %w(fmt-review-start target), %w(fmt-review-end source),
138
+ %w(fmt-review-end start), %w(fmt-review-end target)]
139
+ presxml ? ret + ret1 : ret
132
140
  end
133
141
 
134
142
  # convert definition list term/value pair into Nokogiri XML attribute
@@ -41,7 +41,8 @@ Gem::Specification.new do |spec|
41
41
  spec.add_development_dependency "guard-rspec", "~> 4.7"
42
42
  spec.add_development_dependency "rake", "~> 13.0"
43
43
  spec.add_development_dependency "rspec", "~> 3.6"
44
- spec.add_development_dependency "rubocop", "~> 1.5.2"
44
+ spec.add_development_dependency "rubocop", "~> 1"
45
+ spec.add_development_dependency "rubocop-performance"
45
46
  spec.add_development_dependency "simplecov", "~> 0.15"
46
47
  spec.add_development_dependency "timecop", "~> 0.9"
47
48
  spec.add_development_dependency "vcr", "~> 6.1.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metanorma-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.2
4
+ version: 1.11.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-17 00:00:00.000000000 Z
11
+ date: 2025-05-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: asciidoctor
@@ -212,14 +212,28 @@ dependencies:
212
212
  requirements:
213
213
  - - "~>"
214
214
  - !ruby/object:Gem::Version
215
- version: 1.5.2
215
+ version: '1'
216
216
  type: :development
217
217
  prerelease: false
218
218
  version_requirements: !ruby/object:Gem::Requirement
219
219
  requirements:
220
220
  - - "~>"
221
221
  - !ruby/object:Gem::Version
222
- version: 1.5.2
222
+ version: '1'
223
+ - !ruby/object:Gem::Dependency
224
+ name: rubocop-performance
225
+ requirement: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - ">="
228
+ - !ruby/object:Gem::Version
229
+ version: '0'
230
+ type: :development
231
+ prerelease: false
232
+ version_requirements: !ruby/object:Gem::Requirement
233
+ requirements:
234
+ - - ">="
235
+ - !ruby/object:Gem::Version
236
+ version: '0'
223
237
  - !ruby/object:Gem::Dependency
224
238
  name: simplecov
225
239
  requirement: !ruby/object:Gem::Requirement
@@ -307,6 +321,7 @@ files:
307
321
  - README.adoc
308
322
  - lib/metanorma-utils.rb
309
323
  - lib/sterile/sterile.rb
324
+ - lib/utils/cjk.rb
310
325
  - lib/utils/hash_transform_keys.rb
311
326
  - lib/utils/image.rb
312
327
  - lib/utils/linestatus.rb
@@ -320,7 +335,7 @@ homepage: https://github.com/metanorma/metanorma-utils
320
335
  licenses:
321
336
  - BSD-2-Clause
322
337
  metadata: {}
323
- post_install_message:
338
+ post_install_message:
324
339
  rdoc_options: []
325
340
  require_paths:
326
341
  - lib
@@ -335,8 +350,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
335
350
  - !ruby/object:Gem::Version
336
351
  version: '0'
337
352
  requirements: []
338
- rubygems_version: 3.3.27
339
- signing_key:
353
+ rubygems_version: 3.5.22
354
+ signing_key:
340
355
  specification_version: 4
341
356
  summary: metanorma-utils
342
357
  test_files: []